# Compare commits

400 Commits: 2014.04.07 ... 2014.06.24

| Author | SHA1 | Date | |
|---|---|---|---|
|   | 36ddd8b3f7 | ||
|   | 7575d52a73 | ||
|   | 9a2dc4f7ac | ||
|   | c5cd249e41 | ||
|   | 8940c1c058 | ||
|   | 27ec04b232 | ||
|   | d2824416aa | ||
|   | 18061bbab0 | ||
|   | 4ecbbcbcea | ||
|   | 55c97a03e1 | ||
|   | 98aeac6ea9 | ||
|   | 8bfb6723cb | ||
|   | a20575e8ae | ||
|   | 7724572519 | ||
|   | d763637f6a | ||
|   | c26e9ac4b2 | ||
|   | 896bf55352 | ||
|   | a23ba9b53c | ||
|   | 38a9339baf | ||
|   | def8b4039f | ||
|   | a14e1538fe | ||
|   | 5f28a1acad | ||
|   | 25e9953c6f | ||
|   | f9df094ca5 | ||
|   | b60a469023 | ||
|   | 7012631257 | ||
|   | e6c9f80c48 | ||
|   | 895ce482b1 | ||
|   | e5da4021eb | ||
|   | 2371053565 | ||
|   | 33bf9033e0 | ||
|   | 35eacd0dae | ||
|   | 96bef88f5f | ||
|   | 5524b242a7 | ||
|   | a013eba65f | ||
|   | 36755d40b4 | ||
|   | 7d568f5ab8 | ||
|   | a7207cd580 | ||
|   | e8ef659cd9 | ||
|   | b0adbe98fb | ||
|   | 0c361c41b8 | ||
|   | c5469e046a | ||
|   | 4d2f143ce5 | ||
|   | 8f93030c85 | ||
|   | fdb9aebead | ||
|   | 3141feb73b | ||
|   | 9706f3f802 | ||
|   | d5e944359e | ||
|   | 826ec77fb2 | ||
|   | 2656f4eb6a | ||
|   | 2b88feedf7 | ||
|   | 23566e0d78 | ||
|   | 828553b614 | ||
|   | 3048e82a94 | ||
|   | 09ffa08ba1 | ||
|   | e0b4cc489f | ||
|   | 15e423407f | ||
|   | 702e522044 | ||
|   | 77abae55df | ||
|   | 617c0b2239 | ||
|   | 814d4257df | ||
|   | 23ae281b31 | ||
|   | 94128d6b0d | ||
|   | 059009c592 | ||
|   | 9cc977f104 | ||
|   | 1c0ade7afa | ||
|   | f2741c8d3a | ||
|   | 6ab8f3584a | ||
|   | 8ae5ce1726 | ||
|   | eb92077720 | ||
|   | 90e0fd4bad | ||
|   | 05741e05d9 | ||
|   | 9aa6637644 | ||
|   | d30d28156d | ||
|   | be6d722904 | ||
|   | d551980823 | ||
|   | f0a6c3d2bc | ||
|   | 4e0fb1280a | ||
|   | 24f5251cce | ||
|   | ac1390eee8 | ||
|   | 4a5b4d34dc | ||
|   | 63adb0cc61 | ||
|   | 3c80377b69 | ||
|   | 24577db241 | ||
|   | 566bd96da8 | ||
|   | ebdb64d605 | ||
|   | a6ffb92f0b | ||
|   | 3217377b3c | ||
|   | 24da5893fc | ||
|   | 087ca2cb07 | ||
|   | b4e7447458 | ||
|   | a45e6aadd7 | ||
|   | 70e322695d | ||
|   | 6a15923b77 | ||
|   | 7ffad0af5a | ||
|   | 0e3ae92441 | ||
|   | b3ae826f7a | ||
|   | dede691aca | ||
|   | fb6a5b965b | ||
|   | 6340716b3a | ||
|   | b675b32e6b | ||
|   | 6a3fa81ffb | ||
|   | df53a98f2b | ||
|   | db23d8d2a2 | ||
|   | 0d69795014 | ||
|   | 3374f3fdc2 | ||
|   | 4bf0727b1f | ||
|   | 263bd4ec50 | ||
|   | b7e8b6e37a | ||
|   | ceb7a17f34 | ||
|   | 1a2f2e1e66 | ||
|   | 6803016858 | ||
|   | 9b7c4fd981 | ||
|   | dc31942f42 | ||
|   | 1f6b8f3115 | ||
|   | 9c7b79acd9 | ||
|   | 9168308579 | ||
|   | 7e8fdb1aae | ||
|   | 386ba39cac | ||
|   | 236d0cd07c | ||
|   | ed86f38a11 | ||
|   | 6db80ad2db | ||
|   | 14470ac87b | ||
|   | 0cdf576d86 | ||
|   | 4ffeca4ea2 | ||
|   | 211fd6c674 | ||
|   | 6ebb46c106 | ||
|   | 0f97c9a06f | ||
|   | 77fb72646f | ||
|   | aae74e3832 | ||
|   | 894e730911 | ||
|   | 63961d87a6 | ||
|   | 87fe568c28 | ||
|   | 46531b374d | ||
|   | 9e8753911c | ||
|   | 5c6b1e578c | ||
|   | 8f0c8fb452 | ||
|   | b702ecebf0 | ||
|   | 950dc95e97 | ||
|   | d9dd3584e1 | ||
|   | 15a9f36849 | ||
|   | d0087d4ff2 | ||
|   | cc5ada6f4c | ||
|   | dfb2e1a325 | ||
|   | 65bab327b4 | ||
|   | 9eeb7abc6b | ||
|   | c70df21099 | ||
|   | 418424e5f5 | ||
|   | 8477466125 | ||
|   | 865dbd4a26 | ||
|   | b1e6f55912 | ||
|   | 4d78f3b770 | ||
|   | 7f739999e9 | ||
|   | 0f8a01d4f3 | ||
|   | e2bf499b14 | ||
|   | 7cf4547ab6 | ||
|   | 8ae980807a | ||
|   | eec4d8ef96 | ||
|   | 1c783bca88 | ||
|   | ac73651f66 | ||
|   | e5ceb3bfda | ||
|   | c2ef29234c | ||
|   | 1a1826c1af | ||
|   | c7c6d43fe1 | ||
|   | 2902d44f99 | ||
|   | d6e4ba287b | ||
|   | f50ee8d1c3 | ||
|   | 0e67ab0d8e | ||
|   | 77541837e5 | ||
|   | e3a6576f35 | ||
|   | 89bb8e97ee | ||
|   | 375696b1b1 | ||
|   | 4ea5c7b70d | ||
|   | 8dfa187b8a | ||
|   | c1ed1f7055 | ||
|   | 1514f74967 | ||
|   | 2e8323e3f7 | ||
|   | 69f8364042 | ||
|   | 79981f039b | ||
|   | 34d863f3fc | ||
|   | 91994c2c81 | ||
|   | 3ee4b60d56 | ||
|   | 76e92371ac | ||
|   | 08af0205f9 | ||
|   | a725fb1f43 | ||
|   | 05ee2b6dad | ||
|   | b74feacac5 | ||
|   | 426b52fc5d | ||
|   | 5c30b26846 | ||
|   | f07b74fc18 | ||
|   | a5a45015ba | ||
|   | beee53de06 | ||
|   | 8712f2bea7 | ||
|   | ea102818c9 | ||
|   | 0a871f6880 | ||
|   | 481efc84a8 | ||
|   | 01ed5c9be3 | ||
|   | ad3bc6acd5 | ||
|   | 5afa7f8bee | ||
|   | ec8deefc27 | ||
|   | a2d5a4ee64 | ||
|   | dffcc2ea0c | ||
|   | 1800eeefed | ||
|   | d7e7dedbde | ||
|   | d19bb9c0aa | ||
|   | 3ef79a974a | ||
|   | bc6800fbed | ||
|   | 65314dccf8 | ||
|   | feb7221209 | ||
|   | 56a94d8cbb | ||
|   | 24e6ec8ac8 | ||
|   | 87724af7a8 | ||
|   | b65c3e77e8 | ||
|   | 5301304bf2 | ||
|   | 948bcc60df | ||
|   | 25dfe0eb10 | ||
|   | 8e71456a81 | ||
|   | ccdd34ed78 | ||
|   | 26d886354f | ||
|   | a172b258ac | ||
|   | 7b93c2c204 | ||
|   | 57c7411f46 | ||
|   | d0a122348e | ||
|   | e4cbb5f382 | ||
|   | c1bce22f23 | ||
|   | e3abbbe301 | ||
|   | 55b36e3710 | ||
|   | 877bea9ce1 | ||
|   | 33c7ff861e | ||
|   | 749fe60c1e | ||
|   | 63b31b059c | ||
|   | 1476b497eb | ||
|   | e399853d0c | ||
|   | fdb205b19e | ||
|   | fbe8053120 | ||
|   | ea783d01e1 | ||
|   | b7d73595dc | ||
|   | e97e53eeed | ||
|   | 342f630dbf | ||
|   | 69c8fb9e5d | ||
|   | 5f0f8013ac | ||
|   | b5368acee8 | ||
|   | f71959fcf5 | ||
|   | 5c9f3b8b16 | ||
|   | bebd6f9308 | ||
|   | 84a2806c16 | ||
|   | d0111a7409 | ||
|   | aab8874c55 | ||
|   | fcf5b01746 | ||
|   | 4de9e9a6db | ||
|   | 0067d6c4be | ||
|   | 2099125333 | ||
|   | b48f147d5a | ||
|   | 4f3e943080 | ||
|   | 7558830fa3 | ||
|   | 867274e997 | ||
|   | 6515778305 | ||
|   | 3b1dfc0f2f | ||
|   | d664de44b7 | ||
|   | bbe99d26ec | ||
|   | 50fc59968e | ||
|   | b8b01bb92a | ||
|   | eb45133451 | ||
|   | 10c0e2d818 | ||
|   | 669f0e7cda | ||
|   | 32fd27ec98 | ||
|   | 0c13f378de | ||
|   | 0049594efb | ||
|   | 113c7d3eb0 | ||
|   | 549371fc99 | ||
|   | 957f27e5bb | ||
|   | 1f8c19767b | ||
|   | a383a98af6 | ||
|   | acd69589a5 | ||
|   | b30b8698ea | ||
|   | f1f25be6db | ||
|   | deab8c1960 | ||
|   | c57f775710 | ||
|   | e75cafe9fb | ||
|   | 33ab8453c4 | ||
|   | ebd3c7b370 | ||
|   | 29645a1d44 | ||
|   | 22d99a801a | ||
|   | 57b8d84cd9 | ||
|   | 65e4ad5bfe | ||
|   | 98b7d476d9 | ||
|   | 201e3c99b9 | ||
|   | 8a7a4a9796 | ||
|   | df297c8794 | ||
|   | 3f53a75f02 | ||
|   | 7c360e3a04 | ||
|   | d2176c8011 | ||
|   | aa92f06308 | ||
|   | e00c9cf599 | ||
|   | ba60a3ebe0 | ||
|   | efb7e11988 | ||
|   | a55c8b7aac | ||
|   | a980bc4324 | ||
|   | 4b10aadffc | ||
|   | 5bec574859 | ||
|   | d11271dd29 | ||
|   | 1d9d26d09b | ||
|   | c0292e8ab7 | ||
|   | f44e5d8b43 | ||
|   | 6ea74538e3 | ||
|   | 24b8924b46 | ||
|   | 86a3c67112 | ||
|   | 8be874370d | ||
|   | aec74dd95a | ||
|   | 6890574256 | ||
|   | d03745c684 | ||
|   | 28746fbd59 | ||
|   | 0321213c11 | ||
|   | 3f0aae4244 | ||
|   | 48099643cc | ||
|   | 621f33c9d0 | ||
|   | f07a9f6f43 | ||
|   | e51880fd32 | ||
|   | 88ce273da4 | ||
|   | b9ba5dfa28 | ||
|   | 4086f11929 | ||
|   | 478c2c6193 | ||
|   | d2d6481afb | ||
|   | 43acb120f3 | ||
|   | e8f2025edf | ||
|   | a4eb9578af | ||
|   | fa35cdad02 | ||
|   | d1b9c912a4 | ||
|   | edec83a025 | ||
|   | c0a7c60815 | ||
|   | 117a7d1944 | ||
|   | a40e0dd434 | ||
|   | 188b086dd9 | ||
|   | 1f27d2c0e1 | ||
|   | 7560096db5 | ||
|   | 282cb9c7ba | ||
|   | 3a9d6790ad | ||
|   | 0610a3e0b2 | ||
|   | 7f9c31df88 | ||
|   | 3fa6b6e293 | ||
|   | 3c50b99ab4 | ||
|   | 52fadd5fb2 | ||
|   | 5367fe7f4d | ||
|   | 427588f6e7 | ||
|   | 51745be312 | ||
|   | d7f1e7c88f | ||
|   | 4145a257be | ||
|   | 525dc9809e | ||
|   | 1bf3210816 | ||
|   | e6c6d10d99 | ||
|   | f270256e06 | ||
|   | f401c6f69f | ||
|   | b075d25bed | ||
|   | 3d1bb6b4dd | ||
|   | 1db2666916 | ||
|   | 8f5c0218d8 | ||
|   | d7666dff82 | ||
|   | 2d4c98dbd1 | ||
|   | fd50bf623c | ||
|   | d360a14678 | ||
|   | d0f2ab6969 | ||
|   | de906ef543 | ||
|   | 2fb3deeca1 | ||
|   | 66398056f1 | ||
|   | 77477fa4c9 | ||
|   | a169e18ce1 | ||
|   | 381640e3ac | ||
|   | 37e3410137 | ||
|   | 97b5196960 | ||
|   | 6a4f3528c8 | ||
|   | b9c76aa1a9 | ||
|   | 0d3070d364 | ||
|   | 7753cadbfa | ||
|   | 3950450342 | ||
|   | c82b1fdad6 | ||
|   | b0fb63abe8 | ||
|   | 3ab34c603e | ||
|   | 7d6413341a | ||
|   | 140012d0f6 | ||
|   | 4be9f8c814 | ||
|   | 5c802bac37 | ||
|   | 6c30ff756a | ||
|   | 62749e4708 | ||
|   | 6b7dee4b38 | ||
|   | ef2041eb4e | ||
|   | 29e3e682af | ||
|   | f983c44199 | ||
|   | e4db19511a | ||
|   | c47d21da80 | ||
|   | 269aecd0c0 | ||
|   | aafddb2b0a | ||
|   | 6262ac8ac5 | ||
|   | 89938c719e | ||
|   | ec0fafbb19 | ||
|   | a5863bdf33 | ||
|   | b58ddb32ba | ||
|   | 784763c565 | ||
|   | 39c68260c0 | ||
|   | 149254d0d5 | ||
|   | 0c14e2fbe3 | ||

| @@ -3,6 +3,7 @@ python: | ||||
|   - "2.6" | ||||
|   - "2.7" | ||||
|   - "3.3" | ||||
|   - "3.4" | ||||
| script: nosetests test --verbose | ||||
| notifications: | ||||
|   email: | ||||
|   | ||||
							
								
								
									
**14** CHANGELOG

| @@ -1,14 +0,0 @@ | ||||
| 2013.01.02  Codename: GIULIA | ||||
|  | ||||
|     * Add support for ComedyCentral clips <nto> | ||||
|     * Corrected Vimeo description fetching <Nick Daniels> | ||||
|     * Added the --no-post-overwrites argument <Barbu Paul - Gheorghe> | ||||
|     * --verbose offers more environment info | ||||
|     * New info_dict field: uploader_id | ||||
|     * New updates system, with signature checking | ||||
|     * New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream | ||||
|     * Fixed IEs: BlipTv | ||||
|     * Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ | ||||
|     * Simplified IEs and test code | ||||
|     * Various (Python 3 and other) fixes | ||||
|     * Revamped and expanded tests | ||||
							
								
								
									
**8** Makefile

| @@ -1,7 +1,7 @@ | ||||
| all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz | ||||
|  | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
| @@ -55,7 +55,9 @@ README.txt: README.md | ||||
| 	pandoc -f markdown -t plain README.md -o README.txt | ||||
|  | ||||
| youtube-dl.1: README.md | ||||
| 	pandoc -s -f markdown -t man README.md -o youtube-dl.1 | ||||
| 	python devscripts/prepare_manpage.py >youtube-dl.1.temp.md | ||||
| 	pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 | ||||
| 	rm -f youtube-dl.1.temp.md | ||||
|  | ||||
| youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in | ||||
| 	python devscripts/bash-completion.py | ||||
| @@ -75,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- | ||||
| 		--exclude 'docs/_build' \ | ||||
| 		-- \ | ||||
| 		bin devscripts test youtube_dl docs \ | ||||
| 		CHANGELOG LICENSE README.md README.txt \ | ||||
| 		LICENSE README.md README.txt \ | ||||
| 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ | ||||
| 		youtube-dl | ||||
|   | ||||
							
								
								
									
**22** README.md

| @@ -1,11 +1,24 @@ | ||||
| % YOUTUBE-DL(1) | ||||
|  | ||||
| # NAME | ||||
| youtube-dl - download videos from youtube.com or other video platforms | ||||
|  | ||||
| # SYNOPSIS | ||||
| **youtube-dl** [OPTIONS] URL [URL...] | ||||
|  | ||||
| # INSTALLATION | ||||
|  | ||||
| To install it right away for all UNIX users (Linux, OS X, etc.), type: | ||||
|  | ||||
|     sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl | ||||
|     sudo chmod a+x /usr/local/bin/youtube-dl | ||||
|  | ||||
| If you do not have curl, you can alternatively use a recent wget: | ||||
|  | ||||
|     sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl | ||||
|     sudo chmod a+x /usr/local/bin/youtube-dl | ||||
|  | ||||
| Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). | ||||
|  | ||||
| Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html . | ||||
|  | ||||
| # DESCRIPTION | ||||
| **youtube-dl** is a small command-line program to download videos from | ||||
| YouTube.com and a few more sites. It requires the Python interpreter, version | ||||
| @@ -250,6 +263,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      default | ||||
|     --embed-subs                     embed subtitles in the video (only for mp4 | ||||
|                                      videos) | ||||
|     --embed-thumbnail                embed thumbnail in the audio as cover art | ||||
|     --add-metadata                   write metadata to the video file | ||||
|     --xattrs                         write metadata to the video file's xattrs | ||||
|                                      (using dublin core and xdg standards) | ||||
| @@ -457,7 +471,7 @@ If your report is shorter than two lines, it is almost certainly missing some of | ||||
|  | ||||
| For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. | ||||
|  | ||||
| Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. | ||||
| Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. | ||||
|  | ||||
| ###  Are you using the latest version? | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ header = oldreadme[:oldreadme.index('# OPTIONS')] | ||||
| footer = oldreadme[oldreadme.index('# CONFIGURATION'):] | ||||
|  | ||||
| options = helptext[helptext.index('  General Options:') + 19:] | ||||
| options = re.sub(r'^  (\w.+)$', r'## \1', options, flags=re.M) | ||||
| options = re.sub(r'(?m)^  (\w.+)$', r'## \1', options) | ||||
| options = '# OPTIONS\n' + options + '\n' | ||||
|  | ||||
| with io.open(README_FILE, 'w', encoding='utf-8') as f: | ||||
|   | ||||
							
								
								
									
**20** devscripts/prepare_manpage.py (Normal file)

| @@ -0,0 +1,20 @@ | ||||
|  | ||||
| import io | ||||
| import os.path | ||||
| import sys | ||||
| import re | ||||
|  | ||||
| ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||
| README_FILE = os.path.join(ROOT_DIR, 'README.md') | ||||
|  | ||||
| with io.open(README_FILE, encoding='utf-8') as f: | ||||
|     readme = f.read() | ||||
|  | ||||
| PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n' | ||||
| readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme) | ||||
| readme = PREFIX + readme | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
|     print(readme.encode('utf-8')) | ||||
| else: | ||||
|     print(readme) | ||||
| @@ -45,9 +45,9 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." | ||||
| /bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." | ||||
| make README.md | ||||
| git add CHANGELOG README.md youtube_dl/version.py | ||||
| git add README.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL): | ||||
|             old_report_warning(message) | ||||
|         self.report_warning = types.MethodType(report_warning, self) | ||||
|  | ||||
| def gettestcases(): | ||||
|  | ||||
| def gettestcases(include_onlymatching=False): | ||||
|     for ie in youtube_dl.extractor.gen_extractors(): | ||||
|         t = getattr(ie, '_TEST', None) | ||||
|         if t: | ||||
|             t['name'] = type(ie).__name__[:-len('IE')] | ||||
|             yield t | ||||
|         for t in getattr(ie, '_TESTS', []): | ||||
|             assert not hasattr(ie, '_TESTS'), \ | ||||
|                 '%s has _TEST and _TESTS' % type(ie).__name__ | ||||
|             tests = [t] | ||||
|         else: | ||||
|             tests = getattr(ie, '_TESTS', []) | ||||
|         for t in tests: | ||||
|             if not include_onlymatching and t.get('only_matching', False): | ||||
|                 continue | ||||
|             t['name'] = type(ie).__name__[:-len('IE')] | ||||
|             yield t | ||||
|  | ||||
| @@ -101,7 +107,7 @@ def expect_info_dict(self, expected_dict, got_dict): | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
|                 u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) | ||||
|                 u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) | ||||
|         else: | ||||
|             if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                 got = 'md5:' + md5(got_dict.get(info_field)) | ||||
| @@ -128,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict): | ||||
|             missing_keys, | ||||
|             'Missing keys in test definition: %s' % ( | ||||
|                 ', '.join(sorted(missing_keys)))) | ||||
|  | ||||
|  | ||||
| def assertRegexpMatches(self, text, regexp, msg=None): | ||||
|     if hasattr(self, 'assertRegexpMatches'): | ||||
|         return self.assertRegexpMatches(text, regexp, msg) | ||||
|     else: | ||||
|         m = re.match(regexp, text) | ||||
|         if not m: | ||||
|             note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text) | ||||
|             if msg is None: | ||||
|                 msg = note | ||||
|             else: | ||||
|                 msg = note + ', ' + msg | ||||
|             self.assertTrue(m, msg) | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from test.helper import FakeYDL, assertRegexpMatches | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
|  | ||||
| @@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['ext'], 'mp4') | ||||
|  | ||||
|         # No prefer_free_formats => prefer mp4 and flv for greater compatibilty | ||||
|         # No prefer_free_formats => prefer mp4 and flv for greater compatibility | ||||
|         ydl = YDL() | ||||
|         ydl.params['prefer_free_formats'] = False | ||||
|         formats = [ | ||||
| @@ -274,6 +274,12 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         # Replace missing fields with 'NA' | ||||
|         self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') | ||||
|  | ||||
|     def test_format_note(self): | ||||
|         ydl = YoutubeDL() | ||||
|         self.assertEqual(ydl._format_note({}), '') | ||||
|         assertRegexpMatches(self, ydl._format_note({ | ||||
|             'vbr': 10, | ||||
|         }), '^\s*10k$') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL | ||||
|  | ||||
|  | ||||
| def _download_restricted(url, filename, age): | ||||
|     """ Returns true iff the file has been downloaded """ | ||||
|     """ Returns true if the file has been downloaded """ | ||||
|  | ||||
|     params = { | ||||
|         'age_limit': age, | ||||
|   | ||||
| @@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) | ||||
|         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) | ||||
|         self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) | ||||
|         self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) | ||||
|  | ||||
|     def test_youtube_channel_matching(self): | ||||
|         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) | ||||
| @@ -76,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) | ||||
|         self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv')) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/')) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/')) | ||||
|  | ||||
|     def test_justintv_videoid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) | ||||
|  | ||||
|     def test_justin_tv_chapterid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) | ||||
|         self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361')) | ||||
|  | ||||
|     def test_youtube_extract(self): | ||||
|         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) | ||||
| @@ -105,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|         for tc in gettestcases(): | ||||
|         for tc in gettestcases(include_onlymatching=True): | ||||
|             url = tc['url'] | ||||
|             for ie in ies: | ||||
|                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): | ||||
| @@ -156,6 +157,18 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', | ||||
|             ['ComedyCentralShows']) | ||||
|         self.assertMatch( | ||||
|             'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', | ||||
|             ['ComedyCentralShows']) | ||||
|  | ||||
|     def test_yahoo_https(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2701 | ||||
| @@ -163,5 +176,6 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|             'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', | ||||
|             ['Yahoo']) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -10,6 +10,7 @@ import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import ( | ||||
|     assertRegexpMatches, | ||||
|     expect_info_dict, | ||||
|     FakeYDL, | ||||
| ) | ||||
| @@ -22,9 +23,12 @@ from youtube_dl.extractor import ( | ||||
|     VimeoUserIE, | ||||
|     VimeoAlbumIE, | ||||
|     VimeoGroupsIE, | ||||
|     VineUserIE, | ||||
|     UstreamChannelIE, | ||||
|     SoundcloudSetIE, | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE, | ||||
|     TeacherTubeClassroomIE, | ||||
|     LivestreamIE, | ||||
|     NHLVideocenterIE, | ||||
|     BambuserChannelIE, | ||||
| @@ -43,6 +47,7 @@ from youtube_dl.extractor import ( | ||||
|     XTubeUserIE, | ||||
|     InstagramUserIE, | ||||
|     CSpanIE, | ||||
|     AolIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -99,13 +104,20 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['title'], 'Rolex Awards for Enterprise') | ||||
|         self.assertTrue(len(result['entries']) > 72) | ||||
|  | ||||
|     def test_vine_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = VineUserIE(dl) | ||||
|         result = ie.extract('https://vine.co/Visa') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertTrue(len(result['entries']) >= 50) | ||||
|  | ||||
|     def test_ustream_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = UstreamChannelIE(dl) | ||||
|         result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') | ||||
|         result = ie.extract('http://www.ustream.tv/channel/channeljapan') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '5124905') | ||||
|         self.assertTrue(len(result['entries']) >= 6) | ||||
|         self.assertEqual(result['id'], '10874166') | ||||
|         self.assertTrue(len(result['entries']) >= 54) | ||||
|  | ||||
|     def test_soundcloud_set(self): | ||||
|         dl = FakeYDL() | ||||
| @@ -123,6 +135,17 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['id'], '9615865') | ||||
|         self.assertTrue(len(result['entries']) >= 12) | ||||
|  | ||||
|     def test_soundcloud_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SoundcloudPlaylistIE(dl) | ||||
|         result = ie.extract('http://api.soundcloud.com/playlists/4110309') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '4110309') | ||||
|         self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]') | ||||
|         assertRegexpMatches( | ||||
|             self, result['description'], r'TILT Brass - Bowery Poetry Club') | ||||
|         self.assertEqual(len(result['entries']), 6) | ||||
|  | ||||
|     def test_livestream_event(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = LivestreamIE(dl) | ||||
| @@ -187,20 +210,20 @@ class TestPlaylists(unittest.TestCase): | ||||
|     def test_ivi_compilation(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = IviCompilationIE(dl) | ||||
|         result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') | ||||
|         result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'dezhurnyi_angel') | ||||
|         self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') | ||||
|         self.assertTrue(len(result['entries']) >= 36) | ||||
|          | ||||
|         self.assertEqual(result['id'], 'dvoe_iz_lartsa') | ||||
|         self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') | ||||
|         self.assertTrue(len(result['entries']) >= 24) | ||||
|  | ||||
|     def test_ivi_compilation_season(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = IviCompilationIE(dl) | ||||
|         result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') | ||||
|         result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'dezhurnyi_angel/season2') | ||||
|         self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') | ||||
|         self.assertTrue(len(result['entries']) >= 20) | ||||
|         self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') | ||||
|         self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') | ||||
|         self.assertTrue(len(result['entries']) >= 12) | ||||
|          | ||||
|     def test_imdb_list(self): | ||||
|         dl = FakeYDL() | ||||
| @@ -327,6 +350,24 @@ class TestPlaylists(unittest.TestCase): | ||||
|         whole_duration = sum(e['duration'] for e in result['entries']) | ||||
|         self.assertEqual(whole_duration, 14855) | ||||
|  | ||||
|     def test_aol_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = AolIE(dl) | ||||
|         result = ie.extract( | ||||
|             'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '152147') | ||||
|         self.assertEqual( | ||||
|             result['title'], 'Brace Yourself - Today\'s Weirdest News') | ||||
|         self.assertTrue(len(result['entries']) >= 10) | ||||
|  | ||||
|     def test_TeacherTubeClassroom(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = TeacherTubeClassroomIE(dl) | ||||
|         result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'rbhagwati2') | ||||
|         self.assertTrue(len(result['entries']) >= 20) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 28) | ||||
|         self.assertTrue(len(subtitles.keys()) >= 28) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning(u'Automatic Captions not supported by this server') | ||||
|   | ||||
| @@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase): | ||||
|     def test_youtube_mix(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = YoutubePlaylistIE(dl) | ||||
|         result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y') | ||||
|         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') | ||||
|         entries = result['entries'] | ||||
|         self.assertTrue(len(entries) >= 20) | ||||
|         original_video = entries[0] | ||||
|         self.assertEqual(original_video['id'], 'rjFaenf1T-Y') | ||||
|         self.assertEqual(original_video['id'], 'OQpdSVF_k_w') | ||||
|  | ||||
|     def test_youtube_toptracks(self): | ||||
|         print('Skipping: The playlist page gives error 500') | ||||
|   | ||||
							
								
								
									
										116
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										116
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -31,6 +31,7 @@ from .utils import ( | ||||
|     ContentTooShortError, | ||||
|     date_from_str, | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     determine_ext, | ||||
|     DownloadError, | ||||
|     encodeFilename, | ||||
| @@ -287,7 +288,7 @@ class YoutubeDL(object): | ||||
|         return self.to_stdout(message, skip_eol, check_quiet=True) | ||||
|  | ||||
|     def _write_string(self, s, out=None): | ||||
|         write_string(s, out=out, encoding=self.get_encoding()) | ||||
|         write_string(s, out=out, encoding=self.params.get('encoding')) | ||||
|  | ||||
|     def to_stdout(self, message, skip_eol=False, check_quiet=False): | ||||
|         """Print message to stdout if not in quiet mode.""" | ||||
| @@ -440,7 +441,8 @@ class YoutubeDL(object): | ||||
|                                  if v is not None) | ||||
|             template_dict = collections.defaultdict(lambda: 'NA', template_dict) | ||||
|  | ||||
|             tmpl = os.path.expanduser(self.params['outtmpl']) | ||||
|             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|             tmpl = os.path.expanduser(outtmpl) | ||||
|             filename = tmpl % template_dict | ||||
|             return filename | ||||
|         except ValueError as err: | ||||
| @@ -715,6 +717,17 @@ class YoutubeDL(object): | ||||
|             info_dict['playlist'] = None | ||||
|             info_dict['playlist_index'] = None | ||||
|  | ||||
|         thumbnails = info_dict.get('thumbnails') | ||||
|         if thumbnails: | ||||
|             thumbnails.sort(key=lambda t: ( | ||||
|                 t.get('width'), t.get('height'), t.get('url'))) | ||||
|             for t in thumbnails: | ||||
|                 if 'width' in t and 'height' in t: | ||||
|                     t['resolution'] = '%dx%d' % (t['width'], t['height']) | ||||
|  | ||||
|         if thumbnails and 'thumbnail' not in info_dict: | ||||
|             info_dict['thumbnail'] = thumbnails[-1]['url'] | ||||
|  | ||||
|         if 'display_id' not in info_dict and 'id' in info_dict: | ||||
|             info_dict['display_id'] = info_dict['id'] | ||||
|  | ||||
| @@ -936,7 +949,7 @@ class YoutubeDL(object): | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                                 subfile.write(sub) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error('Cannot write subtitles file ' + descfn) | ||||
|                     self.report_error('Cannot write subtitles file ' + sub_filename) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('writeinfojson', False): | ||||
| @@ -1025,10 +1038,11 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def download(self, url_list): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in self.params['outtmpl'] | ||||
|                 '%' not in outtmpl | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(self.params['outtmpl']) | ||||
|             raise SameFileError(outtmpl) | ||||
|  | ||||
|         for url in url_list: | ||||
|             try: | ||||
| @@ -1139,57 +1153,57 @@ class YoutubeDL(object): | ||||
|             res = default | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def format_note(fdict): | ||||
|             res = '' | ||||
|             if fdict.get('ext') in ['f4f', 'f4m']: | ||||
|                 res += '(unsupported) ' | ||||
|             if fdict.get('format_note') is not None: | ||||
|                 res += fdict['format_note'] + ' ' | ||||
|             if fdict.get('tbr') is not None: | ||||
|                 res += '%4dk ' % fdict['tbr'] | ||||
|             if fdict.get('container') is not None: | ||||
|                 if res: | ||||
|                     res += ', ' | ||||
|                 res += '%s container' % fdict['container'] | ||||
|             if (fdict.get('vcodec') is not None and | ||||
|                     fdict.get('vcodec') != 'none'): | ||||
|                 if res: | ||||
|                     res += ', ' | ||||
|                 res += fdict['vcodec'] | ||||
|                 if fdict.get('vbr') is not None: | ||||
|                     res += '@' | ||||
|             elif fdict.get('vbr') is not None and fdict.get('abr') is not None: | ||||
|                 res += 'video@' | ||||
|     def _format_note(self, fdict): | ||||
|         res = '' | ||||
|         if fdict.get('ext') in ['f4f', 'f4m']: | ||||
|             res += '(unsupported) ' | ||||
|         if fdict.get('format_note') is not None: | ||||
|             res += fdict['format_note'] + ' ' | ||||
|         if fdict.get('tbr') is not None: | ||||
|             res += '%4dk ' % fdict['tbr'] | ||||
|         if fdict.get('container') is not None: | ||||
|             if res: | ||||
|                 res += ', ' | ||||
|             res += '%s container' % fdict['container'] | ||||
|         if (fdict.get('vcodec') is not None and | ||||
|                 fdict.get('vcodec') != 'none'): | ||||
|             if res: | ||||
|                 res += ', ' | ||||
|             res += fdict['vcodec'] | ||||
|             if fdict.get('vbr') is not None: | ||||
|                 res += '%4dk' % fdict['vbr'] | ||||
|             if fdict.get('acodec') is not None: | ||||
|                 if res: | ||||
|                     res += ', ' | ||||
|                 if fdict['acodec'] == 'none': | ||||
|                     res += 'video only' | ||||
|                 else: | ||||
|                     res += '%-5s' % fdict['acodec'] | ||||
|             elif fdict.get('abr') is not None: | ||||
|                 if res: | ||||
|                     res += ', ' | ||||
|                 res += 'audio' | ||||
|             if fdict.get('abr') is not None: | ||||
|                 res += '@%3dk' % fdict['abr'] | ||||
|             if fdict.get('asr') is not None: | ||||
|                 res += ' (%5dHz)' % fdict['asr'] | ||||
|             if fdict.get('filesize') is not None: | ||||
|                 if res: | ||||
|                     res += ', ' | ||||
|                 res += format_bytes(fdict['filesize']) | ||||
|             return res | ||||
|                 res += '@' | ||||
|         elif fdict.get('vbr') is not None and fdict.get('abr') is not None: | ||||
|             res += 'video@' | ||||
|         if fdict.get('vbr') is not None: | ||||
|             res += '%4dk' % fdict['vbr'] | ||||
|         if fdict.get('acodec') is not None: | ||||
|             if res: | ||||
|                 res += ', ' | ||||
|             if fdict['acodec'] == 'none': | ||||
|                 res += 'video only' | ||||
|             else: | ||||
|                 res += '%-5s' % fdict['acodec'] | ||||
|         elif fdict.get('abr') is not None: | ||||
|             if res: | ||||
|                 res += ', ' | ||||
|             res += 'audio' | ||||
|         if fdict.get('abr') is not None: | ||||
|             res += '@%3dk' % fdict['abr'] | ||||
|         if fdict.get('asr') is not None: | ||||
|             res += ' (%5dHz)' % fdict['asr'] | ||||
|         if fdict.get('filesize') is not None: | ||||
|             if res: | ||||
|                 res += ', ' | ||||
|             res += format_bytes(fdict['filesize']) | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def line(format, idlen=20): | ||||
|             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 self.format_resolution(format), | ||||
|                 format_note(format), | ||||
|                 self._format_note(format), | ||||
|             )) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
| @@ -1197,8 +1211,8 @@ class YoutubeDL(object): | ||||
|                     max(len(f['format_id']) for f in formats)) | ||||
|         formats_s = [line(f, idlen) for f in formats] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' | ||||
|             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': 'format code', 'ext': 'extension', | ||||
|   | ||||
| @@ -53,6 +53,12 @@ __authors__  = ( | ||||
|     'Mattias Harrysson', | ||||
|     'phaer', | ||||
|     'Sainyam Kapoor', | ||||
|     'Nicolas Évrard', | ||||
|     'Jason Normore', | ||||
|     'Hoje Lee', | ||||
|     'Adam Thalhammer', | ||||
|     'Georg Jähnig', | ||||
|     'Ralf Haring', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -72,6 +78,7 @@ from .utils import ( | ||||
|     compat_getpass, | ||||
|     compat_print, | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|     get_term_width, | ||||
|     DownloadError, | ||||
| @@ -92,6 +99,8 @@ from .extractor import gen_extractors | ||||
| from .version import __version__ | ||||
| from .YoutubeDL import YoutubeDL | ||||
| from .postprocessor import ( | ||||
|     AtomicParsleyPP, | ||||
|     FFmpegAudioFixPP, | ||||
|     FFmpegMetadataPP, | ||||
|     FFmpegVideoConvertor, | ||||
|     FFmpegExtractAudioPP, | ||||
| @@ -503,6 +512,8 @@ def parseOpts(overrideArguments=None): | ||||
|             help='do not overwrite post-processed files; the post-processed files are overwritten by default') | ||||
|     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, | ||||
|             help='embed subtitles in the video (only for mp4 videos)') | ||||
|     postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, | ||||
|             help='embed thumbnail in the audio as cover art') | ||||
|     postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, | ||||
|             help='write metadata to the video file') | ||||
|     postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, | ||||
| @@ -672,7 +683,7 @@ def _real_main(argv=None): | ||||
|         if not opts.audioquality.isdigit(): | ||||
|             parser.error(u'invalid audio quality specified') | ||||
|     if opts.recodevideo is not None: | ||||
|         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']: | ||||
|         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']: | ||||
|             parser.error(u'invalid video recode format specified') | ||||
|     if opts.date is not None: | ||||
|         date = DateRange.day(opts.date) | ||||
| @@ -701,7 +712,7 @@ def _real_main(argv=None): | ||||
|             or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s') | ||||
|             or (opts.useid and u'%(id)s.%(ext)s') | ||||
|             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') | ||||
|             or u'%(title)s-%(id)s.%(ext)s') | ||||
|             or DEFAULT_OUTTMPL) | ||||
|     if not os.path.splitext(outtmpl)[1] and opts.extractaudio: | ||||
|         parser.error(u'Cannot download a video and extract audio into the same' | ||||
|                      u' file! Use "{0}.%(ext)s" instead of "{0}" as the output' | ||||
| @@ -808,6 +819,10 @@ def _real_main(argv=None): | ||||
|             ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||
|         if opts.xattrs: | ||||
|             ydl.add_post_processor(XAttrMetadataPP()) | ||||
|         if opts.embedthumbnail: | ||||
|             if not opts.addmetadata: | ||||
|                 ydl.add_post_processor(FFmpegAudioFixPP()) | ||||
|             ydl.add_post_processor(AtomicParsleyPP()) | ||||
|  | ||||
|         # Update version | ||||
|         if opts.update_self: | ||||
|   | ||||
| @@ -25,7 +25,7 @@ class HlsFD(FileDownloader): | ||||
|             except (OSError, IOError): | ||||
|                 pass | ||||
|         else: | ||||
|             self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') | ||||
|             self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.') | ||||
|         cmd = [program] + args | ||||
|  | ||||
|         retval = subprocess.call(cmd) | ||||
|   | ||||
| @@ -14,6 +14,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class HttpFD(FileDownloader): | ||||
|     _TEST_FILE_SIZE = 10241 | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|         tmpfilename = self.temp_name(filename) | ||||
| @@ -28,8 +30,10 @@ class HttpFD(FileDownloader): | ||||
|         basic_request = compat_urllib_request.Request(url, None, headers) | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
|  | ||||
|         if self.params.get('test', False): | ||||
|             request.add_header('Range', 'bytes=0-10240') | ||||
|         is_test = self.params.get('test', False) | ||||
|  | ||||
|         if is_test: | ||||
|             request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1)) | ||||
|  | ||||
|         # Establish possible resume length | ||||
|         if os.path.isfile(encodeFilename(tmpfilename)): | ||||
| @@ -100,6 +104,15 @@ class HttpFD(FileDownloader): | ||||
|             return False | ||||
|  | ||||
|         data_len = data.info().get('Content-length', None) | ||||
|  | ||||
|         # Range HTTP header may be ignored/unsupported by a webserver | ||||
|         # (e.g. extractor/scivee.py, extractor/bambuser.py). | ||||
|         # However, for a test we still would like to download just a piece of a file. | ||||
|         # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control | ||||
|         # block size when downloading a file. | ||||
|         if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): | ||||
|             data_len = self._TEST_FILE_SIZE | ||||
|  | ||||
|         if data_len is not None: | ||||
|             data_len = int(data_len) + resume_len | ||||
|             min_data_len = self.params.get("min_filesize", None) | ||||
| @@ -118,7 +131,7 @@ class HttpFD(FileDownloader): | ||||
|         while True: | ||||
|             # Download and write | ||||
|             before = time.time() | ||||
|             data_block = data.read(block_size) | ||||
|             data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) | ||||
|             after = time.time() | ||||
|             if len(data_block) == 0: | ||||
|                 break | ||||
| @@ -162,6 +175,9 @@ class HttpFD(FileDownloader): | ||||
|                 'speed': speed, | ||||
|             }) | ||||
|  | ||||
|             if is_test and byte_counter == data_len: | ||||
|                 break | ||||
|  | ||||
|             # Apply rate limit | ||||
|             self.slow_down(start, byte_counter - resume_len) | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -95,6 +96,7 @@ class RtmpFD(FileDownloader): | ||||
|         flash_version = info_dict.get('flash_version', None) | ||||
|         live = info_dict.get('rtmp_live', False) | ||||
|         conn = info_dict.get('rtmp_conn', None) | ||||
|         protocol = info_dict.get('rtmp_protocol', None) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
| @@ -104,7 +106,7 @@ class RtmpFD(FileDownloader): | ||||
|         try: | ||||
|             subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
|         except (OSError, IOError): | ||||
|             self.report_error('RTMP download detected but "rtmpdump" could not be run') | ||||
|             self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') | ||||
|             return False | ||||
|  | ||||
|         # Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
| @@ -127,8 +129,13 @@ class RtmpFD(FileDownloader): | ||||
|             basic_args += ['--flashVer', flash_version] | ||||
|         if live: | ||||
|             basic_args += ['--live'] | ||||
|         if conn: | ||||
|         if isinstance(conn, list): | ||||
|             for entry in conn: | ||||
|                 basic_args += ['--conn', entry] | ||||
|         elif isinstance(conn, compat_str): | ||||
|             basic_args += ['--conn', conn] | ||||
|         if protocol is not None: | ||||
|             basic_args += ['--protocol', protocol] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] | ||||
|  | ||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||
|   | ||||
| @@ -20,6 +20,7 @@ from .auengine import AUEngineIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbccouk import BBCCoUkIE | ||||
| from .bilibili import BiliBiliIE | ||||
| from .blinkx import BlinkxIE | ||||
| from .bliptv import BlipTVIE, BlipTVUserIE | ||||
| from .bloomberg import BloombergIE | ||||
| @@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
| from .clubic import ClubicIE | ||||
| from .cmt import CMTIE | ||||
| from .cnet import CNETIE | ||||
| from .cnn import ( | ||||
| @@ -70,6 +72,7 @@ from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .eitb import EitbIE | ||||
| from .elpais import ElPaisIE | ||||
| from .empflix import EmpflixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .escapist import EscapistIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| @@ -77,6 +80,7 @@ from .exfm import ExfmIE | ||||
| from .extremetube import ExtremeTubeIE | ||||
| from .facebook import FacebookIE | ||||
| from .faz import FazIE | ||||
| from .fc2 import FC2IE | ||||
| from .firstpost import FirstpostIE | ||||
| from .firsttv import FirstTVIE | ||||
| from .fivemin import FiveMinIE | ||||
| @@ -105,12 +109,15 @@ from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .gorillavid import GorillaVidIE | ||||
| from .hark import HarkIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .huffpost import HuffPostIE | ||||
| from .hypem import HypemIE | ||||
| from .iconosquare import IconosquareIE | ||||
| from .ign import IGNIE, OneUPIE | ||||
| from .imdb import ( | ||||
|     ImdbIE, | ||||
| @@ -136,6 +143,7 @@ from .khanacademy import KhanAcademyIE | ||||
| from .kickstarter import KickStarterIE | ||||
| from .keek import KeekIE | ||||
| from .kontrtube import KontrTubeIE | ||||
| from .ku6 import Ku6IE | ||||
| from .la7 import LA7IE | ||||
| from .lifenews import LifeNewsIE | ||||
| from .liveleak import LiveLeakIE | ||||
| @@ -158,9 +166,11 @@ from .mofosex import MofosexIE | ||||
| from .mooshare import MooshareIE | ||||
| from .morningstar import MorningstarIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .moviezine import MoviezineIE | ||||
| from .movshare import MovShareIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVServicesEmbeddedIE, | ||||
|     MTVIggyIE, | ||||
| ) | ||||
| from .musicplayon import MusicPlayOnIE | ||||
| @@ -177,15 +187,23 @@ from .nbc import ( | ||||
| from .ndr import NDRIE | ||||
| from .ndtv import NDTVIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
| from .nfb import NFBIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .nrk import ( | ||||
|     NRKIE, | ||||
|     NRKTVIE, | ||||
| ) | ||||
| from .ntv import NTVIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oe1 import OE1IE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| @@ -200,12 +218,14 @@ from .pornotube import PornotubeIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| from .rai import RaiIE | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .redtube import RedTubeIE | ||||
| from .ringtv import RingTVIE | ||||
| from .ro220 import Ro220IE | ||||
| from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtbf import RTBFIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .rts import RTSIE | ||||
| from .rtve import RTVEALaCartaIE | ||||
| @@ -217,9 +237,11 @@ from .rutube import ( | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .scivee import SciVeeIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .sina import SinaIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .slutload import SlutloadIE | ||||
| from .smotri import ( | ||||
|     SmotriIE, | ||||
|     SmotriCommunityIE, | ||||
| @@ -227,7 +249,12 @@ from .smotri import ( | ||||
|     SmotriBroadcastIE, | ||||
| ) | ||||
| from .sohu import SohuIE | ||||
| from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE | ||||
| from .soundcloud import ( | ||||
|     SoundcloudIE, | ||||
|     SoundcloudSetIE, | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .southparkstudios import ( | ||||
|     SouthParkStudiosIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -235,14 +262,21 @@ from .southparkstudios import ( | ||||
| from .space import SpaceIE | ||||
| from .spankwire import SpankwireIE | ||||
| from .spiegel import SpiegelIE | ||||
| from .spiegeltv import SpiegeltvIE | ||||
| from .spike import SpikeIE | ||||
| from .stanfordoc import StanfordOpenClassroomIE | ||||
| from .statigram import StatigramIE | ||||
| from .steam import SteamIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .swrmediathek import SWRMediathekIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| from .tagesschau import TagesschauIE | ||||
| from .teachertube import ( | ||||
|     TeacherTubeIE, | ||||
|     TeacherTubeClassroomIE, | ||||
| ) | ||||
| from .teachingchannel import TeachingChannelIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .techtalks import TechTalksIE | ||||
| from .ted import TEDIE | ||||
| @@ -251,6 +285,7 @@ from .tf1 import TF1IE | ||||
| from .theplatform import ThePlatformIE | ||||
| from .thisav import ThisAVIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .tlc import TlcIE, TlcDeIE | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| @@ -274,12 +309,14 @@ from .veehd import VeeHDIE | ||||
| from .veoh import VeohIE | ||||
| from .vesti import VestiIE | ||||
| from .vevo import VevoIE | ||||
| from .vh1 import VH1IE | ||||
| from .viddler import ViddlerIE | ||||
| from .videobam import VideoBamIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
| from .videolecturesnet import VideoLecturesNetIE | ||||
| from .videofyme import VideofyMeIE | ||||
| from .videopremium import VideoPremiumIE | ||||
| from .videott import VideoTtIE | ||||
| from .videoweed import VideoWeedIE | ||||
| from .vimeo import ( | ||||
|     VimeoIE, | ||||
| @@ -288,21 +325,29 @@ from .vimeo import ( | ||||
|     VimeoAlbumIE, | ||||
|     VimeoGroupsIE, | ||||
|     VimeoReviewIE, | ||||
|     VimeoWatchLaterIE, | ||||
| ) | ||||
| from .vine import ( | ||||
|     VineIE, | ||||
|     VineUserIE, | ||||
| ) | ||||
| from .vine import VineIE | ||||
| from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vulture import VultureIE | ||||
| from .washingtonpost import WashingtonPostIE | ||||
| from .wat import WatIE | ||||
| from .wdr import ( | ||||
|     WDRIE, | ||||
|     WDRMobileIE, | ||||
|     WDRMausIE, | ||||
| ) | ||||
| from .weibo import WeiboIE | ||||
| from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wrzuta import WrzutaIE | ||||
| from .xbef import XBefIE | ||||
| from .xhamster import XHamsterIE | ||||
| from .xnxx import XNXXIE | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -16,6 +15,7 @@ class AftonbladetIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', | ||||
|             'description': 'Jupiters måne mest aktiv av alla himlakroppar', | ||||
|             'timestamp': 1394142732, | ||||
|             'upload_date': '20140306', | ||||
|         }, | ||||
|     } | ||||
| @@ -27,17 +27,17 @@ class AftonbladetIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|         META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         internal_meta_id = self._html_search_regex( | ||||
|             r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') | ||||
|         internal_meta_url = META_URL % internal_meta_id | ||||
|         internal_meta_url = meta_url % internal_meta_id | ||||
|         internal_meta_json = self._download_json( | ||||
|             internal_meta_url, video_id, 'Downloading video meta data') | ||||
|  | ||||
|         # find internal video formats | ||||
|         FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | ||||
|         format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' | ||||
|         internal_video_id = internal_meta_json['videoId'] | ||||
|         internal_formats_url = FORMATS_URL % internal_video_id | ||||
|         internal_formats_url = format_url % internal_video_id | ||||
|         internal_formats_json = self._download_json( | ||||
|             internal_formats_url, video_id, 'Downloading video formats') | ||||
|  | ||||
| @@ -54,16 +54,13 @@ class AftonbladetIE(InfoExtractor): | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished']) | ||||
|         upload_date = timestamp.strftime('%Y%m%d') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': internal_meta_json['title'], | ||||
|             'formats': formats, | ||||
|             'thumbnail': internal_meta_json['imageUrl'], | ||||
|             'description': internal_meta_json['shortPreamble'], | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': internal_meta_json['timePublished'], | ||||
|             'duration': internal_meta_json['duration'], | ||||
|             'view_count': internal_meta_json['views'], | ||||
|         } | ||||
|   | ||||
| @@ -8,7 +8,18 @@ from .fivemin import FiveMinIE | ||||
|  | ||||
| class AolIE(InfoExtractor): | ||||
|     IE_NAME = 'on.aol.com' | ||||
|     _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?: | ||||
|             aol-video:| | ||||
|             http://on\.aol\.com/ | ||||
|             (?: | ||||
|                 video/.*-| | ||||
|                 playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid= | ||||
|             ) | ||||
|         ) | ||||
|         (?P<id>[0-9]+) | ||||
|         (?:$|\?) | ||||
|     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', | ||||
| @@ -24,5 +35,31 @@ class AolIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         self.to_screen('Downloading 5min.com video %s' % video_id) | ||||
|  | ||||
|         playlist_id = mobj.group('playlist_id') | ||||
|         if playlist_id and not self._downloader.params.get('noplaylist'): | ||||
|             self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|             webpage = self._download_webpage(url, playlist_id) | ||||
|             title = self._html_search_regex( | ||||
|                 r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title') | ||||
|             playlist_html = self._search_regex( | ||||
|                 r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage, | ||||
|                 'playlist HTML') | ||||
|             entries = [{ | ||||
|                 '_type': 'url', | ||||
|                 'url': 'aol-video:%s' % m.group('id'), | ||||
|                 'ie_key': 'Aol', | ||||
|             } for m in re.finditer( | ||||
|                 r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>", | ||||
|                 playlist_html)] | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'id': playlist_id, | ||||
|                 'display_id': mobj.group('playlist_display_id'), | ||||
|                 'title': title, | ||||
|                 'entries': entries, | ||||
|             } | ||||
|  | ||||
|         return FiveMinIE._build_result(video_id) | ||||
|   | ||||
| @@ -38,37 +38,43 @@ class ARDIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title') | ||||
|             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', | ||||
|              r'<meta name="dcterms.title" content="(.*?)"/>', | ||||
|              r'<h4 class="headline">(.*?)</h4>'], | ||||
|             webpage, 'title') | ||||
|         description = self._html_search_meta( | ||||
|             'dcterms.abstract', webpage, 'description') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         streams = [ | ||||
|             mo.groupdict() | ||||
|             for mo in re.finditer( | ||||
|                 r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)] | ||||
|  | ||||
|         media_info = self._download_json( | ||||
|             'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) | ||||
|         # The second element of the _mediaArray contains the standard http urls | ||||
|         streams = media_info['_mediaArray'][1]['_mediaStreamArray'] | ||||
|         if not streams: | ||||
|             if '"fsk"' in webpage: | ||||
|                 raise ExtractorError('This video is only available after 20:00') | ||||
|  | ||||
|         formats = [] | ||||
|         for s in streams: | ||||
|             format = { | ||||
|                 'quality': int(s['quality']), | ||||
|             } | ||||
|             if s.get('rtmp_url'): | ||||
|                 format['protocol'] = 'rtmp' | ||||
|                 format['url'] = s['rtmp_url'] | ||||
|                 format['playpath'] = s['video_url'] | ||||
|             else: | ||||
|                 format['url'] = s['video_url'] | ||||
|  | ||||
|             quality_name = self._search_regex( | ||||
|                 r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'], | ||||
|                 'quality name', default='NA') | ||||
|             format['format_id'] = '%s-%s-%s-%s' % ( | ||||
|                 determine_ext(format['url']), quality_name, s['media_type'], | ||||
|                 s['quality']) | ||||
|         for s in streams: | ||||
|             if type(s['_stream']) == list: | ||||
|                 for index, url in enumerate(s['_stream'][::-1]): | ||||
|                     quality = s['_quality'] + index | ||||
|                     formats.append({ | ||||
|                         'quality': quality, | ||||
|                         'url': url, | ||||
|                         'format_id': '%s-%s' % (determine_ext(url), quality) | ||||
|                         }) | ||||
|                 continue | ||||
|  | ||||
|             format = { | ||||
|                 'quality': s['_quality'], | ||||
|                 'url': s['_stream'], | ||||
|             } | ||||
|  | ||||
|             format['format_id'] = '%s-%s' % ( | ||||
|                 determine_ext(format['url']), format['quality']) | ||||
|  | ||||
|             formats.append(format) | ||||
|  | ||||
|   | ||||
| @@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|         return self._extract_from_webpage(webpage, video_id, lang) | ||||
|  | ||||
|     def _extract_from_webpage(self, webpage, video_id, lang): | ||||
|         json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') | ||||
|         json_url = self._html_search_regex( | ||||
|             r'arte_vp_url="(.*?)"', webpage, 'json vp url') | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|  | ||||
|     def _extract_from_json_url(self, json_url, video_id, lang): | ||||
| @@ -120,14 +121,17 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|                 return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) | ||||
|         else: | ||||
|             def sort_key(f): | ||||
|                 versionCode = f.get('versionCode') | ||||
|                 if versionCode is None: | ||||
|                     versionCode = '' | ||||
|                 return ( | ||||
|                     # Sort first by quality | ||||
|                     int(f.get('height',-1)), | ||||
|                     int(f.get('bitrate',-1)), | ||||
|                     int(f.get('height', -1)), | ||||
|                     int(f.get('bitrate', -1)), | ||||
|                     # The original version with subtitles has lower relevance | ||||
|                     re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, | ||||
|                     re.match(r'VO-ST(F|A)', versionCode) is None, | ||||
|                     # The version with sourds/mal subtitles has also lower relevance | ||||
|                     re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, | ||||
|                     re.match(r'VO?(F|A)-STM\1', versionCode) is None, | ||||
|                     # Prefer http downloads over m3u8 | ||||
|                     0 if f['url'].endswith('m3u8') else 1, | ||||
|                 ) | ||||
|   | ||||
| @@ -12,14 +12,14 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class BandcampIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' | ||||
|     _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', | ||||
|         'file': '1812978515.mp3', | ||||
|         'md5': 'c557841d5e50261777a6585648adf439', | ||||
|         'info_dict': { | ||||
|             "title": "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", | ||||
|             "duration": 10, | ||||
|             "duration": 9.8485, | ||||
|         }, | ||||
|         '_skip': 'There is a limit of 200 free downloads / month for the test song' | ||||
|     }] | ||||
| @@ -28,36 +28,32 @@ class BandcampIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         # We get the link to the free download page | ||||
|         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) | ||||
|         if m_download is None: | ||||
|         if not m_download: | ||||
|             m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) | ||||
|             if m_trackinfo: | ||||
|                 json_code = m_trackinfo.group(1) | ||||
|                 data = json.loads(json_code) | ||||
|                 d = data[0] | ||||
|                 data = json.loads(json_code)[0] | ||||
|  | ||||
|                 duration = int(round(d['duration'])) | ||||
|                 formats = [] | ||||
|                 for format_id, format_url in d['file'].items(): | ||||
|                     ext, _, abr_str = format_id.partition('-') | ||||
|  | ||||
|                 for format_id, format_url in data['file'].items(): | ||||
|                     ext, abr_str = format_id.split('-', 1) | ||||
|                     formats.append({ | ||||
|                         'format_id': format_id, | ||||
|                         'url': format_url, | ||||
|                         'ext': format_id.partition('-')[0], | ||||
|                         'ext': ext, | ||||
|                         'vcodec': 'none', | ||||
|                         'acodec': format_id.partition('-')[0], | ||||
|                         'abr': int(format_id.partition('-')[2]), | ||||
|                         'acodec': ext, | ||||
|                         'abr': int(abr_str), | ||||
|                     }) | ||||
|  | ||||
|                 self._sort_formats(formats) | ||||
|  | ||||
|                 return { | ||||
|                     'id': compat_str(d['id']), | ||||
|                     'title': d['title'], | ||||
|                     'id': compat_str(data['id']), | ||||
|                     'title': data['title'], | ||||
|                     'formats': formats, | ||||
|                     'duration': duration, | ||||
|                     'duration': float(data['duration']), | ||||
|                 } | ||||
|             else: | ||||
|                 raise ExtractorError('No free songs found') | ||||
| @@ -67,11 +63,9 @@ class BandcampIE(InfoExtractor): | ||||
|             r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', | ||||
|             webpage, re.MULTILINE | re.DOTALL).group('id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, | ||||
|                                                   'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascrip code | ||||
|         info = re.search(r'items: (.*?),$', | ||||
|                          download_webpage, re.MULTILINE).group(1) | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) | ||||
|         info = json.loads(info)[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
| @@ -100,7 +94,7 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'Bandcamp:album' | ||||
|     _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
| @@ -123,13 +117,15 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'playlistend': 2 | ||||
|         }, | ||||
|         'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('subdomain') | ||||
|         title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, title) | ||||
|         display_id = title or playlist_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError('The page doesn\'t contain any tracks') | ||||
| @@ -139,6 +135,8 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										106
									
								
								youtube_dl/extractor/bilibili.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								youtube_dl/extractor/bilibili.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_parse_qs, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BiliBiliIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bilibili.tv/video/av1074402/', | ||||
|         'md5': '2c301e4dab317596e837c3e7633e7d86', | ||||
|         'info_dict': { | ||||
|             'id': '1074402', | ||||
|             'ext': 'flv', | ||||
|             'title': '【金坷垃】金泡沫', | ||||
|             'duration': 308, | ||||
|             'upload_date': '20140420', | ||||
|             'thumbnail': 're:^https?://.+\.jpg', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_code = self._search_regex( | ||||
|             r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code') | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'media:title', video_code, 'title', fatal=True) | ||||
|         duration_str = self._html_search_meta( | ||||
|             'duration', video_code, 'duration') | ||||
|         if duration_str is None: | ||||
|             duration = None | ||||
|         else: | ||||
|             duration_mobj = re.match( | ||||
|                 r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$', | ||||
|                 duration_str) | ||||
|             duration = ( | ||||
|                 int_or_none(duration_mobj.group('hours'), default=0) * 3600 + | ||||
|                 int(duration_mobj.group('minutes')) * 60 + | ||||
|                 int(duration_mobj.group('seconds'))) | ||||
|         upload_date = unified_strdate(self._html_search_meta( | ||||
|             'uploadDate', video_code, fatal=False)) | ||||
|         thumbnail = self._html_search_meta( | ||||
|             'thumbnailUrl', video_code, 'thumbnail', fatal=False) | ||||
|  | ||||
|         player_params = compat_parse_qs(self._html_search_regex( | ||||
|             r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"', | ||||
|             webpage, 'player params')) | ||||
|  | ||||
|         if 'cid' in player_params: | ||||
|             cid = player_params['cid'][0] | ||||
|  | ||||
|             lq_doc = self._download_xml( | ||||
|                 'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid, | ||||
|                 video_id, | ||||
|                 note='Downloading LQ video info' | ||||
|             ) | ||||
|             lq_durl = lq_doc.find('.//durl') | ||||
|             formats = [{ | ||||
|                 'format_id': 'lq', | ||||
|                 'quality': 1, | ||||
|                 'url': lq_durl.find('./url').text, | ||||
|                 'filesize': int_or_none( | ||||
|                     lq_durl.find('./size'), get_attr='text'), | ||||
|             }] | ||||
|  | ||||
|             hq_doc = self._download_xml( | ||||
|                 'http://interface.bilibili.cn/playurl?cid=%s' % cid, | ||||
|                 video_id, | ||||
|                 note='Downloading HQ video info', | ||||
|                 fatal=False, | ||||
|             ) | ||||
|             if hq_doc is not False: | ||||
|                 hq_durl = hq_doc.find('.//durl') | ||||
|                 formats.append({ | ||||
|                     'format_id': 'hq', | ||||
|                     'quality': 2, | ||||
|                     'ext': 'flv', | ||||
|                     'url': hq_durl.find('./url').text, | ||||
|                     'filesize': int_or_none( | ||||
|                         hq_durl.find('./size'), get_attr='text'), | ||||
|                 }) | ||||
|         else: | ||||
|             raise ExtractorError('Unsupported player parameters: %r' % (player_params,)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
| @@ -1,13 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
| ) | ||||
| from ..utils import remove_start | ||||
|  | ||||
|  | ||||
| class BlinkxIE(InfoExtractor): | ||||
| @@ -16,18 +13,21 @@ class BlinkxIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', | ||||
|         'file': '8aQUy7GV.mp4', | ||||
|         'md5': '2e9a07364af40163a908edbf10bb2492', | ||||
|         'info_dict': { | ||||
|             "title": "Police Car Rolls Away", | ||||
|             "uploader": "stupidvideos.com", | ||||
|             "upload_date": "20131215", | ||||
|             "description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!", | ||||
|             "duration": 14.886, | ||||
|             "thumbnails": [{ | ||||
|                 "width": 100, | ||||
|                 "height": 76, | ||||
|                 "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg", | ||||
|             'id': '8aQUy7GV', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Police Car Rolls Away', | ||||
|             'uploader': 'stupidvideos.com', | ||||
|             'upload_date': '20131215', | ||||
|             'timestamp': 1387068000, | ||||
|             'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', | ||||
|             'duration': 14.886, | ||||
|             'thumbnails': [{ | ||||
|                 'width': 100, | ||||
|                 'height': 76, | ||||
|                 'resolution': '100x76', | ||||
|                 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', | ||||
|             }], | ||||
|         }, | ||||
|     } | ||||
| @@ -37,13 +37,10 @@ class BlinkxIE(InfoExtractor): | ||||
|         video_id = m.group('id') | ||||
|         display_id = video_id[:8] | ||||
|  | ||||
|         api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
|         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
|                    'video=%s' % video_id) | ||||
|         data_json = self._download_webpage(api_url, display_id) | ||||
|         data = json.loads(data_json)['api']['results'][0] | ||||
|         dt = datetime.datetime.fromtimestamp(data['pubdate_epoch']) | ||||
|         pload_date = dt.strftime('%Y%m%d') | ||||
|  | ||||
|         duration = None | ||||
|         thumbnails = [] | ||||
|         formats = [] | ||||
| @@ -58,16 +55,13 @@ class BlinkxIE(InfoExtractor): | ||||
|                 duration = m['d'] | ||||
|             elif m['type'] == 'youtube': | ||||
|                 yt_id = m['link'] | ||||
|                 self.to_screen(u'Youtube video detected: %s' % yt_id) | ||||
|                 self.to_screen('Youtube video detected: %s' % yt_id) | ||||
|                 return self.url_result(yt_id, 'Youtube', video_id=yt_id) | ||||
|             elif m['type'] in ('flv', 'mp4'): | ||||
|                 vcodec = remove_start(m['vcodec'], 'ff') | ||||
|                 acodec = remove_start(m['acodec'], 'ff') | ||||
|                 tbr = (int(m['vbr']) + int(m['abr'])) // 1000 | ||||
|                 format_id = (u'%s-%sk-%s' % | ||||
|                              (vcodec, | ||||
|                               tbr, | ||||
|                               m['w'])) | ||||
|                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': m['link'], | ||||
| @@ -88,7 +82,7 @@ class BlinkxIE(InfoExtractor): | ||||
|             'title': data['title'], | ||||
|             'formats': formats, | ||||
|             'uploader': data['channel_name'], | ||||
|             'upload_date': pload_date, | ||||
|             'timestamp': data['pubdate_epoch'], | ||||
|             'description': data.get('description'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'duration': duration, | ||||
|   | ||||
| @@ -1,102 +1,124 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     unescapeHTML, | ||||
|     parse_iso8601, | ||||
|     compat_urlparse, | ||||
|     clean_html, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlipTVIE(SubtitlesInfoExtractor): | ||||
|     """Information extractor for blip.tv""" | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))' | ||||
|  | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', | ||||
|         'md5': 'c6934ad0b6acf2bd920720ec888eb812', | ||||
|         'info_dict': { | ||||
|             'id': '5779306', | ||||
|             'ext': 'mov', | ||||
|             'upload_date': '20111205', | ||||
|             'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', | ||||
|             'uploader': 'Comic Book Resources - CBR TV', | ||||
|             'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', | ||||
|             'md5': 'c6934ad0b6acf2bd920720ec888eb812', | ||||
|             'info_dict': { | ||||
|                 'id': '5779306', | ||||
|                 'ext': 'mov', | ||||
|                 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', | ||||
|                 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', | ||||
|                 'timestamp': 1323138843, | ||||
|                 'upload_date': '20111206', | ||||
|                 'uploader': 'cbr', | ||||
|                 'uploader_id': '679425', | ||||
|                 'duration': 81, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # https://github.com/rg3/youtube-dl/pull/2274 | ||||
|             'note': 'Video with subtitles', | ||||
|             'url': 'http://blip.tv/play/h6Uag5OEVgI.html', | ||||
|             'md5': '309f9d25b820b086ca163ffac8031806', | ||||
|             'info_dict': { | ||||
|                 'id': '6586561', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Red vs. Blue Season 11 Episode 1', | ||||
|                 'description': 'One-Zero-One', | ||||
|                 'timestamp': 1371261608, | ||||
|                 'upload_date': '20130615', | ||||
|                 'uploader': 'redvsblue', | ||||
|                 'uploader_id': '792887', | ||||
|                 'duration': 279, | ||||
|             } | ||||
|         } | ||||
|     }, { | ||||
|         # https://github.com/rg3/youtube-dl/pull/2274 | ||||
|         'note': 'Video with subtitles', | ||||
|         'url': 'http://blip.tv/play/h6Uag5OEVgI.html', | ||||
|         'md5': '309f9d25b820b086ca163ffac8031806', | ||||
|         'info_dict': { | ||||
|             'id': '6586561', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': 'Red vs. Blue', | ||||
|             'description': 'One-Zero-One', | ||||
|             'upload_date': '20130614', | ||||
|             'title': 'Red vs. Blue Season 11 Episode 1', | ||||
|         } | ||||
|     }] | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         presumptive_id = mobj.group('presumptive_id') | ||||
|         lookup_id = mobj.group('lookup_id') | ||||
|  | ||||
|         # See https://github.com/rg3/youtube-dl/issues/857 | ||||
|         embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) | ||||
|         if embed_mobj: | ||||
|             info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) | ||||
|             info_page = self._download_webpage(info_url, embed_mobj.group(1)) | ||||
|             video_id = self._search_regex( | ||||
|                 r'data-episode-id="([0-9]+)', info_page, 'video_id') | ||||
|             return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV') | ||||
|          | ||||
|         cchar = '&' if '?' in url else '?' | ||||
|         json_url = url + cchar + 'skin=json&version=2&no_wrap=1' | ||||
|         request = compat_urllib_request.Request(json_url) | ||||
|         request.add_header('User-Agent', 'iTunes/10.6.1') | ||||
|  | ||||
|         json_data = self._download_json(request, video_id=presumptive_id) | ||||
|  | ||||
|         if 'Post' in json_data: | ||||
|             data = json_data['Post'] | ||||
|         if lookup_id: | ||||
|             info_page = self._download_webpage( | ||||
|                 'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id') | ||||
|             video_id = self._search_regex(r'data-episode-id="([0-9]+)', info_page, 'video_id') | ||||
|         else: | ||||
|             data = json_data | ||||
|             video_id = mobj.group('id') | ||||
|  | ||||
|         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS') | ||||
|  | ||||
|         def blip(s): | ||||
|             return '{http://blip.tv/dtd/blip/1.0}%s' % s | ||||
|  | ||||
|         def media(s): | ||||
|             return '{http://search.yahoo.com/mrss/}%s' % s | ||||
|  | ||||
|         def itunes(s): | ||||
|             return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s | ||||
|  | ||||
|         item = rss.find('channel/item') | ||||
|  | ||||
|         video_id = item.find(blip('item_id')).text | ||||
|         title = item.find('./title').text | ||||
|         description = clean_html(compat_str(item.find(blip('puredescription')).text)) | ||||
|         timestamp = parse_iso8601(item.find(blip('datestamp')).text) | ||||
|         uploader = item.find(blip('user')).text | ||||
|         uploader_id = item.find(blip('userid')).text | ||||
|         duration = int(item.find(blip('runtime')).text) | ||||
|         media_thumbnail = item.find(media('thumbnail')) | ||||
|         thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text | ||||
|         categories = [category.text for category in item.findall('category')] | ||||
|  | ||||
|         video_id = compat_str(data['item_id']) | ||||
|         upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         if 'additionalMedia' in data: | ||||
|             for f in data['additionalMedia']: | ||||
|                 if f.get('file_type_srt') == 1: | ||||
|                     LANGS = { | ||||
|                         'english': 'en', | ||||
|                     } | ||||
|                     lang = f['role'].rpartition('-')[-1].strip().lower() | ||||
|                     langcode = LANGS.get(lang, lang) | ||||
|                     subtitles[langcode] = f['url'] | ||||
|                     continue | ||||
|                 if not int(f['media_width']):  # filter m3u8 | ||||
|                     continue | ||||
|         subtitles = {} | ||||
|  | ||||
|         media_group = item.find(media('group')) | ||||
|         for media_content in media_group.findall(media('content')): | ||||
|             url = media_content.get('url') | ||||
|             role = media_content.get(blip('role')) | ||||
|             msg = self._download_webpage( | ||||
|                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url', | ||||
|                 video_id, 'Resolving URL for %s' % role) | ||||
|             real_url = compat_urlparse.parse_qs(msg)['message'][0] | ||||
|  | ||||
|             media_type = media_content.get('type') | ||||
|             if media_type == 'text/srt' or url.endswith('.srt'): | ||||
|                 LANGS = { | ||||
|                     'english': 'en', | ||||
|                 } | ||||
|                 lang = role.rpartition('-')[-1].strip().lower() | ||||
|                 langcode = LANGS.get(lang, lang) | ||||
|                 subtitles[langcode] = url | ||||
|             elif media_type.startswith('video/'): | ||||
|                 formats.append({ | ||||
|                     'url': f['url'], | ||||
|                     'format_id': f['role'], | ||||
|                     'width': int(f['media_width']), | ||||
|                     'height': int(f['media_height']), | ||||
|                     'url': real_url, | ||||
|                     'format_id': role, | ||||
|                     'format_note': media_type, | ||||
|                     'vcodec': media_content.get(blip('vcodec')), | ||||
|                     'acodec': media_content.get(blip('acodec')), | ||||
|                     'filesize': media_content.get('filesize'), | ||||
|                     'width': int(media_content.get('width')), | ||||
|                     'height': int(media_content.get('height')), | ||||
|                 }) | ||||
|         else: | ||||
|             formats.append({ | ||||
|                 'url': data['media']['url'], | ||||
|                 'width': int(data['media']['width']), | ||||
|                 'height': int(data['media']['height']), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # subtitles | ||||
| @@ -107,12 +129,14 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': data['display_name'], | ||||
|             'upload_date': upload_date, | ||||
|             'title': data['title'], | ||||
|             'thumbnail': data['thumbnailUrl'], | ||||
|             'description': data['description'], | ||||
|             'user_agent': 'iTunes/10.6.1', | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'categories': categories, | ||||
|             'formats': formats, | ||||
|             'subtitles': video_subtitles, | ||||
|         } | ||||
|   | ||||
| @@ -4,39 +4,70 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BRIE(InfoExtractor): | ||||
|     IE_DESC = "Bayerischer Rundfunk Mediathek" | ||||
|     _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" | ||||
|     _BASE_URL = "http://www.br.de" | ||||
|     IE_DESC = 'Bayerischer Rundfunk Mediathek' | ||||
|     _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html' | ||||
|     _BASE_URL = 'http://www.br.de' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", | ||||
|             "md5": "c4f83cf0f023ba5875aba0bf46860df2", | ||||
|             "info_dict": { | ||||
|                 "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Feiern und Verzichten", | ||||
|                 "description": "Anselm Grün: Feiern und Verzichten", | ||||
|                 "uploader": "BR/Birgit Baier", | ||||
|                 "upload_date": "20140301" | ||||
|             'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html', | ||||
|             'md5': '93556dd2bcb2948d9259f8670c516d59', | ||||
|             'info_dict': { | ||||
|                 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Am 1. und 2. August in Oberammergau', | ||||
|                 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", | ||||
|             "md5": "ab451b09d861dbed7d7cc9ab0be19ebe", | ||||
|             "info_dict": { | ||||
|                 "id": "2c060e69-3a27-4e13-b0f0-668fac17d812", | ||||
|                 "ext": "mp4", | ||||
|                 "title": "Über den Pass", | ||||
|                 "description": "Die Eroberung der Alpen: Über den Pass", | ||||
|                 "uploader": None, | ||||
|                 "upload_date": None | ||||
|             'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html', | ||||
|             'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe', | ||||
|             'info_dict': { | ||||
|                 'id': '2c060e69-3a27-4e13-b0f0-668fac17d812', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Über den Pass', | ||||
|                 'description': 'Die Eroberung der Alpen: Über den Pass', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html', | ||||
|             'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820', | ||||
|             'info_dict': { | ||||
|                 'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab', | ||||
|                 'ext': 'aac', | ||||
|                 'title': '"Keine neuen Schulden im nächsten Jahr"', | ||||
|                 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', | ||||
|             'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', | ||||
|             'info_dict': { | ||||
|                 'id': '6ba73750-d405-45d3-861d-1ce8c524e059', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Umweltbewusster Häuslebauer', | ||||
|                 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', | ||||
|             'md5': '23bca295f1650d698f94fc570977dae3', | ||||
|             'info_dict': { | ||||
|                 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Folge 1 - Metaphysik', | ||||
|                 'description': 'Kant für Anfänger: Folge 1 - Metaphysik', | ||||
|                 'uploader': 'Eva Maria Steimle', | ||||
|                 'upload_date': '20140117', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -44,56 +75,63 @@ class BRIE(InfoExtractor): | ||||
|         display_id = mobj.group('id') | ||||
|         page = self._download_webpage(url, display_id) | ||||
|         xml_url = self._search_regex( | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") | ||||
|             r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') | ||||
|         xml = self._download_xml(self._BASE_URL + xml_url, None) | ||||
|  | ||||
|         videos = [] | ||||
|         for xml_video in xml.findall("video"): | ||||
|             video = { | ||||
|                 "id": xml_video.get("externalId"), | ||||
|                 "title": xml_video.find("title").text, | ||||
|                 "formats": self._extract_formats(xml_video.find("assets")), | ||||
|                 "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")), | ||||
|                 "description": " ".join(xml_video.find("shareTitle").text.splitlines()), | ||||
|                 "webpage_url": xml_video.find("permalink").text | ||||
|             } | ||||
|             if xml_video.find("author").text: | ||||
|                 video["uploader"] = xml_video.find("author").text | ||||
|             if xml_video.find("broadcastDate").text: | ||||
|                 video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split("."))) | ||||
|             videos.append(video) | ||||
|         medias = [] | ||||
|  | ||||
|         if len(videos) > 1: | ||||
|         for xml_media in xml.findall('video') + xml.findall('audio'): | ||||
|             media = { | ||||
|                 'id': xml_media.get('externalId'), | ||||
|                 'title': xml_media.find('title').text, | ||||
|                 'formats': self._extract_formats(xml_media.find('assets')), | ||||
|                 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')), | ||||
|                 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()), | ||||
|                 'webpage_url': xml_media.find('permalink').text | ||||
|             } | ||||
|             if xml_media.find('author').text: | ||||
|                 media['uploader'] = xml_media.find('author').text | ||||
|             if xml_media.find('broadcastDate').text: | ||||
|                 media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.'))) | ||||
|             medias.append(media) | ||||
|  | ||||
|         if len(medias) > 1: | ||||
|             self._downloader.report_warning( | ||||
|                 'found multiple videos; please ' | ||||
|                 'found multiple medias; please ' | ||||
|                 'report this with the video URL to http://yt-dl.org/bug') | ||||
|         if not videos: | ||||
|             raise ExtractorError('No video entries found') | ||||
|         return videos[0] | ||||
|         if not medias: | ||||
|             raise ExtractorError('No media entries found') | ||||
|         return medias[0] | ||||
|  | ||||
|     def _extract_formats(self, assets): | ||||
|  | ||||
|         def text_or_none(asset, tag): | ||||
|             elem = asset.find(tag) | ||||
|             return None if elem is None else elem.text | ||||
|  | ||||
|         formats = [{ | ||||
|             "url": asset.find("downloadUrl").text, | ||||
|             "ext": asset.find("mediaType").text, | ||||
|             "format_id": asset.get("type"), | ||||
|             "width": int(asset.find("frameWidth").text), | ||||
|             "height": int(asset.find("frameHeight").text), | ||||
|             "tbr": int(asset.find("bitrateVideo").text), | ||||
|             "abr": int(asset.find("bitrateAudio").text), | ||||
|             "vcodec": asset.find("codecVideo").text, | ||||
|             "container": asset.find("mediaType").text, | ||||
|             "filesize": int(asset.find("size").text), | ||||
|         } for asset in assets.findall("asset") | ||||
|             if asset.find("downloadUrl") is not None] | ||||
|             'url': text_or_none(asset, 'downloadUrl'), | ||||
|             'ext': text_or_none(asset, 'mediaType'), | ||||
|             'format_id': asset.get('type'), | ||||
|             'width': int_or_none(text_or_none(asset, 'frameWidth')), | ||||
|             'height': int_or_none(text_or_none(asset, 'frameHeight')), | ||||
|             'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')), | ||||
|             'abr': int_or_none(text_or_none(asset, 'bitrateAudio')), | ||||
|             'vcodec': text_or_none(asset, 'codecVideo'), | ||||
|             'acodec': text_or_none(asset, 'codecAudio'), | ||||
|             'container': text_or_none(asset, 'mediaType'), | ||||
|             'filesize': int_or_none(text_or_none(asset, 'size')), | ||||
|         } for asset in assets.findall('asset') | ||||
|             if asset.find('downloadUrl') is not None] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_thumbnails(self, variants): | ||||
|         thumbnails = [{ | ||||
|             "url": self._BASE_URL + variant.find("url").text, | ||||
|             "width": int(variant.find("width").text), | ||||
|             "height": int(variant.find("height").text), | ||||
|         } for variant in variants.findall("variant")] | ||||
|         thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True) | ||||
|             'url': self._BASE_URL + variant.find('url').text, | ||||
|             'width': int_or_none(variant.find('width').text), | ||||
|             'height': int_or_none(variant.find('height').text), | ||||
|         } for variant in variants.findall('variant')] | ||||
|         thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) | ||||
|         return thumbnails | ||||
|   | ||||
| @@ -15,6 +15,7 @@ from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_parse_qs, | ||||
|  | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     unsmuggle_url, | ||||
|     unescapeHTML, | ||||
| @@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor): | ||||
|         { | ||||
|             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', | ||||
|             'file': '2371591881001.mp4', | ||||
|             'md5': '5423e113865d26e40624dce2e4b45d95', | ||||
|             'note': 'Test Brightcove downloads and detection in GenericIE', | ||||
|             'info_dict': { | ||||
|                 'id': '2371591881001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', | ||||
|                 'uploader': '8TV', | ||||
|                 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', | ||||
| @@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor): | ||||
|         { | ||||
|             # From http://medianetwork.oracle.com/video/player/1785452137001 | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', | ||||
|             'file': '1785452137001.flv', | ||||
|             'info_dict': { | ||||
|                 'id': '1785452137001', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', | ||||
|                 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', | ||||
|                 'uploader': 'Oracle', | ||||
| @@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor): | ||||
|                 'description': 'md5:363109c02998fee92ec02211bd8000df', | ||||
|                 'uploader': 'National Ballet of Canada', | ||||
|             }, | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|             # test flv videos served by akamaihd.net | ||||
|             # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', | ||||
|             # The md5 checksum changes on each download | ||||
|             'info_dict': { | ||||
|                 'id': '2996102916001', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', | ||||
|                 'uploader': 'Red Bull TV', | ||||
|                 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -140,7 +156,11 @@ class BrightcoveIE(InfoExtractor): | ||||
|  | ||||
|         url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) | ||||
|         if url_m: | ||||
|             return [unescapeHTML(url_m.group(1))] | ||||
|             url = unescapeHTML(url_m.group(1)) | ||||
|             # Some sites don't add it, we can't download with this url, for example: | ||||
|             # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ | ||||
|             if 'playerKey' in url: | ||||
|                 return [url] | ||||
|  | ||||
|         matches = re.findall( | ||||
|             r'''(?sx)<object | ||||
| @@ -183,7 +203,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') | ||||
|         info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') | ||||
|         info = json.loads(info)['data'] | ||||
|         video_info = info['programmedContent']['videoPlayer']['mediaDTO'] | ||||
|         video_info['_youtubedl_adServerURL'] = info.get('adServerURL') | ||||
| @@ -215,12 +235,26 @@ class BrightcoveIE(InfoExtractor): | ||||
|  | ||||
|         renditions = video_info.get('renditions') | ||||
|         if renditions: | ||||
|             renditions = sorted(renditions, key=lambda r: r['size']) | ||||
|             info['formats'] = [{ | ||||
|                 'url': rend['defaultURL'], | ||||
|                 'height': rend.get('frameHeight'), | ||||
|                 'width': rend.get('frameWidth'), | ||||
|             } for rend in renditions] | ||||
|             formats = [] | ||||
|             for rend in renditions: | ||||
|                 url = rend['defaultURL'] | ||||
|                 if rend['remote']: | ||||
|                     # This type of renditions are served through akamaihd.net, | ||||
|                     # but they don't use f4m manifests | ||||
|                     url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' | ||||
|                     ext = 'flv' | ||||
|                 else: | ||||
|                     ext = determine_ext(url) | ||||
|                 size = rend.get('size') | ||||
|                 formats.append({ | ||||
|                     'url': url, | ||||
|                     'ext': ext, | ||||
|                     'height': rend.get('frameHeight'), | ||||
|                     'width': rend.get('frameWidth'), | ||||
|                     'filesize': size if size != 0 else None, | ||||
|                 }) | ||||
|             self._sort_formats(formats) | ||||
|             info['formats'] = formats | ||||
|         elif video_info.get('FLVFullLengthURL') is not None: | ||||
|             info.update({ | ||||
|                 'url': video_info['FLVFullLengthURL'], | ||||
|   | ||||
| @@ -4,9 +4,7 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class BYUtvIE(InfoExtractor): | ||||
| @@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'granite-flats-talking', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', | ||||
|             'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c', | ||||
|             'title': 'Talking', | ||||
|             'thumbnail': 're:^https?://.*promo.*' | ||||
|         }, | ||||
|   | ||||
| @@ -1,53 +1,72 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' | ||||
|     _VALID_URL = r'https?://(?:www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' | ||||
|     IE_NAME = u'canalplus.fr' | ||||
|     IE_NAME = 'canalplus.fr' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', | ||||
|         u'file': u'922470.flv', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Zapping - 26/08/13', | ||||
|             u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', | ||||
|             u'upload_date': u'20130826', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True, | ||||
|         'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', | ||||
|         'md5': '3db39fb48b9685438ecf33a1078023e4', | ||||
|         'info_dict': { | ||||
|             'id': '922470', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Zapping - 26/08/13', | ||||
|             'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', | ||||
|             'upload_date': '20130826', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.groupdict().get('id') | ||||
|  | ||||
|         # Beware, some subclasses do not define an id group | ||||
|         display_id = url_basename(mobj.group('path')) | ||||
|  | ||||
|         if video_id is None: | ||||
|             webpage = self._download_webpage(url, mobj.group('path')) | ||||
|             video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id') | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id') | ||||
|  | ||||
|         info_url = self._VIDEO_INFO_TEMPLATE % video_id | ||||
|         doc = self._download_xml(info_url,video_id,  | ||||
|                                            u'Downloading video info') | ||||
|         doc = self._download_xml(info_url, video_id, 'Downloading video XML') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_info = [video for video in doc if video.find('ID').text == video_id][0] | ||||
|         infos = video_info.find('INFOS') | ||||
|         media = video_info.find('MEDIA') | ||||
|         formats = [media.find('VIDEOS/%s' % format) | ||||
|             for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']] | ||||
|         video_url = [format.text for format in formats if format is not None][-1] | ||||
|         infos = video_info.find('INFOS') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text, | ||||
|                                        infos.find('TITRAGE/SOUS_TITRE').text), | ||||
|                 'url': video_url, | ||||
|                 'ext': 'flv', | ||||
|                 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), | ||||
|                 'thumbnail': media.find('IMAGES/GRAND').text, | ||||
|                 'description': infos.find('DESCRIPTION').text, | ||||
|                 'view_count': int(infos.find('NB_VUES').text), | ||||
|                 } | ||||
|         preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'] | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text, | ||||
|                 'format_id': fmt.tag, | ||||
|                 'ext': 'mp4' if fmt.tag == 'HLS' else 'flv', | ||||
|                 'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1, | ||||
|             } for fmt in media.find('VIDEOS') if fmt.text | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text, | ||||
|                                   infos.find('TITRAGE/SOUS_TITRE').text), | ||||
|             'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), | ||||
|             'thumbnail': media.find('IMAGES/GRAND').text, | ||||
|             'description': infos.find('DESCRIPTION').text, | ||||
|             'view_count': int(infos.find('NB_VUES').text), | ||||
|             'like_count': int(infos.find('NB_LIKES').text), | ||||
|             'comment_count': int(infos.find('NB_COMMENTS').text), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,10 +1,12 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor): | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', | ||||
|             'file': '19911.mp4', | ||||
|             'md5': '782f8504ca95a0eba8fc9177c373eec7', | ||||
|             'md5': 'fde81fbafaee331785f58cd6c0d46190', | ||||
|             'info_dict': { | ||||
|                 'id': '19911', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20121110', | ||||
|                 'title': '“Angry Video Game Nerd: The Movie” – Trailer', | ||||
|                 'description': 'md5:fb87405fcb42a331742a0dce2708560b', | ||||
| @@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', | ||||
|             'file': '521be8ef82b16.mp4', | ||||
|             'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35', | ||||
|             'md5': 'd72f10cd39eac4215048f62ab477a511', | ||||
|             'info_dict': { | ||||
|                 'id': '521be8ef82b16', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20131002', | ||||
|                 'title': 'The Mummy’s Hand (1940)', | ||||
|             }, | ||||
| @@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor): | ||||
|             r'<div class="entry-content">(?P<description>.+?)</div>', | ||||
|             webpage, 'description', flags=re.DOTALL, fatal=False) | ||||
|  | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id) | ||||
|         playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') | ||||
|         video_thumbnail = self._search_regex( | ||||
|             r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) | ||||
|         sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||
|         videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url') | ||||
|  | ||||
|         sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') | ||||
|         hd_url = self._html_search_regex( | ||||
|             r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file', | ||||
|             default=None) | ||||
|         video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) | ||||
|         videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': sd_url, | ||||
|             'ext': 'mp4', | ||||
|             'format': 'sd', | ||||
|             'format_id': 'sd', | ||||
|             'quality': 1, | ||||
|         }] | ||||
|         if hd_url: | ||||
|             formats.append({ | ||||
|                 'url': hd_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'format': 'hd', | ||||
|                 'format_id': 'hd', | ||||
|                 'quality': 2, | ||||
|             }) | ||||
|         formats = [] | ||||
|         baseurl = sd_url[:sd_url.rfind('/')+1] | ||||
|         for video in videolist.findall('.//video'): | ||||
|             src = video.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             file_ = src.partition(':')[-1] | ||||
|             width = int_or_none(video.get('width')) | ||||
|             height = int_or_none(video.get('height')) | ||||
|             bitrate = int_or_none(video.get('system-bitrate')) | ||||
|             format = { | ||||
|                 'url': baseurl + file_, | ||||
|                 'format_id': src.rpartition('.')[0].rpartition('_')[-1], | ||||
|             } | ||||
|             if width or height: | ||||
|                 format.update({ | ||||
|                     'tbr': bitrate // 1000 if bitrate else None, | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                 }) | ||||
|             else: | ||||
|                 format.update({ | ||||
|                     'abr': bitrate // 1000 if bitrate else None, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|             formats.append(format) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/clubic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/clubic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ClubicIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', | ||||
|         'md5': '1592b694ba586036efac1776b0b43cd3', | ||||
|         'info_dict': { | ||||
|             'id': '448474', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', | ||||
|             'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*', | ||||
|             'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id | ||||
|         player_page = self._download_webpage(player_url, video_id) | ||||
|  | ||||
|         config_json = self._search_regex( | ||||
|             r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, | ||||
|             'configuration') | ||||
|         config = json.loads(config_json) | ||||
|  | ||||
|         video_info = config['videoInfo'] | ||||
|         sources = config['sources'] | ||||
|         quality_order = qualities(['sd', 'hq']) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': src['streamQuality'], | ||||
|             'url': src['src'], | ||||
|             'quality': quality_order(src['streamQuality']), | ||||
|         } for src in sources] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_info['title'], | ||||
|             'formats': formats, | ||||
|             'description': clean_html(video_info.get('description')), | ||||
|             'thumbnail': config.get('poster'), | ||||
|         } | ||||
| @@ -1,19 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
| from .mtv import MTVIE | ||||
|  | ||||
|  | ||||
| class CMTIE(MTVIE): | ||||
|     IE_NAME = u'cmt.com' | ||||
|     IE_NAME = 'cmt.com' | ||||
|     _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' | ||||
|     _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', | ||||
|             u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', | ||||
|             u'info_dict': { | ||||
|                 u'id': u'989124', | ||||
|                 u'ext': u'mp4', | ||||
|                 u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', | ||||
|                 u'description': u'Blame It All On My Roots', | ||||
|             }, | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', | ||||
|         'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2', | ||||
|         'info_dict': { | ||||
|             'id': '989124', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', | ||||
|             'description': 'Blame It All On My Roots', | ||||
|         }, | ||||
|     ] | ||||
|     }] | ||||
|   | ||||
| @@ -33,7 +33,7 @@ class CNETIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         data_json = self._html_search_regex( | ||||
|             r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'", | ||||
|             r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'", | ||||
|             webpage, 'data json') | ||||
|         data = json.loads(data_json) | ||||
|         vdata = data['video'] | ||||
|   | ||||
| @@ -79,8 +79,11 @@ class CNNIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) | ||||
|         thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] | ||||
|         thumbnails = [{ | ||||
|             'height': int(t.attrib['height']), | ||||
|             'width': int(t.attrib['width']), | ||||
|             'url': t.text, | ||||
|         } for t in info.findall('images/image')] | ||||
|  | ||||
|         metas_el = info.find('metas') | ||||
|         upload_date = ( | ||||
| @@ -93,8 +96,7 @@ class CNNIE(InfoExtractor): | ||||
|             'id': info.attrib['id'], | ||||
|             'title': info.find('headline').text, | ||||
|             'formats': formats, | ||||
|             'thumbnail': thumbnails[-1][1], | ||||
|             'thumbnails': thumbs_dict, | ||||
|             'thumbnails': thumbnails, | ||||
|             'description': info.find('description').text, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|   | ||||
| @@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', | ||||
|         'md5': '4167875aae411f903b751a21f357f1ee', | ||||
|         'md5': 'c4f48e9eda1b16dd10add0744344b6d8', | ||||
|         'info_dict': { | ||||
|             'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', | ||||
|             'ext': 'mp4', | ||||
| @@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) | ||||
|                       |https?://(:www\.)? | ||||
|                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ | ||||
|                          (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| | ||||
|                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| | ||||
|                           (?P<clip> | ||||
|                               (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+)) | ||||
|                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) | ||||
|                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) | ||||
|                           )| | ||||
| @@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                 }) | ||||
|                 formats.append({ | ||||
|                     'format_id': 'rtmp-%s' % format, | ||||
|                     'url': rtmp_video_url, | ||||
|                     'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'), | ||||
|                     'ext': self._video_extensions.get(format, 'mp4'), | ||||
|                     'height': h, | ||||
|                     'width': w, | ||||
|   | ||||
| @@ -92,8 +92,12 @@ class InfoExtractor(object): | ||||
|                     unique, but available before title. Typically, id is | ||||
|                     something like "4234987", title "Dancing naked mole rats", | ||||
|                     and display_id "dancing-naked-mole-rats" | ||||
|     thumbnails:     A list of dictionaries (with the entries "resolution" and | ||||
|                     "url") for the varying thumbnails | ||||
|     thumbnails:     A list of dictionaries, with the following entries: | ||||
|                         * "url" | ||||
|                         * "width" (optional, int) | ||||
|                         * "height" (optional, int) | ||||
|                         * "resolution" (optional, string "{width}x{height}", | ||||
|                                         deprecated) | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    One-line video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
| @@ -113,6 +117,8 @@ class InfoExtractor(object): | ||||
|     webpage_url:    The url to the video webpage, if given to youtube-dl it | ||||
|                     should allow to get the same result again. (It will be set | ||||
|                     by YoutubeDL if it's missing) | ||||
|     categories:     A list of categories that the video falls in, for example | ||||
|                     ["Sports", "Berlin"] | ||||
|  | ||||
|     Unless mentioned otherwise, the fields should be Unicode strings. | ||||
|  | ||||
| @@ -242,16 +248,20 @@ class InfoExtractor(object): | ||||
|                 url = url_or_request.get_full_url() | ||||
|             except AttributeError: | ||||
|                 url = url_or_request | ||||
|             if len(url) > 200: | ||||
|                 h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() | ||||
|                 url = url[:200 - len(h)] + h | ||||
|             raw_filename = ('%s_%s.dump' % (video_id, url)) | ||||
|             basen = '%s_%s' % (video_id, url) | ||||
|             if len(basen) > 240: | ||||
|                 h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() | ||||
|                 basen = basen[:240 - len(h)] + h | ||||
|             raw_filename = basen + '.dump' | ||||
|             filename = sanitize_filename(raw_filename, restricted=True) | ||||
|             self.to_screen(u'Saving request to ' + filename) | ||||
|             with open(filename, 'wb') as outf: | ||||
|                 outf.write(webpage_bytes) | ||||
|  | ||||
|         content = webpage_bytes.decode(encoding, 'replace') | ||||
|         try: | ||||
|             content = webpage_bytes.decode(encoding, 'replace') | ||||
|         except LookupError: | ||||
|             content = webpage_bytes.decode('utf-8', 'replace') | ||||
|  | ||||
|         if (u'<title>Access to this site is blocked</title>' in content and | ||||
|                 u'Websense' in content[:512]): | ||||
| @@ -276,9 +286,12 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _download_xml(self, url_or_request, video_id, | ||||
|                       note=u'Downloading XML', errnote=u'Unable to download XML', | ||||
|                       transform_source=None): | ||||
|                       transform_source=None, fatal=True): | ||||
|         """Return the xml as an xml.etree.ElementTree.Element""" | ||||
|         xml_string = self._download_webpage(url_or_request, video_id, note, errnote) | ||||
|         xml_string = self._download_webpage( | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal) | ||||
|         if xml_string is False: | ||||
|             return xml_string | ||||
|         if transform_source: | ||||
|             xml_string = transform_source(xml_string) | ||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||
| @@ -542,6 +555,23 @@ class InfoExtractor(object): | ||||
|             ) | ||||
|         formats.sort(key=_formats_key) | ||||
|  | ||||
|     def http_scheme(self): | ||||
|         """ Either "http:" or "https:", depending on the user's preferences """ | ||||
|         return ( | ||||
|             'http:' | ||||
|             if self._downloader.params.get('prefer_insecure', False) | ||||
|             else 'https:') | ||||
|  | ||||
|     def _proto_relative_url(self, url, scheme=None): | ||||
|         if url is None: | ||||
|             return url | ||||
|         if url.startswith('//'): | ||||
|             if scheme is None: | ||||
|                 scheme = self.http_scheme() | ||||
|             return scheme + url | ||||
|         else: | ||||
|             return url | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
| @@ -585,3 +615,4 @@ class SearchInfoExtractor(InfoExtractor): | ||||
|     @property | ||||
|     def SEARCH_KEY(self): | ||||
|         return self._SEARCH_KEY | ||||
|  | ||||
|   | ||||
| @@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor): | ||||
|         'glamour': 'Glamour', | ||||
|         'wmagazine': 'W Magazine', | ||||
|         'vanityfair': 'Vanity Fair', | ||||
|         'cnevids': 'Condé Nast', | ||||
|     } | ||||
|  | ||||
|     _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) | ||||
|     _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) | ||||
|     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', | ||||
|         'file': '5171b343c2b4c00dd0c1ccb3.mp4', | ||||
|         'md5': '1921f713ed48aabd715691f774c451f7', | ||||
|         'info_dict': { | ||||
|             'id': '5171b343c2b4c00dd0c1ccb3', | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D Printed Speakers Lit With LED', | ||||
|             'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', | ||||
|         } | ||||
| @@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor): | ||||
|         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] | ||||
|         return self.playlist_result(entries, playlist_title=title) | ||||
|  | ||||
|     def _extract_video(self, webpage): | ||||
|         description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>', | ||||
|                                                r'<div class="video-post-content">(.+?)</div>', | ||||
|                                                ], | ||||
|                                               webpage, 'description', | ||||
|                                               fatal=False, flags=re.DOTALL) | ||||
|     def _extract_video(self, webpage, url_type): | ||||
|         if url_type != 'embed': | ||||
|             description = self._html_search_regex( | ||||
|                 [ | ||||
|                     r'<div class="cne-video-description">(.+?)</div>', | ||||
|                     r'<div class="video-post-content">(.+?)</div>', | ||||
|                 ], | ||||
|                 webpage, 'description', fatal=False, flags=re.DOTALL) | ||||
|         else: | ||||
|             description = None | ||||
|         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, | ||||
|                                     'player params', flags=re.DOTALL) | ||||
|         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') | ||||
| @@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site = mobj.group('site') | ||||
|         url_type = mobj.group('type') | ||||
|         id = mobj.group('id') | ||||
|         item_id = mobj.group('id') | ||||
|  | ||||
|         self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) | ||||
|         webpage = self._download_webpage(url, id) | ||||
|         self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) | ||||
|         webpage = self._download_webpage(url, item_id) | ||||
|  | ||||
|         if url_type == 'series': | ||||
|             return self._extract_series(url, webpage) | ||||
|         else: | ||||
|             return self._extract_video(webpage) | ||||
|             return self._extract_video(webpage, url_type) | ||||
|   | ||||
| @@ -8,12 +8,11 @@ from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_str, | ||||
|     get_element_by_id, | ||||
|     orderedSet, | ||||
|     str_to_int, | ||||
|     int_or_none, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
| class DailymotionBaseInfoExtractor(InfoExtractor): | ||||
| @@ -189,7 +188,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|             webpage = self._download_webpage(request, | ||||
|                                              id, u'Downloading page %s' % pagenum) | ||||
|  | ||||
|             video_ids.extend(re.findall(r'data-id="(.+?)"', webpage)) | ||||
|             video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage)) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: | ||||
|                 break | ||||
| @@ -201,11 +200,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|         playlist_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         return {'_type': 'playlist', | ||||
|                 'id': playlist_id, | ||||
|                 'title': get_element_by_id(u'playlist_name', webpage), | ||||
|                 'entries': self._extract_entries(playlist_id), | ||||
|                 } | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'entries': self._extract_entries(playlist_id), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DailymotionUserIE(DailymotionPlaylistIE): | ||||
| @@ -217,9 +217,9 @@ class DailymotionUserIE(DailymotionPlaylistIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user = mobj.group('user') | ||||
|         webpage = self._download_webpage(url, user) | ||||
|         full_user = self._html_search_regex( | ||||
|             r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user), | ||||
|             webpage, u'user', flags=re.DOTALL) | ||||
|         full_user = unescapeHTML(self._html_search_regex( | ||||
|             r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user), | ||||
|             webpage, u'user', flags=re.DOTALL)) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|   | ||||
| @@ -1,39 +1,37 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
| ) | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class DreiSatIE(InfoExtractor): | ||||
|     IE_NAME = '3sat' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", | ||||
|         u'file': u'36983.mp4', | ||||
|         u'md5': u'9dcfe344732808dbfcc901537973c922', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Kaffeeland Schweiz", | ||||
|             u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",  | ||||
|             u"uploader": u"3sat", | ||||
|             u"upload_date": u"20130622" | ||||
|         'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', | ||||
|         'md5': '9dcfe344732808dbfcc901537973c922', | ||||
|         'info_dict': { | ||||
|             'id': '36983', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kaffeeland Schweiz', | ||||
|             'description': 'md5:cc4424b18b75ae9948b13929a0814033', | ||||
|             'uploader': '3sat', | ||||
|             'upload_date': '20130622' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id | ||||
|         details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details') | ||||
|         details_doc = self._download_xml(details_url, video_id, 'Downloading video details') | ||||
|  | ||||
|         thumbnail_els = details_doc.findall('.//teaserimage') | ||||
|         thumbnails = [{ | ||||
|             'width': te.attrib['key'].partition('x')[0], | ||||
|             'height': te.attrib['key'].partition('x')[2], | ||||
|             'width': int(te.attrib['key'].partition('x')[0]), | ||||
|             'height': int(te.attrib['key'].partition('x')[2]), | ||||
|             'url': te.text, | ||||
|         } for te in thumbnail_els] | ||||
|  | ||||
|   | ||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/empflix.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/empflix.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class EmpflixIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', | ||||
|         'md5': 'b1bc15b6412d33902d6e5952035fcabc', | ||||
|         'info_dict': { | ||||
|             'id': '33051', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Amateur Finger Fuck', | ||||
|             'description': 'Amateur solo finger fucking.', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         age_limit = self._rta_search(webpage) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'name="title" value="(?P<title>[^"]*)"', webpage, 'title') | ||||
|         video_description = self._html_search_regex( | ||||
|             r'name="description" value="([^"]*)"', webpage, 'description', fatal=False) | ||||
|  | ||||
|         cfg_url = self._html_search_regex( | ||||
|             r'flashvars\.config = escape\("([^"]+)"', | ||||
|             webpage, 'flashvars.config') | ||||
|  | ||||
|         cfg_xml = self._download_xml( | ||||
|             cfg_url, video_id, note='Downloading metadata') | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': item.find('videoLink').text, | ||||
|                 'format_id': item.find('res').text, | ||||
|             } for item in cfg_xml.findall('./quality/item') | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'description': video_description, | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
| @@ -1,4 +1,5 @@ | ||||
| import os | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -8,18 +9,23 @@ from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ExtremeTubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', | ||||
|         u'file': u'652431.mp4', | ||||
|         u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Music Video 14 british euro brit european cumshots swallow", | ||||
|             u"uploader": u"unknown", | ||||
|             u"age_limit": 18, | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', | ||||
|         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', | ||||
|         'info_dict': { | ||||
|             'id': '652431', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Music Video 14 british euro brit european cumshots swallow', | ||||
|             'uploader': 'unknown', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.extremetube.com/gay/video/abcde-1234', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|         req.add_header('Cookie', 'age_verified=1') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') | ||||
|         uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title') | ||||
|         uploader = self._html_search_regex( | ||||
|             r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', | ||||
|             fatal=False) | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'video_url=(.+?)&', webpage, 'video_url')) | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
|         extension = os.path.splitext(path)[1][1:] | ||||
|         format = path.split('/')[5].split('_')[:2] | ||||
|         format = "-".join(format) | ||||
|  | ||||
| @@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor): | ||||
|             'title': video_title, | ||||
|             'uploader': uploader, | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'format': format, | ||||
|             'format_id': format, | ||||
|             'age_limit': 18, | ||||
|   | ||||
| @@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|             check_form = { | ||||
|                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), | ||||
|                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), | ||||
|                 'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'), | ||||
|                 'name_action_selected': 'dont_save', | ||||
|                 'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'), | ||||
|             } | ||||
|             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) | ||||
|             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|   | ||||
							
								
								
									
										63
									
								
								youtube_dl/extractor/fc2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/extractor/fc2.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| #! -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FC2IE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' | ||||
|     IE_NAME = 'fc2' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', | ||||
|         'md5': 'a6ebe8ebe0396518689d963774a54eb7', | ||||
|         'info_dict': { | ||||
|             'id': '20121103kUan1KHs', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Boxing again with Puff', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         self._downloader.cookiejar.clear_session_cookies()  # must clear | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         refer = url.replace('/content/', '/a/content/') | ||||
|  | ||||
|         mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() | ||||
|  | ||||
|         info_url = ( | ||||
|             "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". | ||||
|             format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E'))) | ||||
|  | ||||
|         info_webpage = self._download_webpage( | ||||
|             info_url, video_id, note='Downloading info page') | ||||
|         info = compat_urlparse.parse_qs(info_webpage) | ||||
|  | ||||
|         if 'err_code' in info: | ||||
|             raise ExtractorError('Error code: %s' % info['err_code'][0]) | ||||
|  | ||||
|         video_url = info['filepath'][0] + '?mid=' + info['mid'][0] | ||||
|         title_info = info.get('title') | ||||
|         if title_info: | ||||
|             title = title_info[0] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
| @@ -6,7 +6,6 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FirstpostIE(InfoExtractor): | ||||
|     IE_NAME = 'Firstpost.com' | ||||
|     _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -16,7 +15,7 @@ class FirstpostIE(InfoExtractor): | ||||
|             'id': '1025403', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'India to launch indigenous aircraft carrier INS Vikrant today', | ||||
|             'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.', | ||||
|             'description': 'md5:feef3041cb09724e0bdc02843348f5f4', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -24,15 +23,30 @@ class FirstpostIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<div.*?name="div_video".*?flashvars="([^"]+)">', | ||||
|             webpage, 'video URL') | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_meta('twitter:title', page, 'title') | ||||
|         description = self._html_search_meta('twitter:description', page, 'title') | ||||
|  | ||||
|         data = self._download_xml( | ||||
|             'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id, | ||||
|             'Downloading video XML') | ||||
|  | ||||
|         item = data.find('./playlist/item') | ||||
|         thumbnail = item.find('./image').text | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': details.find('./file').text, | ||||
|                 'format_id': details.find('./label').text.strip(), | ||||
|                 'width': int(details.find('./width').text.strip()), | ||||
|                 'height': int(details.find('./height').text.strip()), | ||||
|             } for details in item.findall('./source/file_details') if details.find('./file').text | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -5,6 +5,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -16,16 +18,28 @@ class FiveMinIE(InfoExtractor): | ||||
|         (?P<id>\d+) | ||||
|         ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/ | ||||
|         'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791', | ||||
|         'md5': '4f7b0b79bf1a470e5004f7112385941d', | ||||
|         'info_dict': { | ||||
|             'id': '518013791', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'iPad Mini with Retina Display Review', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/ | ||||
|             'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791', | ||||
|             'md5': '4f7b0b79bf1a470e5004f7112385941d', | ||||
|             'info_dict': { | ||||
|                 'id': '518013791', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'iPad Mini with Retina Display Review', | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|         { | ||||
|             # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 | ||||
|             'url': '5min:518086247', | ||||
|             'md5': 'e539a9dd682c288ef5a498898009f69e', | ||||
|             'info_dict': { | ||||
|                 'id': '518086247', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'How to Make a Next-Level Fruit Salad', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def _build_result(cls, video_id): | ||||
| @@ -34,10 +48,28 @@ class FiveMinIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info = self._download_json( | ||||
|             'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&' | ||||
|             'playlist=%s&url=https' % video_id, | ||||
|             video_id)['binding'][0] | ||||
|         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id | ||||
|         embed_page = self._download_webpage(embed_url, video_id, | ||||
|             'Downloading embed page') | ||||
|         sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') | ||||
|         query = compat_urllib_parse.urlencode({ | ||||
|             'func': 'GetResults', | ||||
|             'playlist': video_id, | ||||
|             'sid': sid, | ||||
|             'isPlayerSeed': 'true', | ||||
|             'url': embed_url, | ||||
|         }) | ||||
|         response = self._download_json( | ||||
|             'https://syn.5min.com/handlers/SenseHandler.ashx?' + query, | ||||
|             video_id) | ||||
|         if not response['success']: | ||||
|             err_msg = response['errorMessage'] | ||||
|             if err_msg == 'ErrorVideoUserNotGeo': | ||||
|                 msg = 'Video not available from your location' | ||||
|             else: | ||||
|                 msg = 'Aol said: %s' % err_msg | ||||
|             raise ExtractorError(msg, expected=True, video_id=video_id) | ||||
|         info = response['binding'][0] | ||||
|  | ||||
|         second_id = compat_str(int(video_id[:-2]) + 1) | ||||
|         formats = [] | ||||
|   | ||||
| @@ -48,24 +48,36 @@ class PluzzIE(FranceTVBaseInfoExtractor): | ||||
|  | ||||
| class FranceTvInfoIE(FranceTVBaseInfoExtractor): | ||||
|     IE_NAME = 'francetvinfo.fr' | ||||
|     _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html' | ||||
|     _VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', | ||||
|         'file': '84981923.mp4', | ||||
|         'info_dict': { | ||||
|             'id': '84981923', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Soir 3', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', | ||||
|         'info_dict': { | ||||
|             'id': 'EV_20019', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Débat des candidats à la Commission européenne', | ||||
|             'description': 'Débat des candidats à la Commission européenne', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'HLS (reqires ffmpeg)' | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_title = mobj.group('title') | ||||
|         webpage = self._download_webpage(url, page_title) | ||||
|         video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id') | ||||
|         video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id') | ||||
|         return self._extract_video(video_id) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -4,22 +4,32 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class FunnyOrDieIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', | ||||
|         'file': '0732f586d7.mp4', | ||||
|         'md5': 'f647e9e90064b53b6e046e75d0241fbd', | ||||
|         'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9', | ||||
|         'info_dict': { | ||||
|             'description': ('Lyrics changed to match the video. Spoken cameo ' | ||||
|                 'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a ' | ||||
|                 'concept by Dustin McLean (DustFilms.com). Performed, edited, ' | ||||
|                 'and written by David A. Scott.'), | ||||
|             'id': '0732f586d7', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Heart-Shaped Box: Literal Video Version', | ||||
|             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338', | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.funnyordie.com/embed/e402820827', | ||||
|         'md5': 'ff4d83318f89776ed0250634cfaa8d36', | ||||
|         'info_dict': { | ||||
|             'id': 'e402820827', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Please Use This Song (Jon Lajoie)', | ||||
|             'description': 'md5:2ed27d364f5a805a6dba199faaf6681d', | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -27,27 +37,34 @@ class FunnyOrDieIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], | ||||
|             webpage, 'video URL', flags=re.DOTALL) | ||||
|         links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage) | ||||
|         if not links: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         if mobj.group('type') == 'embed': | ||||
|             post_json = self._search_regex( | ||||
|                 r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') | ||||
|             post = json.loads(post_json) | ||||
|             title = post['name'] | ||||
|             description = post.get('description') | ||||
|             thumbnail = post.get('picture') | ||||
|         else: | ||||
|             title = self._og_search_title(webpage) | ||||
|             description = self._og_search_description(webpage) | ||||
|             thumbnail = None | ||||
|         links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) | ||||
|  | ||||
|         bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates') | ||||
|         bitrates = [int(b) for b in bitrates.rstrip(',').split(',')] | ||||
|         bitrates.sort() | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for bitrate in bitrates: | ||||
|             for link in links: | ||||
|                 formats.append({ | ||||
|                     'url': '%s%d.%s' % (link[0], bitrate, link[1]), | ||||
|                     'format_id': '%s-%d' % (link[1], bitrate), | ||||
|                     'vbr': bitrate, | ||||
|                 }) | ||||
|  | ||||
|         post_json = self._search_regex( | ||||
|             r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') | ||||
|         post = json.loads(post_json) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'title': post['name'], | ||||
|             'description': post.get('description'), | ||||
|             'thumbnail': post.get('picture'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor): | ||||
|             'id': '20130811', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', | ||||
|             'description': 'md5:36fd701e57e8c15ac8682a2374c99731', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -15,11 +15,12 @@ from ..utils import ( | ||||
| class GameSpotIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' | ||||
|     _TEST = { | ||||
|         "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", | ||||
|         "file": "gs-2300-6410818.mp4", | ||||
|         "md5": "b2a30deaa8654fcccd43713a6b6a4825", | ||||
|         "info_dict": { | ||||
|             "title": "Arma 3 - Community Guide: SITREP I", | ||||
|         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', | ||||
|         'md5': 'b2a30deaa8654fcccd43713a6b6a4825', | ||||
|         'info_dict': { | ||||
|             'id': 'gs-2300-6410818', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Arma 3 - Community Guide: SITREP I', | ||||
|             'description': 'Check out this video where some of the basics of Arma 3 is explained.', | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -35,9 +35,10 @@ class GenericIE(InfoExtractor): | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', | ||||
|             'file': '13601338388002.mp4', | ||||
|             'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd', | ||||
|             'md5': '85b90ccc9d73b4acd9138d3af4c27f89', | ||||
|             'info_dict': { | ||||
|                 'id': '13601338388002', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'www.hodiho.fr', | ||||
|                 'title': 'R\u00e9gis plante sa Jeep', | ||||
|             } | ||||
| @@ -46,8 +47,9 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             'add_ie': ['Bandcamp'], | ||||
|             'url': 'http://bronyrock.com/track/the-pony-mash', | ||||
|             'file': '3235767654.mp3', | ||||
|             'info_dict': { | ||||
|                 'id': '3235767654', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'The Pony Mash', | ||||
|                 'uploader': 'M_Pallante', | ||||
|             }, | ||||
| @@ -73,9 +75,10 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             # https://github.com/rg3/youtube-dl/issues/2253 | ||||
|             'url': 'http://bcove.me/i6nfkrc3', | ||||
|             'file': '3101154703001.mp4', | ||||
|             'md5': '0ba9446db037002366bab3b3eb30c88c', | ||||
|             'info_dict': { | ||||
|                 'id': '3101154703001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Still no power', | ||||
|                 'uploader': 'thestar.com', | ||||
|                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', | ||||
| @@ -236,6 +239,56 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader_id': 'rbctv_2012_4', | ||||
|             }, | ||||
|         }, | ||||
|         # Condé Nast embed | ||||
|         { | ||||
|             'url': 'http://www.wired.com/2014/04/honda-asimo/', | ||||
|             'md5': 'ba0dfe966fa007657bd1443ee672db0f', | ||||
|             'info_dict': { | ||||
|                 'id': '53501be369702d3275860000', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever', | ||||
|             } | ||||
|         }, | ||||
|         # Dailymotion embed | ||||
|         { | ||||
|             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/', | ||||
|             'md5': '441aeeb82eb72c422c7f14ec533999cd', | ||||
|             'info_dict': { | ||||
|                 'id': 'k2mm4bCdJ6CQ2i7c8o2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web', | ||||
|                 'uploader': 'Spi0n', | ||||
|             }, | ||||
|             'add_ie': ['Dailymotion'], | ||||
|         }, | ||||
|         # YouTube embed | ||||
|         { | ||||
|             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'FXRb4ykk4S0', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The NBL Auction 2014', | ||||
|                 'uploader': 'BADMINTON England', | ||||
|                 'uploader_id': 'BADMINTONEvents', | ||||
|                 'upload_date': '20140603', | ||||
|                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73', | ||||
|             }, | ||||
|             'add_ie': ['Youtube'], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         # MTVSercices embed | ||||
|         { | ||||
|             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', | ||||
|             'md5': '35727f82f58c76d996fc188f9755b0d5', | ||||
|             'info_dict': { | ||||
|                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Review', | ||||
|                 'description': 'Mario\'s life in the fast lane has never looked so good.', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_download_webpage(self, video_id): | ||||
| @@ -320,6 +373,12 @@ class GenericIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         if url.startswith('//'): | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': self.http_scheme() + url, | ||||
|             } | ||||
|  | ||||
|         parsed_url = compat_urlparse.urlparse(url) | ||||
|         if not parsed_url.scheme: | ||||
|             default_search = self._downloader.params.get('default_search') | ||||
| @@ -332,8 +391,13 @@ class GenericIE(InfoExtractor): | ||||
|                     return self.url_result('http://' + url) | ||||
|                 else: | ||||
|                     if default_search == 'auto_warning': | ||||
|                         self._downloader.report_warning( | ||||
|                             'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url) | ||||
|                         if re.match(r'^(?:url|URL)$', url): | ||||
|                             raise ExtractorError( | ||||
|                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url, | ||||
|                                 expected=True) | ||||
|                         else: | ||||
|                             self._downloader.report_warning( | ||||
|                                 'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url) | ||||
|                     return self.url_result('ytsearch:' + url) | ||||
|             else: | ||||
|                 assert ':' in default_search | ||||
| @@ -442,8 +506,13 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded YouTube player | ||||
|         matches = re.findall(r'''(?x) | ||||
|             (?:<iframe[^>]+?src=|embedSWF\(\s*) | ||||
|             (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ | ||||
|             (?: | ||||
|                 <iframe[^>]+?src=| | ||||
|                 <embed[^>]+?src=| | ||||
|                 embedSWF\(?:\s* | ||||
|             ) | ||||
|             (["\']) | ||||
|                 (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ | ||||
|                 (?:embed|v)/.+?) | ||||
|             \1''', webpage) | ||||
|         if matches: | ||||
| @@ -456,7 +525,7 @@ class GenericIE(InfoExtractor): | ||||
|         matches = re.findall( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) | ||||
|         if matches: | ||||
|             urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') | ||||
|             urlrs = [self.url_result(unescapeHTML(tuppl[1])) | ||||
|                      for tuppl in matches] | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
| @@ -482,6 +551,22 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj: | ||||
|             return self.url_result(mobj.group(1), 'BlipTV') | ||||
|  | ||||
|         # Look for embedded condenast player | ||||
|         matches = re.findall( | ||||
|             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")', | ||||
|             webpage) | ||||
|         if matches: | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'entries': [{ | ||||
|                     '_type': 'url', | ||||
|                     'ie_key': 'CondeNast', | ||||
|                     'url': ma, | ||||
|                 } for ma in matches], | ||||
|                 'title': video_title, | ||||
|                 'id': video_id, | ||||
|             } | ||||
|  | ||||
|         # Look for Bandcamp pages with custom domain | ||||
|         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) | ||||
|         if mobj is not None: | ||||
| @@ -502,7 +587,7 @@ class GenericIE(InfoExtractor): | ||||
|             return OoyalaIE._build_url_result(mobj.group('ec')) | ||||
|  | ||||
|         # Look for Aparat videos | ||||
|         mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
|         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group(1), 'Aparat') | ||||
|  | ||||
| @@ -513,7 +598,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded NovaMov-based player | ||||
|         mobj = re.search( | ||||
|             r'''(?x)<iframe[^>]+?src=(["\']) | ||||
|             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\']) | ||||
|                     (?P<url>http://(?:(?:embed|www)\.)? | ||||
|                         (?:novamov\.com| | ||||
|                            nowvideo\.(?:ch|sx|eu|at|ag|co)| | ||||
| @@ -586,65 +671,102 @@ class GenericIE(InfoExtractor): | ||||
|         if smotri_url: | ||||
|             return self.url_result(smotri_url, 'Smotri') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Look for gorilla-vid style embedding | ||||
|             mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if mobj is None: | ||||
|             # Broaden the search a little bit: JWPlayer JS loader | ||||
|             mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||
|         # Look for embeded soundcloud player | ||||
|         mobj = re.search( | ||||
|             r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url) | ||||
|  | ||||
|         if mobj is None: | ||||
|         # Look for embedded vulture.com player | ||||
|         mobj = re.search( | ||||
|             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url, ie='Vulture') | ||||
|  | ||||
|         # Look for embedded mtvservices player | ||||
|         mobj = re.search( | ||||
|             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url, ie='MTVServicesEmbedded') | ||||
|  | ||||
|         # Start with something easy: JW Player in SWFObject | ||||
|         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|             # Look for gorilla-vid style embedding | ||||
|             found = re.findall(r'''(?sx) | ||||
|                 (?: | ||||
|                     jw_plugins| | ||||
|                     JWPlayerOptions| | ||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||
|                 ) | ||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage) | ||||
|         if not found: | ||||
|             # Broaden the search a little bit | ||||
|             found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) | ||||
|         if not found: | ||||
|             # Broaden the findall a little bit: JWPlayer JS loader | ||||
|             found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) | ||||
|         if not found: | ||||
|             # Try to find twitter cards info | ||||
|             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|         if mobj is None: | ||||
|             found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) | ||||
|         if not found: | ||||
|             # We look for Open Graph info: | ||||
|             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) | ||||
|             m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) | ||||
|             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: | ||||
|             if m_video_type is not None: | ||||
|                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if mobj is None: | ||||
|                 found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) | ||||
|         if not found: | ||||
|             # HTML5 video | ||||
|             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) | ||||
|         if mobj is None: | ||||
|             mobj = re.search( | ||||
|             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage) | ||||
|         if not found: | ||||
|             found = re.search( | ||||
|                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' | ||||
|                 r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', | ||||
|                 webpage) | ||||
|             if mobj: | ||||
|                 new_url = mobj.group(1) | ||||
|             if found: | ||||
|                 new_url = found.group(1) | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 return { | ||||
|                     '_type': 'url', | ||||
|                     'url': new_url, | ||||
|                 } | ||||
|         if mobj is None: | ||||
|         if not found: | ||||
|             raise ExtractorError('Unsupported URL: %s' % url) | ||||
|  | ||||
|         # It's possible that one of the regexes | ||||
|         # matched, but returned an empty group: | ||||
|         if mobj.group(1) is None: | ||||
|             raise ExtractorError('Did not find a valid video URL at %s' % url) | ||||
|         entries = [] | ||||
|         for video_url in found: | ||||
|             video_url = compat_urlparse.urljoin(url, video_url) | ||||
|             video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
|  | ||||
|         video_url = mobj.group(1) | ||||
|         video_url = compat_urlparse.urljoin(url, video_url) | ||||
|         video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) | ||||
|             # Sometimes, jwplayer extraction will result in a YouTube URL | ||||
|             if YoutubeIE.suitable(video_url): | ||||
|                 entries.append(self.url_result(video_url, 'Youtube')) | ||||
|                 continue | ||||
|  | ||||
|         # Sometimes, jwplayer extraction will result in a YouTube URL | ||||
|         if YoutubeIE.suitable(video_url): | ||||
|             return self.url_result(video_url, 'Youtube') | ||||
|             # here's a fun little line of code for you: | ||||
|             video_id = os.path.splitext(video_id)[0] | ||||
|  | ||||
|         # here's a fun little line of code for you: | ||||
|         video_id = os.path.splitext(video_id)[0] | ||||
|             entries.append({ | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'uploader': video_uploader, | ||||
|                 'title': video_title, | ||||
|             }) | ||||
|  | ||||
|         if len(entries) == 1: | ||||
|             return entries[0] | ||||
|         else: | ||||
|             for num, e in enumerate(entries, start=1): | ||||
|                 e['title'] = '%s (%d)' % (e['title'], num) | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'entries': entries, | ||||
|             } | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										76
									
								
								youtube_dl/extractor/gorillavid.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								youtube_dl/extractor/gorillavid.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GorillaVidIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://gorillavid.in/06y9juieqpmi', | ||||
|         'md5': '5ae4a3580620380619678ee4875893ba', | ||||
|         'info_dict': { | ||||
|             'id': '06y9juieqpmi', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Rebecca Black My Moment Official Music Video Reaction', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', | ||||
|         'md5': 'c9e293ca74d46cad638e199c3f3fe604', | ||||
|         'info_dict': { | ||||
|             'id': 'z08zf8le23c6', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Say something nice', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = 'http://gorillavid.in/%s' % video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         fields = dict(re.findall(r'''(?x)<input\s+ | ||||
|             type="hidden"\s+ | ||||
|             name="([^"]+)"\s+ | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', webpage)) | ||||
|          | ||||
|         if fields['op'] == 'download1': | ||||
|             post = compat_urllib_parse.urlencode(fields) | ||||
|  | ||||
|             req = compat_urllib_request.Request(url, post) | ||||
|             req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|  | ||||
|             webpage = self._download_webpage(req, video_id, 'Downloading video page') | ||||
|  | ||||
|         title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title') | ||||
|         thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail') | ||||
|         url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url') | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|             'url': url, | ||||
|             'ext': determine_ext(url), | ||||
|             'quality': 1, | ||||
|         }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/hentaistigma.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/hentaistigma.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class HentaiStigmaIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/', | ||||
|         'md5': '4e3d07422a68a4cc363d8f57c8bf0d23', | ||||
|         'info_dict': { | ||||
|             'id': 'inyouchuu-etsu-bonus', | ||||
|             'ext': 'mp4', | ||||
|             "title": "Inyouchuu Etsu Bonus", | ||||
|             "age_limit": 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>', | ||||
|             webpage, 'title') | ||||
|         wrap_url = self._html_search_regex( | ||||
|             r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url') | ||||
|         wrap_webpage = self._download_webpage(wrap_url, video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -1,10 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
| @@ -13,59 +14,55 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class HypemIE(InfoExtractor): | ||||
|     """Information Extractor for hypem""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' | ||||
|     _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', | ||||
|         u'file': u'1v6ga.mp3', | ||||
|         u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Tame" | ||||
|         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', | ||||
|         'md5': 'b9cc91b5af8995e9f0c1cee04c575828', | ||||
|         'info_dict': { | ||||
|             'id': '1v6ga', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Tame', | ||||
|             'uploader': 'BODYWORK', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         track_id = mobj.group(1) | ||||
|  | ||||
|         data = {'ax': 1, 'ts': time.time()} | ||||
|         data_encoded = compat_urllib_parse.urlencode(data) | ||||
|         complete_url = url + "?" + data_encoded | ||||
|         request = compat_urllib_request.Request(complete_url) | ||||
|         response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') | ||||
|         response, urlh = self._download_webpage_handle( | ||||
|             request, track_id, 'Downloading webpage with the url') | ||||
|         cookie = urlh.headers.get('Set-Cookie', '') | ||||
|  | ||||
|         self.report_extraction(track_id) | ||||
|  | ||||
|         html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>', | ||||
|             response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() | ||||
|         html_tracks = self._html_search_regex( | ||||
|             r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>', | ||||
|             response, 'tracks') | ||||
|         try: | ||||
|             track_list = json.loads(html_tracks) | ||||
|             track = track_list[u'tracks'][0] | ||||
|             track = track_list['tracks'][0] | ||||
|         except ValueError: | ||||
|             raise ExtractorError(u'Hypemachine contained invalid JSON.') | ||||
|             raise ExtractorError('Hypemachine contained invalid JSON.') | ||||
|  | ||||
|         key = track[u"key"] | ||||
|         track_id = track[u"id"] | ||||
|         artist = track[u"artist"] | ||||
|         title = track[u"song"] | ||||
|         key = track['key'] | ||||
|         track_id = track['id'] | ||||
|         artist = track['artist'] | ||||
|         title = track['song'] | ||||
|  | ||||
|         serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) | ||||
|         request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) | ||||
|         serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key) | ||||
|         request = compat_urllib_request.Request( | ||||
|             serve_url, '', {'Content-Type': 'application/json'}) | ||||
|         request.add_header('cookie', cookie) | ||||
|         song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') | ||||
|         try: | ||||
|             song_data = json.loads(song_data_json) | ||||
|         except ValueError: | ||||
|             raise ExtractorError(u'Hypemachine contained invalid JSON.') | ||||
|         final_url = song_data[u"url"] | ||||
|         song_data = self._download_json(request, track_id, 'Downloading metadata') | ||||
|         final_url = song_data["url"] | ||||
|  | ||||
|         return [{ | ||||
|             'id':       track_id, | ||||
|             'url':      final_url, | ||||
|             'ext':      "mp3", | ||||
|             'title':    title, | ||||
|             'artist':   artist, | ||||
|         }] | ||||
|         return { | ||||
|             'id': track_id, | ||||
|             'url': final_url, | ||||
|             'ext': 'mp3', | ||||
|             'title': title, | ||||
|             'uploader': artist, | ||||
|         } | ||||
|   | ||||
| @@ -5,8 +5,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| 
 | ||||
| 
 | ||||
| class StatigramIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)' | ||||
| class IconosquareIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://statigr.am/p/522207370455279102_24101272', | ||||
|         'md5': '6eb93b882a3ded7c378ee1d6884b1814', | ||||
| @@ -15,6 +15,7 @@ class StatigramIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'uploader_id': 'aguynamedpatrick', | ||||
|             'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|             'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', | ||||
|         }, | ||||
|     } | ||||
| 
 | ||||
| @@ -25,7 +26,7 @@ class StatigramIE(InfoExtractor): | ||||
|         html_title = self._html_search_regex( | ||||
|             r'<title>(.+?)</title>', | ||||
|             webpage, 'title') | ||||
|         title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title) | ||||
|         title = re.sub(r'(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)$', '', html_title) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r'@([^ ]+)', title, 'uploader name', fatal=False) | ||||
| 
 | ||||
| @@ -33,6 +34,7 @@ class StatigramIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'url': self._og_search_video_url(webpage), | ||||
|             'title': title, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader_id': uploader_id | ||||
|         } | ||||
| @@ -106,7 +106,7 @@ class OneUPIE(IGNIE): | ||||
|  | ||||
|     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://gamevideos.1up.com/video/id/34976', | ||||
|         'md5': '68a54ce4ebc772e4b71e3123d413163d', | ||||
|         'info_dict': { | ||||
| @@ -115,10 +115,7 @@ class OneUPIE(IGNIE): | ||||
|             'title': 'Sniper Elite V2 - Trailer', | ||||
|             'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # Override IGN tests | ||||
|     _TESTS = [] | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -11,16 +11,15 @@ from ..utils import ( | ||||
|  | ||||
| class InfoQIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$' | ||||
|  | ||||
|     _TEST = { | ||||
|         "name": "InfoQ", | ||||
|         "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", | ||||
|         "file": "12-jan-pythonthings.mp4", | ||||
|         "info_dict": { | ||||
|             "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", | ||||
|             "title": "A Few of My Favorite [Python] Things", | ||||
|         }, | ||||
|         "params": { | ||||
|             "skip_download": True, | ||||
|         'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things', | ||||
|         'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2', | ||||
|         'info_dict': { | ||||
|             'id': '12-jan-pythonthings', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.', | ||||
|             'title': 'A Few of My Favorite [Python] Things', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title') | ||||
|         video_description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         # The server URL is hardcoded | ||||
|         video_url = 'rtmpe://video.infoq.com/cfx/st/' | ||||
|  | ||||
|         # Extract video URL | ||||
|         encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id') | ||||
|         encoded_id = self._search_regex( | ||||
|             r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id') | ||||
|         real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) | ||||
|         video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id | ||||
|         playpath = 'mp4:' + real_id | ||||
|  | ||||
|         # Extract title | ||||
|         video_title = self._search_regex(r'contentTitle = "(.*?)";', | ||||
|             webpage, 'title') | ||||
|  | ||||
|         # Extract description | ||||
|         video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         video_filename = video_url.split('/')[-1] | ||||
|         video_filename = playpath.split('/')[-1] | ||||
|         video_id, extension = video_filename.split('.') | ||||
|  | ||||
|         http_base = self._search_regex( | ||||
|             r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage, | ||||
|             'HTTP base URL') | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'rtmp', | ||||
|             'url': video_url, | ||||
|             'ext': extension, | ||||
|             'play_path': playpath, | ||||
|         }, { | ||||
|             'format_id': 'http', | ||||
|             'url': http_base + real_id, | ||||
|         }] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': video_title, | ||||
|             'ext': extension,  # Extension is always(?) mp4, but seems to be flv | ||||
|             'description': video_description, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -33,14 +33,14 @@ class IviIE(InfoExtractor): | ||||
|         }, | ||||
|         # Serial's serie | ||||
|         { | ||||
|             'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', | ||||
|             'md5': '3e6cc9a848c1d2ebcc6476444967baa9', | ||||
|             'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549', | ||||
|             'md5': '221f56b35e3ed815fde2df71032f4b3e', | ||||
|             'info_dict': { | ||||
|                 'id': '74791', | ||||
|                 'id': '9549', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Дежурный ангел - 1 серия', | ||||
|                 'duration': 2490, | ||||
|                 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', | ||||
|                 'title': 'Двое из ларца - Серия 1', | ||||
|                 'duration': 2655, | ||||
|                 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg', | ||||
|             }, | ||||
|             'skip': 'Only works from Russia', | ||||
|          } | ||||
|   | ||||
| @@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', | ||||
|         'md5': '5dc6477e74b1e37042ac5acedd8413e5', | ||||
|         'md5': '1574e9b4d6438446d5b7dbcdf2786276', | ||||
|         'info_dict': { | ||||
|             'id': 'r303r', | ||||
|             'ext': 'flv', | ||||
|   | ||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/ku6.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/ku6.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class Ku6IE(InfoExtractor): | ||||
|     _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' | ||||
|     _TEST = { | ||||
|         'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', | ||||
|         'md5': '01203549b9efbb45f4b87d55bdea1ed1', | ||||
|         'info_dict': { | ||||
|             'id': 'JG-8yS14xzBr4bCn1pu0xw', | ||||
|             'ext': 'f4v', | ||||
|             'title': 'techniques test', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title') | ||||
|         dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id | ||||
|         jsonData = self._download_json(dataUrl, video_id) | ||||
|         downloadUrl = jsonData['data']['f'] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': downloadUrl | ||||
|         } | ||||
|  | ||||
| @@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', | ||||
|             'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.', | ||||
|             'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|             'upload_date': '20140130', | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| @@ -6,31 +8,34 @@ from ..utils import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     xpath_with_ns, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LivestreamIE(InfoExtractor): | ||||
|     IE_NAME = u'livestream' | ||||
|     IE_NAME = 'livestream' | ||||
|     _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' | ||||
|     _TEST = { | ||||
|         u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', | ||||
|         u'file': u'4719370.mp4', | ||||
|         u'md5': u'0d2186e3187d185a04b3cdd02b828836', | ||||
|         u'info_dict': { | ||||
|             u'title': u'Live from Webster Hall NYC', | ||||
|             u'upload_date': u'20121012', | ||||
|         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', | ||||
|         'md5': '53274c76ba7754fb0e8d072716f2292b', | ||||
|         'info_dict': { | ||||
|             'id': '4719370', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Live from Webster Hall NYC', | ||||
|             'upload_date': '20121012', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _extract_video_info(self, video_data): | ||||
|         video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') | ||||
|         return {'id': video_data['id'], | ||||
|                 'url': video_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'title': video_data['caption'], | ||||
|                 'thumbnail': video_data['thumbnail_url'], | ||||
|                 'upload_date': video_data['updated_at'].replace('-','')[:8], | ||||
|                 } | ||||
|         return { | ||||
|             'id': compat_str(video_data['id']), | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': video_data['caption'], | ||||
|             'thumbnail': video_data['thumbnail_url'], | ||||
|             'upload_date': video_data['updated_at'].replace('-', '')[:8], | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -40,36 +45,36 @@ class LivestreamIE(InfoExtractor): | ||||
|  | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             config_json = self._search_regex(r'window.config = ({.*?});', | ||||
|                 webpage, u'window config') | ||||
|             config_json = self._search_regex( | ||||
|                 r'window.config = ({.*?});', webpage, 'window config') | ||||
|             info = json.loads(config_json)['event'] | ||||
|             videos = [self._extract_video_info(video_data['data']) | ||||
|                 for video_data in info['feed']['data'] if video_data['type'] == u'video'] | ||||
|                 for video_data in info['feed']['data'] if video_data['type'] == 'video'] | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|         else: | ||||
|             og_video = self._og_search_video_url(webpage, name=u'player url') | ||||
|             og_video = self._og_search_video_url(webpage, 'player url') | ||||
|             query_str = compat_urllib_parse_urlparse(og_video).query | ||||
|             query = compat_urlparse.parse_qs(query_str) | ||||
|             api_url = query['play_url'][0].replace('.smil', '') | ||||
|             info = json.loads(self._download_webpage(api_url, video_id, | ||||
|                                                      u'Downloading video info')) | ||||
|             info = json.loads(self._download_webpage( | ||||
|                 api_url, video_id, 'Downloading video info')) | ||||
|             return self._extract_video_info(info) | ||||
|  | ||||
|  | ||||
| # The original version of Livestream uses a different system | ||||
| class LivestreamOriginalIE(InfoExtractor): | ||||
|     IE_NAME = u'livestream:original' | ||||
|     IE_NAME = 'livestream:original' | ||||
|     _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|         u'info_dict': { | ||||
|             u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|             u'ext': u'flv', | ||||
|             u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', | ||||
|         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|         'info_dict': { | ||||
|             'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', | ||||
|         }, | ||||
|         u'params': { | ||||
|         'params': { | ||||
|             # rtmp | ||||
|             u'skip_download': True, | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -84,7 +89,7 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|         ns = {'media': 'http://search.yahoo.com/mrss'} | ||||
|         thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] | ||||
|         # Remove the extension and number from the path (like 1.jpg) | ||||
|         path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path') | ||||
|         path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -2,7 +2,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| @@ -10,28 +9,48 @@ from .common import InfoExtractor | ||||
| class MailRuIE(InfoExtractor): | ||||
|     IE_NAME = 'mailru' | ||||
|     IE_DESC = 'Видео@Mail.Ru' | ||||
|     _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' | ||||
|     _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||
|         'md5': 'dea205f03120046894db4ebb6159879a', | ||||
|         'info_dict': { | ||||
|             'id': '46301138', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||
|             'upload_date': '20140224', | ||||
|             'uploader': 'sonypicturesrus', | ||||
|             'uploader_id': 'sonypicturesrus@mail.ru', | ||||
|             'duration': 184, | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', | ||||
|             'md5': 'dea205f03120046894db4ebb6159879a', | ||||
|             'info_dict': { | ||||
|                 'id': '46301138', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', | ||||
|                 'timestamp': 1393232740, | ||||
|                 'upload_date': '20140224', | ||||
|                 'uploader': 'sonypicturesrus', | ||||
|                 'uploader_id': 'sonypicturesrus@mail.ru', | ||||
|                 'duration': 184, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', | ||||
|             'md5': '00a91a58c3402204dcced523777b475f', | ||||
|             'info_dict': { | ||||
|                 'id': '46843144', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', | ||||
|                 'timestamp': 1397217632, | ||||
|                 'upload_date': '20140411', | ||||
|                 'uploader': 'hitech', | ||||
|                 'uploader_id': 'hitech@corp.mail.ru', | ||||
|                 'duration': 245, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.group('idv1') | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||
|             'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         author = video_data['author'] | ||||
|         uploader = author['name'] | ||||
| @@ -40,10 +59,11 @@ class MailRuIE(InfoExtractor): | ||||
|         movie = video_data['movie'] | ||||
|         content_id = str(movie['contentId']) | ||||
|         title = movie['title'] | ||||
|         if title.endswith('.mp4'): | ||||
|             title = title[:-4] | ||||
|         thumbnail = movie['poster'] | ||||
|         duration = movie['duration'] | ||||
|  | ||||
|         upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d') | ||||
|         view_count = video_data['views_count'] | ||||
|  | ||||
|         formats = [ | ||||
| @@ -57,7 +77,7 @@ class MailRuIE(InfoExtractor): | ||||
|             'id': content_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': video_data['timestamp'], | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|   | ||||
| @@ -1,15 +1,18 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MDRIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*' | ||||
|     _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)' | ||||
|      | ||||
|     # No tests, MDR regularily deletes its videos | ||||
|     _TEST = { | ||||
|         'url': 'http://www.mdr.de/fakt/video189002.html', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
| @@ -19,9 +22,9 @@ class MDRIE(InfoExtractor): | ||||
|         # determine title and media streams from webpage | ||||
|         html = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title') | ||||
|         title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title') | ||||
|         xmlurl = self._search_regex( | ||||
|             r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL') | ||||
|             r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL') | ||||
|  | ||||
|         doc = self._download_xml(domain + xmlurl, video_id) | ||||
|         formats = [] | ||||
| @@ -41,7 +44,7 @@ class MDRIE(InfoExtractor): | ||||
|             if vbr_el is None: | ||||
|                 format.update({ | ||||
|                     'vcodec': 'none', | ||||
|                     'format_id': u'%s-%d' % (media_type, abr), | ||||
|                     'format_id': '%s-%d' % (media_type, abr), | ||||
|                 }) | ||||
|             else: | ||||
|                 vbr = int(vbr_el.text) // 1000 | ||||
| @@ -49,12 +52,9 @@ class MDRIE(InfoExtractor): | ||||
|                     'vbr': vbr, | ||||
|                     'width': int(a.find('frameWidth').text), | ||||
|                     'height': int(a.find('frameHeight').text), | ||||
|                     'format_id': u'%s-%d' % (media_type, vbr), | ||||
|                     'format_id': '%s-%d' % (media_type, vbr), | ||||
|                 }) | ||||
|             formats.append(format) | ||||
|         if not formats: | ||||
|             raise ExtractorError(u'Could not find any valid formats') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -4,9 +4,10 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor): | ||||
|             'uploader': 'Daniel Holbach', | ||||
|             'uploader_id': 'dholbach', | ||||
|             'upload_date': '20111115', | ||||
|             'timestamp': 1321359578, | ||||
|             'thumbnail': 're:https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|         api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) | ||||
|         info = self._download_json( | ||||
|             api_url, track_id, 'Downloading cloudcast info') | ||||
|  | ||||
|         preview_url = self._search_regex( | ||||
|             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||
| @@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor): | ||||
|             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') | ||||
|             final_song_url = self._get_url(template_url) | ||||
|         if final_song_url is None: | ||||
|             raise ExtractorError(u'Unable to extract track url') | ||||
|             raise ExtractorError('Unable to extract track url') | ||||
|  | ||||
|         PREFIX = ( | ||||
|             r'<div class="cloudcast-play-button-container"' | ||||
|             r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') | ||||
|         title = self._html_search_regex( | ||||
|             PREFIX + r'm-title="([^"]+)"', webpage, 'title') | ||||
|         thumbnail = self._proto_relative_url(self._html_search_regex( | ||||
|             PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', | ||||
|             fatal=False)) | ||||
|         uploader = self._html_search_regex( | ||||
|             PREFIX + r'm-owner-name="([^"]+)"', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         uploader_id = self._search_regex( | ||||
|             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) | ||||
|         description = self._og_search_description(webpage) | ||||
|         like_count = int_or_none(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', | ||||
|             webpage, 'play count', fatal=False)) | ||||
|         timestamp = parse_iso8601(self._search_regex( | ||||
|             r'<time itemprop="dateCreated" datetime="([^"]+)">', | ||||
|             webpage, 'upload date')) | ||||
|  | ||||
|         return { | ||||
|             'id': track_id, | ||||
|             'title': info['name'], | ||||
|             'title': title, | ||||
|             'url': final_song_url, | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info['pictures'].get('extra_large'), | ||||
|             'uploader': info['user']['name'], | ||||
|             'uploader_id': info['user']['username'], | ||||
|             'upload_date': unified_strdate(info['created_time']), | ||||
|             'view_count': info['play_count'], | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|         } | ||||
|   | ||||
| @@ -8,7 +8,7 @@ from .common import InfoExtractor | ||||
|  | ||||
| class MorningstarIE(InfoExtractor): | ||||
|     IE_DESC = 'morningstar.com' | ||||
|     _VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869', | ||||
|         'md5': '6c0acface7a787aadc8391e4bbf7b0f5', | ||||
|   | ||||
							
								
								
									
										45
									
								
								youtube_dl/extractor/moviezine.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								youtube_dl/extractor/moviezine.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class MoviezineIE(InfoExtractor):
    # Extractor for video pages under http://www.moviezine.se/video/<id>.
    _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'

    _TEST = {
        'url': 'http://www.moviezine.se/video/205866',
        'info_dict': {
            'id': '205866',
            'ext': 'mp4',
            'title': 'Oculus - Trailer 1',
            'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
            'thumbnail': 're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single moviezine.se video page.

        The media URL, title and thumbnail are read from the site's
        ``player.js`` API response; the description comes from the
        OpenGraph metadata of the HTML page itself.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Two requests: the human-facing page and the JS player config.
        webpage = self._download_webpage(url, video_id)
        player_js = self._download_webpage(
            'http://www.moviezine.se/api/player.js?video=%s' % video_id,
            video_id, 'Downloading js api player')

        # The player config exposes a single file, reported as "sd".
        media_url = self._html_search_regex(
            r'file: "(.+?)",', player_js, 'file')
        formats = [{
            'format_id': 'sd',
            'url': media_url,
            'quality': 0,
            'ext': 'mp4',
        }]
        self._sort_formats(formats)

        title = self._search_regex(r'title: "(.+?)",', player_js, 'title')
        thumbnail = self._search_regex(
            r'image: "(.+?)",', player_js, 'image')
        description = self._og_search_description(webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'description': description,
        }
| @@ -4,9 +4,7 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class MporaIE(InfoExtractor): | ||||
| @@ -20,7 +18,7 @@ class MporaIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'title': 'Katy Curd -  Winter in the Forest', | ||||
|             'duration': 416, | ||||
|             'uploader': 'petenewman', | ||||
|             'uploader': 'Peter Newman Media', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -22,6 +22,7 @@ def _media_xml_tag(tag): | ||||
|  | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     _MOBILE_TEMPLATE = None | ||||
|  | ||||
|     @staticmethod | ||||
|     def _id_from_uri(uri): | ||||
|         return uri.split(':')[-1] | ||||
| @@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|         return base + m.group('finalid') | ||||
|  | ||||
|     def _get_feed_url(self, uri): | ||||
|         return self._FEED_URL | ||||
|  | ||||
|     def _get_thumbnail_url(self, uri, itemdoc): | ||||
|         search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) | ||||
|         thumb_node = itemdoc.find(search_path) | ||||
| @@ -80,6 +84,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                                 }) | ||||
|             except (KeyError, TypeError): | ||||
|                 raise ExtractorError('Invalid rendition field.') | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     def _get_video_info(self, itemdoc): | ||||
| @@ -135,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|  | ||||
|     def _get_videos_info(self, uri): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         feed_url = self._get_feed_url(uri) | ||||
|         data = compat_urllib_parse.urlencode({'uri': uri}) | ||||
|  | ||||
|         idoc = self._download_xml( | ||||
|             self._FEED_URL + '?' + data, video_id, | ||||
|             feed_url + '?' + data, video_id, | ||||
|             'Downloading info', transform_source=fix_xml_ampersands) | ||||
|         return [self._get_video_info(item) for item in idoc.findall('.//item')] | ||||
|  | ||||
| @@ -159,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         return self._get_videos_info(mgid) | ||||
|  | ||||
|  | ||||
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
    # Handles the generic embedded player at media.mtvnservices.com/embed/<mgid>.
    IE_NAME = 'mtvservices:embedded'
    _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'

    _TEST = {
        # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
        'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
        'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
        'info_dict': {
            'id': '1043906',
            'ext': 'mp4',
            'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
            'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
        },
    }

    def _get_feed_url(self, uri):
        """Return the feed URL declared in the player config for ``uri``.

        The config XML is looked up by the mgid with its trailing video id
        removed; the query string of the ``<feed>`` value is dropped.

        Raises ExtractorError when the config has no usable ``<feed>`` node.
        """
        video_id = self._id_from_uri(uri)
        # Strip only the trailing id. A plain str.replace() would also remove
        # the id wherever else it occurs in the mgid (e.g. in the site name).
        if video_id and uri.endswith(video_id):
            site_id = uri[:-len(video_id)]
        else:
            site_id = uri.replace(video_id, '')
        config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
        config_doc = self._download_xml(config_url, video_id)
        feed_node = config_doc.find('.//feed')
        # Fail with a clear extractor error rather than an AttributeError
        # when the node is absent or has no text.
        if feed_node is None or not feed_node.text:
            raise ExtractorError(
                'Unable to find the feed URL in %s' % config_url)
        return feed_node.text.strip().split('?')[0]

    def _real_extract(self, url):
        """Extract the mgid from the embed URL and fetch its videos."""
        mgid = re.match(self._VALID_URL, url).group('mgid')
        return self._get_videos_info(mgid)
|  | ||||
|  | ||||
| class MTVIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'''(?x)^https?:// | ||||
|         (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -12,12 +14,13 @@ class NaverIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://tvcast.naver.com/v/81652', | ||||
|         u'file': u'81652.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', | ||||
|             u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', | ||||
|             u'upload_date': u'20130903', | ||||
|         'url': 'http://tvcast.naver.com/v/81652', | ||||
|         'info_dict': { | ||||
|             'id': '81652', | ||||
|             'ext': 'mp4', | ||||
|             'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', | ||||
|             'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', | ||||
|             'upload_date': '20130903', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -28,7 +31,7 @@ class NaverIE(InfoExtractor): | ||||
|         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', | ||||
|             webpage) | ||||
|         if m_id is None: | ||||
|             raise ExtractorError(u'couldn\'t extract vid and key') | ||||
|             raise ExtractorError('couldn\'t extract vid and key') | ||||
|         vid = m_id.group(1) | ||||
|         key = m_id.group(2) | ||||
|         query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) | ||||
| @@ -39,22 +42,27 @@ class NaverIE(InfoExtractor): | ||||
|         }) | ||||
|         info = self._download_xml( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, | ||||
|             video_id, u'Downloading video info') | ||||
|             video_id, 'Downloading video info') | ||||
|         urls = self._download_xml( | ||||
|             'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, | ||||
|             video_id, u'Downloading video formats info') | ||||
|             video_id, 'Downloading video formats info') | ||||
|  | ||||
|         formats = [] | ||||
|         for format_el in urls.findall('EncodingOptions/EncodingOption'): | ||||
|             domain = format_el.find('Domain').text | ||||
|             if domain.startswith('rtmp'): | ||||
|                 continue | ||||
|             formats.append({ | ||||
|             f = { | ||||
|                 'url': domain + format_el.find('uri').text, | ||||
|                 'ext': 'mp4', | ||||
|                 'width': int(format_el.find('width').text), | ||||
|                 'height': int(format_el.find('height').text), | ||||
|             }) | ||||
|             } | ||||
|             if domain.startswith('rtmp'): | ||||
|                 f.update({ | ||||
|                     'ext': 'flv', | ||||
|                     'rtmp_protocol': '1', # rtmpt | ||||
|                 }) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import find_xpath_attr, compat_str | ||||
| @@ -31,30 +32,68 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' | ||||
|     _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ | ||||
|         ((video/.+?/(?P<id>\d+))| | ||||
|         (feature/[^/]+/(?P<title>.+))) | ||||
|         ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nbcnews.com/video/nbc-news/52753292', | ||||
|         'md5': '47abaac93c6eaf9ad37ee6c4463a5179', | ||||
|         'info_dict': { | ||||
|             'id': '52753292', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Crew emerges after four-month Mars food study', | ||||
|             'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/video/nbc-news/52753292', | ||||
|             'md5': '47abaac93c6eaf9ad37ee6c4463a5179', | ||||
|             'info_dict': { | ||||
|                 'id': '52753292', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Crew emerges after four-month Mars food study', | ||||
|                 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', | ||||
|             'md5': 'b2421750c9f260783721d898f4c42063', | ||||
|             'info_dict': { | ||||
|                 'id': 'I1wpAI_zmhsQ', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'How Twitter Reacted To The Snowden Interview', | ||||
|                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', | ||||
|             }, | ||||
|             'add_ie': ['ThePlatform'], | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) | ||||
|         info = all_info.find('video') | ||||
|         if video_id is not None: | ||||
|             all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) | ||||
|             info = all_info.find('video') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info.find('headline').text, | ||||
|             'ext': 'flv', | ||||
|             'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||
|             'description': compat_str(info.find('caption').text), | ||||
|             'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|         } | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': info.find('headline').text, | ||||
|                 'ext': 'flv', | ||||
|                 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, | ||||
|                 'description': compat_str(info.find('caption').text), | ||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|             } | ||||
|         else: | ||||
|             # "feature" pages use theplatform.com | ||||
|             title = mobj.group('title') | ||||
|             webpage = self._download_webpage(url, title) | ||||
|             bootstrap_json = self._search_regex( | ||||
|                 r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', | ||||
|                 flags=re.MULTILINE) | ||||
|             bootstrap = json.loads(bootstrap_json) | ||||
|             info = bootstrap['results'][0]['video'] | ||||
|             playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' | ||||
|             mpxid = info['mpxId'] | ||||
|             all_videos = self._download_json(playlist_url, title)['videos'] | ||||
|             # The response contains additional videos | ||||
|             info = next(v for v in all_videos if v['mpxId'] == mpxid) | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 # We get the best quality video | ||||
|                 'url': info['videoAssets'][-1]['publicUrl'], | ||||
|                 'ie_key': 'ThePlatform', | ||||
|             } | ||||
|   | ||||
| @@ -4,7 +4,11 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NDRIE(InfoExtractor): | ||||
| @@ -45,17 +49,16 @@ class NDRIE(InfoExtractor): | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         title = self._og_search_title(page) | ||||
|         title = self._og_search_title(page).strip() | ||||
|         description = self._og_search_description(page) | ||||
|         if description: | ||||
|             description = description.strip() | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>', | ||||
|             page) | ||||
|         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None | ||||
|         duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) | ||||
|         mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) | ||||
|         if mp3_url: | ||||
|             formats.append({ | ||||
|                 'url': mp3_url.group('audio'), | ||||
| @@ -64,13 +67,15 @@ class NDRIE(InfoExtractor): | ||||
|  | ||||
|         thumbnail = None | ||||
|  | ||||
|         video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page) | ||||
|         video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page) | ||||
|         if video_url: | ||||
|             thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",', | ||||
|                 page, 'thumbnail', fatal=False) | ||||
|             if thumbnail: | ||||
|                 thumbnail = 'http://www.ndr.de' + thumbnail | ||||
|             for format_id in ['lo', 'hi', 'hq']: | ||||
|             thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) | ||||
|             if thumbnails: | ||||
|                 quality_key = qualities(['xs', 's', 'm', 'l', 'xl']) | ||||
|                 largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1])) | ||||
|                 thumbnail = 'http://www.ndr.de' + largest[0] | ||||
|  | ||||
|             for format_id in 'lo', 'hi', 'hq': | ||||
|                 formats.append({ | ||||
|                     'url': '%s.%s.mp4' % (video_url.group('video'), format_id), | ||||
|                     'format_id': format_id, | ||||
|   | ||||
| @@ -1,22 +1,28 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import month_by_name | ||||
| from ..utils import ( | ||||
|     month_by_name, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NDTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710", | ||||
|         u"file": u"300710.mp4", | ||||
|         u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88", | ||||
|         u"info_dict": { | ||||
|             u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", | ||||
|             u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.", | ||||
|             u"upload_date": u"20131208", | ||||
|             u"duration": 1327, | ||||
|             u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg", | ||||
|         'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', | ||||
|         'md5': '39f992dbe5fb531c395d8bbedb1e5e88', | ||||
|         'info_dict': { | ||||
|             'id': '300710', | ||||
|             'ext': 'mp4', | ||||
|             'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", | ||||
|             'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', | ||||
|             'upload_date': '20131208', | ||||
|             'duration': 1327, | ||||
|             'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         filename = self._search_regex( | ||||
|             r"__filename='([^']+)'", webpage, u'video filename') | ||||
|         video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % | ||||
|             r"__filename='([^']+)'", webpage, 'video filename') | ||||
|         video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % | ||||
|                      filename) | ||||
|  | ||||
|         duration_str = filename = self._search_regex( | ||||
|             r"__duration='([^']+)'", webpage, u'duration', fatal=False) | ||||
|         duration = None if duration_str is None else int(duration_str) | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r"__duration='([^']+)'", webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         date_m = re.search(r'''(?x) | ||||
|             <p\s+class="vod_dateline">\s* | ||||
| @@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor): | ||||
|                 (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+) | ||||
|             ''', webpage) | ||||
|         upload_date = None | ||||
|         assert date_m | ||||
|  | ||||
|         if date_m is not None: | ||||
|             month = month_by_name(date_m.group('monthname')) | ||||
|             if month is not None: | ||||
| @@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor): | ||||
|                     date_m.group('year'), month, int(date_m.group('day'))) | ||||
|  | ||||
|         description = self._og_search_description(webpage) | ||||
|         READ_MORE = u' (Read more)' | ||||
|         READ_MORE = ' (Read more)' | ||||
|         if description.endswith(READ_MORE): | ||||
|             description = description[:-len(READ_MORE)] | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         TITLE_SUFFIX = ' - NDTV' | ||||
|         if title.endswith(TITLE_SUFFIX): | ||||
|             title = title[:-len(TITLE_SUFFIX)] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'duration': duration, | ||||
|   | ||||
							
								
								
									
										87
									
								
								youtube_dl/extractor/newstube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								youtube_dl/extractor/newstube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class NewstubeIE(InfoExtractor):
    # Extractor for newstube.ru media pages; formats are RTMP streams.
    _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
    _TEST = {
        'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs',
        'info_dict': {
            'id': 'd156a237-a6e9-4111-a682-039995f721f1',
            'ext': 'flv',
            'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»',
            'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77',
            'duration': 20.04,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a newstube.ru media page.

        The page supplies the video GUID (from its og:video meta tag) and
        the description; everything else is read from the player XML
        service response.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        video_guid = self._html_search_regex(
            r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
            page, 'video GUID')

        player_url = (
            'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2'
            % (url, video_guid))
        player = self._download_xml(
            player_url, video_guid, 'Downloading player XML')

        namespace = {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}

        def ns(path):
            # Prefix every path step with the service's XML namespace.
            return path.replace('/', '/%(ns)s') % namespace

        def text_of(node, path):
            # Text of the namespaced child element found at ``path``.
            return node.find(ns(path)).text

        session_id = text_of(player, './SessionId')
        media_info = player.find(ns('./Medias/MediaInfo'))
        title = text_of(media_info, './Name')
        description = self._og_search_description(page)
        thumbnail = text_of(media_info, './KeyFrame')
        # The Duration value is divided by 1000.0 (float result).
        duration = int(text_of(media_info, './Duration')) / 1000.0

        formats = []
        for stream in media_info.findall(ns('./Streams/StreamInfo')):
            location = stream.find(ns('./MediaLocation'))
            if location is not None:
                server = text_of(location, './Server')
                app = text_of(location, './App')
                media_id = text_of(stream, './Id')
                quality_id = text_of(stream, './QualityId')
                name = text_of(stream, './Name')
                width = int(text_of(stream, './Width'))
                height = int(text_of(stream, './Height'))

                formats.append({
                    'url': 'rtmp://%s/%s' % (server, app),
                    'app': app,
                    'play_path': '01/%s' % video_guid.upper(),
                    'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
                    'page_url': url,
                    'ext': 'flv',
                    'format_id': quality_id,
                    'format_note': name,
                    'width': width,
                    'height': height,
                })

        self._sort_formats(formats)

        return {
            'id': video_guid,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
| @@ -73,14 +73,16 @@ class NFBIE(InfoExtractor): | ||||
|                 title = media.find('title').text | ||||
|                 description = media.find('description').text | ||||
|                 # It seems assets always go from lower to better quality, so no need to sort | ||||
|                 formats = [{ | ||||
|                     'url': x.find('default/streamerURI').text, | ||||
|                     'app': x.find('default/streamerURI').text.split('/', 3)[3], | ||||
|                     'play_path': x.find('default/url').text, | ||||
|                     'rtmp_live': False, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': x.get('quality'), | ||||
|                 } for x in media.findall('assets/asset')] | ||||
|                 for asset in media.findall('assets/asset'): | ||||
|                     for x in asset: | ||||
|                         formats.append({ | ||||
|                             'url': x.find('streamerURI').text, | ||||
|                             'app': x.find('streamerURI').text.split('/', 3)[3], | ||||
|                             'play_path': x.find('url').text, | ||||
|                             'rtmp_live': False, | ||||
|                             'ext': 'mp4', | ||||
|                             'format_id': '%s-%s' % (x.tag, asset.get('quality')), | ||||
|                         }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,15 +1,22 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import str_to_int | ||||
|  | ||||
|  | ||||
| class NineGagIE(InfoExtractor): | ||||
|     IE_NAME = '9gag' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/ | ||||
|         (?: | ||||
|             v/(?P<numid>[0-9]+)| | ||||
|             p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+) | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         "url": "http://9gag.tv/v/1912", | ||||
|         "info_dict": { | ||||
|             "id": "1912", | ||||
| @@ -20,34 +27,42 @@ class NineGagIE(InfoExtractor): | ||||
|             "thumbnail": "re:^https?://", | ||||
|         }, | ||||
|         'add_ie': ['Youtube'] | ||||
|     } | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', | ||||
|         'info_dict': { | ||||
|             'id': 'KklwM', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': 'alternate-banned-opening-scene-of-gravity', | ||||
|             "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.", | ||||
|             'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie", | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = mobj.group('numid') or mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         youtube_id = self._html_search_regex( | ||||
|             r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', | ||||
|             webpage, 'video ID') | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
|         view_count_str = self._html_search_regex( | ||||
|             r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count', | ||||
|             fatal=False) | ||||
|         view_count = ( | ||||
|             None if view_count_str is None | ||||
|             else int(view_count_str.replace(',', ''))) | ||||
|         post_view = json.loads(self._html_search_regex( | ||||
|             r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) | ||||
|  | ||||
|         youtube_id = post_view['videoExternalId'] | ||||
|         title = post_view['title'] | ||||
|         description = post_view['description'] | ||||
|         view_count = str_to_int(post_view['externalView']) | ||||
|         thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w') | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': youtube_id, | ||||
|             'ie_key': 'Youtube', | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'view_count': view_count, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										106
									
								
								youtube_dl/extractor/noco.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								youtube_dl/extractor/noco.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', | ||||
|         'md5': '0a993f0058ddbcd902630b2047ef710e', | ||||
|         'info_dict': { | ||||
|             'id': '11538', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ami Ami Idol - Hello! France', | ||||
|             'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', | ||||
|             'upload_date': '20140412', | ||||
|             'uploader': 'Nolife', | ||||
|             'uploader_id': 'NOL', | ||||
|             'duration': 2851.2, | ||||
|         }, | ||||
|         'skip': 'Requires noco account', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         medias = self._download_json( | ||||
|             'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for fmt in medias['fr']['video_list']['default']['quality_list']: | ||||
|             format_id = fmt['quality_key'] | ||||
|  | ||||
|             file = self._download_json( | ||||
|                 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), | ||||
|                 video_id, 'Downloading %s video JSON' % format_id) | ||||
|  | ||||
|             file_url = file['file'] | ||||
|             if not file_url: | ||||
|                 continue | ||||
|  | ||||
|             if file_url == 'forbidden': | ||||
|                 raise ExtractorError( | ||||
|                     '%s returned error: %s - %s' % ( | ||||
|                         self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']), | ||||
|                     expected=True) | ||||
|  | ||||
|             formats.append({ | ||||
|                 'url': file_url, | ||||
|                 'format_id': format_id, | ||||
|                 'width': fmt['res_width'], | ||||
|                 'height': fmt['res_lines'], | ||||
|                 'abr': fmt['audiobitrate'], | ||||
|                 'vbr': fmt['videobitrate'], | ||||
|                 'filesize': fmt['filesize'], | ||||
|                 'format_note': fmt['quality_name'], | ||||
|                 'preference': fmt['priority'], | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         show = self._download_json( | ||||
|             'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] | ||||
|  | ||||
|         upload_date = unified_strdate(show['indexed']) | ||||
|         uploader = show['partner_name'] | ||||
|         uploader_id = show['partner_key'] | ||||
|         duration = show['duration_ms'] / 1000.0 | ||||
|         thumbnail = show['screenshot'] | ||||
|  | ||||
|         episode = show.get('show_TT') or show.get('show_OT') | ||||
|         family = show.get('family_TT') or show.get('family_OT') | ||||
|         episode_number = show.get('episode_number') | ||||
|  | ||||
|         title = '' | ||||
|         if family: | ||||
|             title += family | ||||
|         if episode_number: | ||||
|             title += ' #' + compat_str(episode_number) | ||||
|         if episode: | ||||
|             title += ' - ' + episode | ||||
|  | ||||
|         description = show.get('show_resume') or show.get('family_resume') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -4,9 +4,7 @@ import re | ||||
|  | ||||
| from .brightcove import BrightcoveIE | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class NownessIE(InfoExtractor): | ||||
| @@ -14,9 +12,10 @@ class NownessIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', | ||||
|         'file': '2520295746001.mp4', | ||||
|         'md5': '0ece2f70a7bd252c7b00f3070182d418', | ||||
|         'md5': '068bc0202558c2e391924cb8cc470676', | ||||
|         'info_dict': { | ||||
|             'id': '2520295746001', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'Candor: The Art of Gesticulation', | ||||
|             'uploader': 'Nowness', | ||||
|             'title': 'Candor: The Art of Gesticulation', | ||||
|   | ||||
							
								
								
									
										144
									
								
								youtube_dl/extractor/nrk.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								youtube_dl/extractor/nrk.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NRKIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/', | ||||
|             'md5': 'a6eac35052f3b242bb6bb7f43aed5886', | ||||
|             'info_dict': { | ||||
|                 'id': '150533', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Dompap og andre fugler i Piip-Show', | ||||
|                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/', | ||||
|             'md5': '3471f2a51718195164e88f46bf427668', | ||||
|             'info_dict': { | ||||
|                 'id': '154915', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Slik høres internett ut når du er blind', | ||||
|                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON') | ||||
|  | ||||
|         if data['usageRights']['isGeoBlocked']: | ||||
|             raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True) | ||||
|  | ||||
|         video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124' | ||||
|  | ||||
|         images = data.get('images') | ||||
|         if images: | ||||
|             thumbnails = images['webImages'] | ||||
|             thumbnails.sort(key=lambda image: image['pixelWidth']) | ||||
|             thumbnail = thumbnails[-1]['imageUrl'] | ||||
|         else: | ||||
|             thumbnail = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': data['title'], | ||||
|             'description': data['description'], | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NRKTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', | ||||
|             'md5': '7b96112fbae1faf09a6f9ae1aff6cb84', | ||||
|             'info_dict': { | ||||
|                 'id': 'MUHH48000314', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '20 spørsmål', | ||||
|                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', | ||||
|                 'upload_date': '20140523', | ||||
|                 'duration': 1741.52, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://tv.nrk.no/program/mdfp15000514', | ||||
|             'md5': 'af01795a31f1cf7265c8657534d8077b', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdfp15000514', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', | ||||
|                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', | ||||
|                 'upload_date': '20140524', | ||||
|                 'duration': 4605.0, | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._html_search_meta('title', page, 'title') | ||||
|         description = self._html_search_meta('description', page, 'description') | ||||
|         thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) | ||||
|         upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) | ||||
|         duration = float_or_none( | ||||
|             self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         f4m_url = re.search(r'data-media="([^"]+)"', page) | ||||
|         if f4m_url: | ||||
|             formats.append({ | ||||
|                 'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', | ||||
|                 'format_id': 'f4m', | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|  | ||||
|         m3u8_url = re.search(r'data-hls-media="([^"]+)"', page) | ||||
|         if m3u8_url: | ||||
|             formats.append({ | ||||
|                 'url': m3u8_url.group(1), | ||||
|                 'format_id': 'm3u8', | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -5,7 +5,6 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML | ||||
| ) | ||||
|  | ||||
| @@ -24,9 +23,9 @@ class NTVIE(InfoExtractor): | ||||
|                 'duration': 136, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|                     'skip_download': True, | ||||
|                 }, | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/video/novosti/750370/', | ||||
| @@ -38,9 +37,9 @@ class NTVIE(InfoExtractor): | ||||
|                 'duration': 172, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|                     'skip_download': True, | ||||
|                 }, | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', | ||||
| @@ -52,23 +51,23 @@ class NTVIE(InfoExtractor): | ||||
|                 'duration': 1496, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|                     'skip_download': True, | ||||
|                 }, | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|             'info_dict': { | ||||
|                 'id': '750783', | ||||
|                 'id': '758100', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ', | ||||
|                 'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ', | ||||
|                 'duration': 28, | ||||
|                 'title': 'Остросюжетный фильм «Кома»', | ||||
|                 'description': 'Остросюжетный фильм «Кома»', | ||||
|                 'duration': 5592, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|                     'skip_download': True, | ||||
|                 }, | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', | ||||
| @@ -80,33 +79,25 @@ class NTVIE(InfoExtractor): | ||||
|                 'duration': 2590, | ||||
|             }, | ||||
|             'params': { | ||||
|                     # rtmp download | ||||
|                     'skip_download': True, | ||||
|                 }, | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     _VIDEO_ID_REGEXES = [ | ||||
|         r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)', | ||||
|         r'<video embed=[^>]+><id>(\d+)</id>', | ||||
|         r'<video restriction[^>]+><key>(\d+)</key>' | ||||
|         r'<video restriction[^>]+><key>(\d+)</key>', | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         for pattern in self._VIDEO_ID_REGEXES: | ||||
|             mobj = re.search(pattern, page) | ||||
|             if mobj: | ||||
|                 break | ||||
|  | ||||
|         if not mobj: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id') | ||||
|  | ||||
|         player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') | ||||
|         title = unescapeHTML(player.find('./data/title').text) | ||||
| @@ -124,7 +115,7 @@ class NTVIE(InfoExtractor): | ||||
|             '7': 'video2', | ||||
|         } | ||||
|  | ||||
|         app = apps[puid22] if puid22 in apps else apps['4'] | ||||
|         app = apps.get(puid22, apps['4']) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['', 'hi', 'webm']: | ||||
|   | ||||
							
								
								
									
										69
									
								
								youtube_dl/extractor/nuvid.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								youtube_dl/extractor/nuvid.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NuvidIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://m.nuvid.com/video/1310741/', | ||||
|         'md5': 'eab207b7ac4fccfb4e23c86201f11277', | ||||
|         'info_dict': { | ||||
|             'id': '1310741', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Horny babes show their awesome bodeis and', | ||||
|             'duration': 129, | ||||
|             'upload_date': '20140508', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: | ||||
|             request = compat_urllib_request.Request( | ||||
|                 'http://m.nuvid.com/play/%s' % video_id) | ||||
|             request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) | ||||
|             webpage = self._download_webpage( | ||||
|                 request, video_id, 'Downloading %s page' % format_id) | ||||
|             video_url = self._html_search_regex( | ||||
|                 r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False) | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|             }) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page') | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip() | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"', | ||||
|             webpage, 'thumbnail URL', fatal=False) | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False)) | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': 'http://m.nuvid.com%s' % thumbnail, | ||||
|             'duration': duration, | ||||
|             'upload_date': upload_date, | ||||
|             'age_limit': 18, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										77
									
								
								youtube_dl/extractor/nytimes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								youtube_dl/extractor/nytimes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_iso8601 | ||||
|  | ||||
|  | ||||
| class NYTimesIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', | ||||
|         'md5': '18a525a510f942ada2720db5f31644c0', | ||||
|         'info_dict': { | ||||
|             'id': '100000002847155', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Verbatim: What Is a Photocopier?', | ||||
|             'description': 'md5:93603dada88ddbda9395632fdc5da260', | ||||
|             'timestamp': 1398631707, | ||||
|             'upload_date': '20140427', | ||||
|             'uploader': 'Brett Weiner', | ||||
|             'duration': 419, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         title = video_data['headline'] | ||||
|         description = video_data['summary'] | ||||
|         duration = video_data['duration'] / 1000.0 | ||||
|  | ||||
|         uploader = video_data['byline'] | ||||
|         timestamp = parse_iso8601(video_data['publication_date'][:-8]) | ||||
|  | ||||
|         def get_file_size(file_size): | ||||
|             if isinstance(file_size, int): | ||||
|                 return file_size | ||||
|             elif isinstance(file_size, dict): | ||||
|                 return int(file_size.get('value', 0)) | ||||
|             else: | ||||
|                 return 0 | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': video['url'], | ||||
|                 'format_id': video['type'], | ||||
|                 'vcodec': video['video_codec'], | ||||
|                 'width': video['width'], | ||||
|                 'height': video['height'], | ||||
|                 'filesize': get_file_size(video['fileSize']), | ||||
|             } for video in video_data['renditions'] | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [ | ||||
|             { | ||||
|                 'url': 'http://www.nytimes.com/%s' % image['url'], | ||||
|                 'resolution': '%dx%d' % (image['width'], image['height']), | ||||
|             } for image in video_data['images'] | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
| @@ -1,10 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse | ||||
|  | ||||
|  | ||||
| class PhotobucketIE(InfoExtractor): | ||||
| @@ -14,6 +14,7 @@ class PhotobucketIE(InfoExtractor): | ||||
|         'file': 'zpsc0c3b9fa.mp4', | ||||
|         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', | ||||
|         'info_dict': { | ||||
|             'timestamp': 1367669341, | ||||
|             'upload_date': '20130504', | ||||
|             'uploader': 'rachaneronas', | ||||
|             'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', | ||||
| @@ -32,11 +33,12 @@ class PhotobucketIE(InfoExtractor): | ||||
|         info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', | ||||
|             webpage, 'info json') | ||||
|         info = json.loads(info_json) | ||||
|         url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['downloadUrl'], | ||||
|             'url': url, | ||||
|             'uploader': info['username'], | ||||
|             'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'), | ||||
|             'timestamp': info['creationDate'], | ||||
|             'title': info['title'], | ||||
|             'ext': video_extension, | ||||
|             'thumbnail': info['thumbUrl'], | ||||
|   | ||||
| @@ -6,22 +6,36 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class PodomaticIE(InfoExtractor): | ||||
|     IE_NAME = 'podomatic' | ||||
|     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", | ||||
|         "file": "2009-01-02T16_03_35-08_00.mp3", | ||||
|         "md5": "84bb855fcf3429e6bf72460e1eed782d", | ||||
|         "info_dict": { | ||||
|             "uploader": "Science Teaching Tips", | ||||
|             "uploader_id": "scienceteachingtips", | ||||
|             "title": "64.  When the Moon Hits Your Eye", | ||||
|             "duration": 446, | ||||
|         } | ||||
|     } | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', | ||||
|             'md5': '84bb855fcf3429e6bf72460e1eed782d', | ||||
|             'info_dict': { | ||||
|                 'id': '2009-01-02T16_03_35-08_00', | ||||
|                 'ext': 'mp3', | ||||
|                 'uploader': 'Science Teaching Tips', | ||||
|                 'uploader_id': 'scienceteachingtips', | ||||
|                 'title': '64.  When the Moon Hits Your Eye', | ||||
|                 'duration': 446, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', | ||||
|             'md5': 'd2cf443931b6148e27638650e2638297', | ||||
|             'info_dict': { | ||||
|                 'id': '2013-11-15T16_31_21-08_00', | ||||
|                 'ext': 'mp3', | ||||
|                 'uploader': 'Ostbahnhof / Techno Mix', | ||||
|                 'uploader_id': 'ostbahnhof', | ||||
|                 'title': 'Einunddreizig', | ||||
|                 'duration': 3799, | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor): | ||||
|                      '?permalink=true&rtmp=0') % | ||||
|                     (mobj.group('proto'), channel, video_id)) | ||||
|         data_json = self._download_webpage( | ||||
|             json_url, video_id, note=u'Downloading video info') | ||||
|             json_url, video_id, 'Downloading video info') | ||||
|         data = json.loads(data_json) | ||||
|  | ||||
|         video_url = data['downloadLink'] | ||||
|         if not video_url: | ||||
|             video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) | ||||
|         uploader = data['podcast'] | ||||
|         title = data['title'] | ||||
|         thumbnail = data['imageLocation'] | ||||
|   | ||||
| @@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', | ||||
|             r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
|   | ||||
| @@ -8,8 +8,6 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     RegexNotFoundError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -160,18 +158,19 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|     _CLIPID_REGEXES = [ | ||||
|         r'"clip_id"\s*:\s+"(\d+)"', | ||||
|         r'clipid: "(\d+)"', | ||||
|         r'clip[iI]d=(\d+)', | ||||
|     ] | ||||
|     _TITLE_REGEXES = [ | ||||
|         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', | ||||
|         r'<header class="clearfix">\s*<h3>(.+?)</h3>', | ||||
|         r'<!-- start video -->\s*<h1>(.+?)</h1>', | ||||
|         r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>', | ||||
|         r'<h1 class="att-name">\s*(.+?)</h1>', | ||||
|     ] | ||||
|     _DESCRIPTION_REGEXES = [ | ||||
|         r'<p itemprop="description">\s*(.+?)</p>', | ||||
|         r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', | ||||
|         r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', | ||||
|         r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">', | ||||
|         r'<p class="att-description">\s*(.+?)\s*</p>', | ||||
|     ] | ||||
|     _UPLOAD_DATE_REGEXES = [ | ||||
|         r'<meta property="og:published_time" content="(.+?)">', | ||||
| @@ -187,16 +186,7 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         def extract(patterns, name, page, fatal=False): | ||||
|             for pattern in patterns: | ||||
|                 mobj = re.search(pattern, page) | ||||
|                 if mobj: | ||||
|                     return clean_html(mobj.group(1)) | ||||
|             if fatal: | ||||
|                 raise RegexNotFoundError(u'Unable to extract %s' % name) | ||||
|             return None | ||||
|  | ||||
|         clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True) | ||||
|         clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id') | ||||
|  | ||||
|         access_token = 'testclient' | ||||
|         client_name = 'kolibri-1.2.5' | ||||
| @@ -245,13 +235,12 @@ class ProSiebenSat1IE(InfoExtractor): | ||||
|  | ||||
|         urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON') | ||||
|  | ||||
|         title = extract(self._TITLE_REGEXES, 'title', page, fatal=True) | ||||
|         description = extract(self._DESCRIPTION_REGEXES, 'description', page) | ||||
|         title = self._html_search_regex(self._TITLE_REGEXES, page, 'title') | ||||
|         description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False) | ||||
|         thumbnail = self._og_search_thumbnail(page) | ||||
|  | ||||
|         upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page) | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|   | ||||
| @@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor): | ||||
|             return self.url_result(m_youtube.group(1), 'Youtube') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL) | ||||
|             r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>', | ||||
|             webpage, 'title', flags=re.DOTALL) | ||||
|         video_url = self._search_regex( | ||||
|             [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], | ||||
|             webpage, 'video url', flags=re.DOTALL) | ||||
|   | ||||
							
								
								
									
										121
									
								
								youtube_dl/extractor/rai.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								youtube_dl/extractor/rai.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,121 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RaiIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', | ||||
|             'md5': 'c064c0b2d09c278fb293116ef5d0a32d', | ||||
|             'info_dict': { | ||||
|                 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Report del 07/04/2014', | ||||
|                 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', | ||||
|                 'upload_date': '20140407', | ||||
|                 'duration': 6160, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', | ||||
|             'md5': '8bb9c151924ce241b74dd52ef29ceafa', | ||||
|             'info_dict': { | ||||
|                 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'TG PRIMO TEMPO', | ||||
|                 'description': '', | ||||
|                 'upload_date': '20140612', | ||||
|                 'duration': 1758, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', | ||||
|             'md5': '35cf7c229f22eeef43e48b5cf923bef0', | ||||
|             'info_dict': { | ||||
|                 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'State of the Net, Antonella La Carpia: regole virali', | ||||
|                 'description': 'md5:b0ba04a324126903e3da7763272ae63c', | ||||
|                 'upload_date': '20140613', | ||||
|             }, | ||||
|             'skip': 'Error 404', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', | ||||
|             'md5': '35694f062977fe6619943f08ed935730', | ||||
|             'info_dict': { | ||||
|                 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Alluvione in Sardegna e dissesto idrogeologico', | ||||
|                 'description': 'Edizione delle ore 20:30 ', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') | ||||
|  | ||||
|         title = media.get('name') | ||||
|         description = media.get('desc') | ||||
|         thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') | ||||
|         duration = parse_duration(media.get('length')) | ||||
|         uploader = media.get('author') | ||||
|         upload_date = unified_strdate(media.get('date')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']: | ||||
|             media_url = media.get(format_id) | ||||
|             if not media_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': media_url, | ||||
|                 'format_id': format_id, | ||||
|                 'ext': 'mp4', | ||||
|             }) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             self._list_available_subtitles(video_id, page) | ||||
|             return | ||||
|  | ||||
|         subtitles = {} | ||||
|         if self._have_to_download_any_subtitles: | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             subtitles = self.extract_subtitles(video_id, page) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         subtitles = {} | ||||
|         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) | ||||
|         if m: | ||||
|             captions = m.group('captions') | ||||
|             STL_EXT = '.stl' | ||||
|             SRT_EXT = '.srt' | ||||
|             if captions.endswith(STL_EXT): | ||||
|                 captions = captions[:-len(STL_EXT)] + SRT_EXT | ||||
|             subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) | ||||
|         return subtitles | ||||
							
								
								
									
										49
									
								
								youtube_dl/extractor/rtbf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								youtube_dl/extractor/rtbf.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class RTBFIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', | ||||
|         'md5': '799f334ddf2c0a582ba80c44655be570', | ||||
|         'info_dict': { | ||||
|             'id': '1921274', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Les Diables au coeur (épisode 2)', | ||||
|             'description': 'Football - Diables Rouges', | ||||
|             'duration': 3099, | ||||
|             'timestamp': 1398456336, | ||||
|             'upload_date': '20140425', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) | ||||
|  | ||||
|         data = json.loads(self._html_search_regex( | ||||
|             r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data'] | ||||
|  | ||||
|         video_url = data.get('downloadUrl') or data.get('url') | ||||
|  | ||||
|         if data['provider'].lower() == 'youtube': | ||||
|             return self.url_result(video_url, 'Youtube') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': data['title'], | ||||
|             'description': data.get('description') or data.get('subtitle'), | ||||
|             'thumbnail': data['thumbnail']['large'], | ||||
|             'duration': data.get('duration') or data.get('realDuration'), | ||||
|             'timestamp': data['created'], | ||||
|             'view_count': data['viewCount'], | ||||
|         } | ||||
| @@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor): | ||||
|             'http://rutube.ru/api/video/%s/?format=json' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         trackinfo = self._download_json( | ||||
|             'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, | ||||
|             video_id, 'Downloading trackinfo JSON') | ||||
|  | ||||
|         # Some videos don't have the author field | ||||
|         author = trackinfo.get('author') or {} | ||||
|         m3u8_url = trackinfo['video_balancer'].get('m3u8') | ||||
|         author = video.get('author') or {} | ||||
|  | ||||
|         options = self._download_json( | ||||
|             'http://rutube.ru/api/play/options/%s/?format=json' % video_id, | ||||
|             video_id, 'Downloading options JSON') | ||||
|  | ||||
|         m3u8_url = options['video_balancer'].get('m3u8') | ||||
|         if m3u8_url is None: | ||||
|             raise ExtractorError('Couldn\'t find m3u8 manifest url') | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,12 @@ from ..utils import ( | ||||
|  | ||||
| class RUTVIE(InfoExtractor): | ||||
|     IE_DESC = 'RUTV.RU' | ||||
|     _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://player\.(?:rutv\.ru|vgtrk\.com)/ | ||||
|             (?P<path>flash2v/container\.swf\?id= | ||||
|             |iframe/(?P<type>swf|video|live)/id/ | ||||
|             |index/iframe/cast_id/) | ||||
|             (?P<id>\d+)''' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -90,7 +95,7 @@ class RUTVIE(InfoExtractor): | ||||
|     @classmethod | ||||
|     def _extract_url(cls, webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage) | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
| @@ -103,10 +108,16 @@ class RUTVIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_type = mobj.group('type') | ||||
|         video_path = mobj.group('path') | ||||
|  | ||||
|         if not video_type or video_type == 'swf': | ||||
|         if video_path.startswith('flash2v'): | ||||
|             video_type = 'video' | ||||
|         elif video_path.startswith('iframe'): | ||||
|             video_type = mobj.group('type') | ||||
|             if video_type == 'swf': | ||||
|                 video_type = 'video' | ||||
|         elif video_path.startswith('index/iframe/cast_id'): | ||||
|             video_type = 'live' | ||||
|  | ||||
|         json_data = self._download_json( | ||||
|             'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), | ||||
|   | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/scivee.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/scivee.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class SciVeeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.scivee.tv/node/62352', | ||||
|         'md5': 'b16699b74c9e6a120f6772a44960304f', | ||||
|         'info_dict': { | ||||
|             'id': '62352', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', | ||||
|             'description': 'md5:81f1710638e11a481358fab1b11059d7', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         # annotations XML is malformed | ||||
|         annotations = self._download_webpage( | ||||
|             'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations') | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title') | ||||
|         description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False) | ||||
|         filesize = int_or_none(self._html_search_regex( | ||||
|             r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False)) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': 'http://www.scivee.tv/assets/audio/%s' % video_id, | ||||
|                 'ext': 'mp3', | ||||
|                 'format_id': 'audio', | ||||
|             }, | ||||
|             { | ||||
|                 'url': 'http://www.scivee.tv/assets/video/%s' % video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'video', | ||||
|                 'filesize': filesize, | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor): | ||||
|         ext = info['jsplayer']['video_extension'] | ||||
|         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) | ||||
|         description = self._html_search_regex( | ||||
|             r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description') | ||||
|             r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage, | ||||
|             'description', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|   | ||||
							
								
								
									
										44
									
								
								youtube_dl/extractor/slutload.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								youtube_dl/extractor/slutload.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SlutloadIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', | ||||
|         'md5': '0cf531ae8006b530bd9df947a6a0df77', | ||||
|         'info_dict': { | ||||
|             'id': 'TD73btpBqSxc', | ||||
|             'ext': 'mp4', | ||||
|             "title": "virginie baisee en cam", | ||||
|             "age_limit": 18, | ||||
|             'thumbnail': 're:https?://.*?\.jpg' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', | ||||
|             webpage, 'title').strip() | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', | ||||
|             webpage, 'video URL') | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': 18 | ||||
|         } | ||||
| @@ -1,7 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
|  | ||||
| @@ -12,6 +11,7 @@ from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|  | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
| @@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|        of the stream token and uid | ||||
|      """ | ||||
|  | ||||
|     _VALID_URL = r'''^(?:https?://)? | ||||
|     _VALID_URL = r'''(?x)^(?:https?://)? | ||||
|                     (?:(?:(?:www\.|m\.)?soundcloud\.com/ | ||||
|                             (?P<uploader>[\w\d-]+)/ | ||||
|                             (?!sets/)(?P<title>[\w\d-]+)/? | ||||
| @@ -44,7 +44,8 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 "upload_date": "20121011", | ||||
|                 "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", | ||||
|                 "uploader": "E.T. ExTerrestrial Music", | ||||
|                 "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" | ||||
|                 "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1", | ||||
|                 "duration": 143, | ||||
|             } | ||||
|         }, | ||||
|         # not streamable song | ||||
| @@ -57,6 +58,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', | ||||
|                 'uploader': 'The Royal Concept', | ||||
|                 'upload_date': '20120521', | ||||
|                 'duration': 227, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp | ||||
| @@ -74,6 +76,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'uploader': 'jaimeMF', | ||||
|                 'description': 'test chars:  \"\'/\\ä↭', | ||||
|                 'upload_date': '20131209', | ||||
|                 'duration': 9, | ||||
|             }, | ||||
|         }, | ||||
|         # downloadable song | ||||
| @@ -87,6 +90,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|                 'description': 'Vocals', | ||||
|                 'uploader': 'Sim Gretina', | ||||
|                 'upload_date': '20130815', | ||||
|                 #'duration': 42, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
| @@ -94,10 +98,6 @@ class SoundcloudIE(InfoExtractor): | ||||
|     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|         """Report information extraction.""" | ||||
|         self.to_screen('%s: Resolving id' % video_id) | ||||
| @@ -123,6 +123,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|             'title': info['title'], | ||||
|             'description': info['description'], | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': int_or_none(info.get('duration'), 1000), | ||||
|         } | ||||
|         formats = [] | ||||
|         if info.get('downloadable', False): | ||||
| @@ -141,11 +142,10 @@ class SoundcloudIE(InfoExtractor): | ||||
|         # We have to retrieve the url | ||||
|         streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' | ||||
|             'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) | ||||
|         stream_json = self._download_webpage( | ||||
|         format_dict = self._download_json( | ||||
|             streams_url, | ||||
|             track_id, 'Downloading track url') | ||||
|  | ||||
|         format_dict = json.loads(stream_json) | ||||
|         for key, stream_url in format_dict.items(): | ||||
|             if key.startswith('http'): | ||||
|                 formats.append({ | ||||
| @@ -198,7 +198,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|             full_title = track_id | ||||
|         elif mobj.group('player'): | ||||
|             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|             return self.url_result(query['url'][0], ie='Soundcloud') | ||||
|             return self.url_result(query['url'][0]) | ||||
|         else: | ||||
|             # extract uploader (which is in the url) | ||||
|             uploader = mobj.group('uploader') | ||||
| @@ -213,11 +213,11 @@ class SoundcloudIE(InfoExtractor): | ||||
|      | ||||
|             url = 'http://soundcloud.com/%s' % resolve_title | ||||
|             info_json_url = self._resolv_url(url) | ||||
|         info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON') | ||||
|         info = self._download_json(info_json_url, full_title, 'Downloading info JSON') | ||||
|  | ||||
|         info = json.loads(info_json) | ||||
|         return self._extract_info_dict(info, full_title, secret_token=token) | ||||
|  | ||||
|  | ||||
| class SoundcloudSetIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' | ||||
|     IE_NAME = 'soundcloud:set' | ||||
| @@ -232,16 +232,15 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|         # extract uploader (which is in the url) | ||||
|         uploader = mobj.group(1) | ||||
|         # extract simple title (uploader + slug of song title) | ||||
|         slug_title =  mobj.group(2) | ||||
|         slug_title = mobj.group(2) | ||||
|         full_title = '%s/sets/%s' % (uploader, slug_title) | ||||
|  | ||||
|         self.report_resolve(full_title) | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         info_json = self._download_webpage(resolv_url, full_title) | ||||
|         info = self._download_json(resolv_url, full_title) | ||||
|  | ||||
|         info = json.loads(info_json) | ||||
|         if 'errors' in info: | ||||
|             for err in info['errors']: | ||||
|                 self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message'])) | ||||
| @@ -268,26 +267,55 @@ class SoundcloudUserIE(SoundcloudIE): | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/' % uploader | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         user_json = self._download_webpage(resolv_url, uploader, | ||||
|             'Downloading user info') | ||||
|         user = json.loads(user_json) | ||||
|         user = self._download_json( | ||||
|             resolv_url, uploader, 'Downloading user info') | ||||
|         base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader | ||||
|  | ||||
|         tracks = [] | ||||
|         entries = [] | ||||
|         for i in itertools.count(): | ||||
|             data = compat_urllib_parse.urlencode({'offset': i*50, | ||||
|                                                   'client_id': self._CLIENT_ID, | ||||
|                                                   }) | ||||
|             tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data | ||||
|             response = self._download_webpage(tracks_url, uploader,  | ||||
|                 'Downloading tracks page %s' % (i+1)) | ||||
|             new_tracks = json.loads(response) | ||||
|             tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) | ||||
|             if len(new_tracks) < 50: | ||||
|             data = compat_urllib_parse.urlencode({ | ||||
|                 'offset': i * 50, | ||||
|                 'client_id': self._CLIENT_ID, | ||||
|             }) | ||||
|             new_entries = self._download_json( | ||||
|                 base_url + data, uploader, 'Downloading track page %s' % (i + 1)) | ||||
|             entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) | ||||
|             if len(new_entries) < 50: | ||||
|                 break | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': compat_str(user['id']), | ||||
|             'title': user['username'], | ||||
|             'entries': tracks, | ||||
|             'entries': entries, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SoundcloudPlaylistIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)' | ||||
|     IE_NAME = 'soundcloud:playlist' | ||||
|  | ||||
|      # it's in tests/test_playlists.py | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id) | ||||
|  | ||||
|         data = compat_urllib_parse.urlencode({ | ||||
|             'client_id': self._CLIENT_ID, | ||||
|         }) | ||||
|         data = self._download_json( | ||||
|             base_url + data, playlist_id, 'Downloading playlist') | ||||
|  | ||||
|         entries = [ | ||||
|             self._extract_info_dict(t, quiet=True) for t in data['tracks']] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': data.get('title'), | ||||
|             'description': data.get('description'), | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         'file': '1259285.mp4', | ||||
|         'md5': '2c2754212136f35fb4b19767d242f66e', | ||||
|         'info_dict': { | ||||
|             'id': '1259285', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', | ||||
|             'description': 'md5:8029d8310232196eb235d27575a8b9f4', | ||||
|             'duration': 49, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|     }, { | ||||
|         'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', | ||||
|         'file': '1309159.mp4', | ||||
|         'md5': 'f2cdf638d7aa47654e251e1aee360af1', | ||||
|         'info_dict': { | ||||
|             'id': '1309159', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', | ||||
|             'description': 'md5:c2322b65e58f385a820c10fa03b2d088', | ||||
|             'duration': 983, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html', | ||||
|         'md5': '54f58ba0e752e3c07bc2a26222dd0acf', | ||||
|         'info_dict': { | ||||
|             'id': '1502367', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern', | ||||
|             'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91', | ||||
|             'duration': 42, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -30,18 +46,20 @@ class SpiegelIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="module-title">(.*?)</div>', webpage, 'title') | ||||
|         description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' | ||||
|         idoc = self._download_xml( | ||||
|             xml_url, video_id, | ||||
|             note='Downloading XML', errnote='Failed to download XML') | ||||
|         base_url = self._search_regex( | ||||
|             r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL') | ||||
|  | ||||
|         xml_url = base_url + video_id + '.xml' | ||||
|         idoc = self._download_xml(xml_url, video_id) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'format_id': n.tag.rpartition('type')[2], | ||||
|                 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text, | ||||
|                 'url': base_url + n.find('./filename').text, | ||||
|                 'width': int(n.find('./width').text), | ||||
|                 'height': int(n.find('./height').text), | ||||
|                 'abr': int(n.find('./audiobitrate').text), | ||||
| @@ -59,7 +77,8 @@ class SpiegelIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										81
									
								
								youtube_dl/extractor/spiegeltv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								youtube_dl/extractor/spiegeltv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SpiegeltvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.spiegel.tv/filme/flug-mh370/', | ||||
|         'info_dict': { | ||||
|             'id': 'flug-mh370', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Flug MH370', | ||||
|             'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', | ||||
|             'thumbnail': 're:http://.*\.jpg$', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') | ||||
|  | ||||
|         apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com' | ||||
|         version_json = self._download_json( | ||||
|             '%s/version.json' % apihost, video_id, | ||||
|             note='Downloading version information') | ||||
|         version_name = version_json['version_name'] | ||||
|  | ||||
|         slug_json = self._download_json( | ||||
|             '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), | ||||
|             video_id, | ||||
|             note='Downloading object information') | ||||
|         oid = slug_json['object_id'] | ||||
|  | ||||
|         media_json = self._download_json( | ||||
|             '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), | ||||
|             video_id, note='Downloading media information') | ||||
|         uuid = media_json['uuid'] | ||||
|         is_wide = media_json['is_wide'] | ||||
|  | ||||
|         server_json = self._download_json( | ||||
|             'http://www.spiegel.tv/streaming_servers/', video_id, | ||||
|             note='Downloading server information') | ||||
|         server = server_json[0]['endpoint'] | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for image in media_json['images']: | ||||
|             thumbnails.append({ | ||||
|                 'url': image['url'], | ||||
|                 'width': image['width'], | ||||
|                 'height': image['height'], | ||||
|             }) | ||||
|  | ||||
|         description = media_json['subtitle'] | ||||
|         duration = media_json['duration_in_ms'] / 1000. | ||||
|  | ||||
|         if is_wide: | ||||
|             format = '16x9' | ||||
|         else: | ||||
|             format = '4x3' | ||||
|  | ||||
|         url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v' | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': url, | ||||
|             'ext': 'm4v', | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'thumbnails': thumbnails | ||||
|         } | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -8,78 +10,114 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SteamIE(InfoExtractor): | ||||
|     _VALID_URL = r"""http://store\.steampowered\.com/ | ||||
|                 (agecheck/)? | ||||
|                 (?P<urltype>video|app)/ #If the page is only for videos or for a game | ||||
|                 (?P<gameID>\d+)/? | ||||
|                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID | ||||
|                 """ | ||||
|     _VALID_URL = r"""(?x) | ||||
|         https?://store\.steampowered\.com/ | ||||
|             (agecheck/)? | ||||
|             (?P<urltype>video|app)/ #If the page is only for videos or for a game | ||||
|             (?P<gameID>\d+)/? | ||||
|             (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID | ||||
|         | | ||||
|         https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+) | ||||
|     """ | ||||
|     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' | ||||
|     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' | ||||
|     _TEST = { | ||||
|         u"url": u"http://store.steampowered.com/video/105600/", | ||||
|         u"playlist": [ | ||||
|     _TESTS = [{ | ||||
|         "url": "http://store.steampowered.com/video/105600/", | ||||
|         "playlist": [ | ||||
|             { | ||||
|                 u"file": u"81300.flv", | ||||
|                 u"md5": u"f870007cee7065d7c76b88f0a45ecc07", | ||||
|                 u"info_dict": { | ||||
|                         u"title": u"Terraria 1.1 Trailer", | ||||
|                         u'playlist_index': 1, | ||||
|                 "md5": "f870007cee7065d7c76b88f0a45ecc07", | ||||
|                 "info_dict": { | ||||
|                     'id': '81300', | ||||
|                     'ext': 'flv', | ||||
|                     "title": "Terraria 1.1 Trailer", | ||||
|                     'playlist_index': 1, | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 u"file": u"80859.flv", | ||||
|                 u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751", | ||||
|                 u"info_dict": { | ||||
|                     u"title": u"Terraria Trailer", | ||||
|                     u'playlist_index': 2, | ||||
|                 "md5": "61aaf31a5c5c3041afb58fb83cbb5751", | ||||
|                 "info_dict": { | ||||
|                     'id': '80859', | ||||
|                     'ext': 'flv', | ||||
|                     "title": "Terraria Trailer", | ||||
|                     'playlist_index': 2, | ||||
|                 } | ||||
|             } | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         """Receives a URL and returns True if suitable for this IE.""" | ||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||
|         ], | ||||
|         'params': { | ||||
|             'playlistend': 2, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205', | ||||
|         'info_dict': { | ||||
|             'id': 'WB5DvDOOvAY', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20140329', | ||||
|             'title': 'FRONTIERS - Final Greenlight Trailer', | ||||
|             'description': 'md5:6df4fe8dd494ae811869672b0767e025', | ||||
|             'uploader': 'AAD Productions', | ||||
|             'uploader_id': 'AtomicAgeDogGames', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|         gameID = m.group('gameID') | ||||
|  | ||||
|         videourl = self._VIDEO_PAGE_TEMPLATE % gameID | ||||
|         webpage = self._download_webpage(videourl, gameID) | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         fileID = m.group('fileID') | ||||
|         if fileID: | ||||
|             videourl = url | ||||
|             playlist_id = fileID | ||||
|         else: | ||||
|             gameID = m.group('gameID') | ||||
|             playlist_id = gameID | ||||
|             videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id | ||||
|         webpage = self._download_webpage(videourl, playlist_id) | ||||
|  | ||||
|         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: | ||||
|             videourl = self._AGECHECK_TEMPLATE % gameID | ||||
|             videourl = self._AGECHECK_TEMPLATE % playlist_id | ||||
|             self.report_age_confirmation() | ||||
|             webpage = self._download_webpage(videourl, gameID) | ||||
|             webpage = self._download_webpage(videourl, playlist_id) | ||||
|  | ||||
|         self.report_extraction(gameID) | ||||
|         game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', | ||||
|                                              webpage, 'game title') | ||||
|         if fileID: | ||||
|             playlist_title = self._html_search_regex( | ||||
|                 r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title') | ||||
|             mweb = re.finditer(r'''(?x) | ||||
|                 'movie_(?P<videoID>[0-9]+)':\s*\{\s* | ||||
|                 YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)", | ||||
|                 ''', webpage) | ||||
|             videos = [{ | ||||
|                 '_type': 'url', | ||||
|                 'url': vid.group('youtube_id'), | ||||
|                 'ie_key': 'Youtube', | ||||
|             } for vid in mweb] | ||||
|         else: | ||||
|             playlist_title = self._html_search_regex( | ||||
|                 r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title') | ||||
|  | ||||
|         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," | ||||
|         mweb = re.finditer(urlRE, webpage) | ||||
|         namesRE = r'<span class="title">(?P<videoName>.+?)</span>' | ||||
|         titles = re.finditer(namesRE, webpage) | ||||
|         thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' | ||||
|         thumbs = re.finditer(thumbsRE, webpage) | ||||
|         videos = [] | ||||
|         for vid,vtitle,thumb in zip(mweb,titles,thumbs): | ||||
|             video_id = vid.group('videoID') | ||||
|             title = vtitle.group('videoName') | ||||
|             video_url = vid.group('videoURL') | ||||
|             video_thumb = thumb.group('thumbnail') | ||||
|             if not video_url: | ||||
|                 raise ExtractorError(u'Cannot find video url for %s' % video_id) | ||||
|             info = { | ||||
|                 'id':video_id, | ||||
|                 'url':video_url, | ||||
|                 'ext': 'flv', | ||||
|                 'title': unescapeHTML(title), | ||||
|                 'thumbnail': video_thumb | ||||
|                   } | ||||
|             videos.append(info) | ||||
|         return [self.playlist_result(videos, gameID, game_title)] | ||||
|             mweb = re.finditer(r'''(?x) | ||||
|                 'movie_(?P<videoID>[0-9]+)':\s*\{\s* | ||||
|                 FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)" | ||||
|                 (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}, | ||||
|                 ''', webpage) | ||||
|             titles = re.finditer( | ||||
|                 r'<span class="title">(?P<videoName>.+?)</span>', webpage) | ||||
|             thumbs = re.finditer( | ||||
|                 r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage) | ||||
|             videos = [] | ||||
|  | ||||
|             for vid, vtitle, thumb in zip(mweb, titles, thumbs): | ||||
|                 video_id = vid.group('videoID') | ||||
|                 title = vtitle.group('videoName') | ||||
|                 video_url = vid.group('videoURL') | ||||
|                 video_thumb = thumb.group('thumbnail') | ||||
|                 if not video_url: | ||||
|                     raise ExtractorError('Cannot find video url for %s' % video_id) | ||||
|                 videos.append({ | ||||
|                     'id': video_id, | ||||
|                     'url': video_url, | ||||
|                     'ext': 'flv', | ||||
|                     'title': unescapeHTML(title), | ||||
|                     'thumbnail': video_thumb | ||||
|                 }) | ||||
|         if not videos: | ||||
|             raise ExtractorError('Could not find any videos') | ||||
|  | ||||
|         return self.playlist_result(videos, playlist_id, playlist_title) | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user