Compare commits
	
		
			488 Commits
		
	
	
		
			2015.01.23
			...
			2015.02.23
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | bd61a9e770 | ||
|   | 3438e7acd2 | ||
|   | 09c200acf2 | ||
|   | 716889cab1 | ||
|   | 409693984f | ||
|   | 04e8c11080 | ||
|   | 80af2b73ab | ||
|   | 3cc57f9645 | ||
|   | a65d4e7f14 | ||
|   | 543ec2136b | ||
|   | 93b5071f73 | ||
|   | ddc369f073 | ||
|   | fcc3e6138b | ||
|   | 9fe6ef7ab2 | ||
|   | c010af6f19 | ||
|   | 35b7982303 | ||
|   | f311cfa231 | ||
|   | e086e0eb6c | ||
|   | 314368c822 | ||
|   | c5181ab410 | ||
|   | ea5152cae1 | ||
|   | 255fca5eea | ||
|   | 4aeccadf4e | ||
|   | 93540ee10e | ||
|   | 8fb3ac3649 | ||
|   | 77b2986b5b | ||
|   | 62b013df0d | ||
|   | fad6768bd1 | ||
|   | a78125f925 | ||
|   | a00a8bcc8a | ||
|   | 1e9a9e167d | ||
|   | 3da0db62e6 | ||
|   | e14ced7918 | ||
|   | ab9d02f53b | ||
|   | a461a11989 | ||
|   | 1bd838608f | ||
|   | 365577f567 | ||
|   | 50efb383f0 | ||
|   | 5da6bd0083 | ||
|   | 5e9a033e6e | ||
|   | dd0a58f5f0 | ||
|   | a21420389e | ||
|   | 6140baf4e1 | ||
|   | 8fc642eb5b | ||
|   | e66e1a0046 | ||
|   | d5c69f1da4 | ||
|   | 5c8a3f862a | ||
|   | a3b9157f49 | ||
|   | b88ba05356 | ||
|   | b74d505577 | ||
|   | 9e2d7dca87 | ||
|   | d236b37ac9 | ||
|   | e880c66bd8 | ||
|   | 383456aa29 | ||
|   | 1a13940c8d | ||
|   | 3d54788495 | ||
|   | 71d53ace2f | ||
|   | f37e3f99f0 | ||
|   | bd03ffc16e | ||
|   | 1ac1af9b47 | ||
|   | 3bf5705316 | ||
|   | 1c2528c8a3 | ||
|   | 7bd15b1a03 | ||
|   | 6b961a85fd | ||
|   | 7707004043 | ||
|   | a025d3c5a5 | ||
|   | c460bdd56b | ||
|   | b81a359eb6 | ||
|   | d61aefb24c | ||
|   | d305dd73a3 | ||
|   | 93a16ba238 | ||
|   | 85d5866177 | ||
|   | 9789d7535d | ||
|   | d8443cd3f7 | ||
|   | d47c26e168 | ||
|   | 81975f4693 | ||
|   | b8b928d5cb | ||
|   | 3eff81fbf7 | ||
|   | 785521bf4f | ||
|   | 6d1a55a521 | ||
|   | 9cad27008b | ||
|   | 11e611a7fa | ||
|   | 72c1f8de06 | ||
|   | 6e99868e4c | ||
|   | 4d278fde64 | ||
|   | f21e915fb9 | ||
|   | 6f53c63df6 | ||
|   | 1def5f359e | ||
|   | 15ec669374 | ||
|   | a3fa5da496 | ||
|   | 30965ac66a | ||
|   | 09ab40b7d1 | ||
|   | fa15607773 | ||
|   | a91a2c1a83 | ||
|   | 16e7711e22 | ||
|   | 5cda4eda72 | ||
|   | 98f000409f | ||
|   | 4a8d4a53b1 | ||
|   | 4cd95bcbc3 | ||
|   | be24c8697f | ||
|   | 0d93378887 | ||
|   | 4069766c52 | ||
|   | 7010577720 | ||
|   | 8ac27a68e6 | ||
|   | 46312e0b46 | ||
|   | f9216ed6ad | ||
|   | 65bf37ef83 | ||
|   | f740fae2a4 | ||
|   | fbc503d696 | ||
|   | 662435f728 | ||
|   | 163d966707 | ||
|   | 85729c51af | ||
|   | 1db5fbcfe3 | ||
|   | 59b8ab5834 | ||
|   | a568180441 | ||
|   | 85e80f71cd | ||
|   | bfa6bdcd8b | ||
|   | 03cd72b007 | ||
|   | 5bfd430f81 | ||
|   | 73fac4e911 | ||
|   | 8fb474fb17 | ||
|   | f813928e4b | ||
|   | b9c7a97318 | ||
|   | 9fb2f1cd6d | ||
|   | 6ca7732d5e | ||
|   | b0ab0fac49 | ||
|   | a294bce82f | ||
|   | 76d1466b08 | ||
|   | 1888d3f7b3 | ||
|   | c2787701cc | ||
|   | 52e1d0ccc4 | ||
|   | 10e3c4c221 | ||
|   | 68f2d273bf | ||
|   | 7c86c21662 | ||
|   | ae1580d790 | ||
|   | 3215c50f25 | ||
|   | 36f73e8044 | ||
|   | a4f3d779db | ||
|   | d9aa2b784d | ||
|   | cffcbc02de | ||
|   | 9347fddbfc | ||
|   | 037e9437e4 | ||
|   | 36e7a4ca2e | ||
|   | ae6423d704 | ||
|   | 7105440cec | ||
|   | c80b9cd280 | ||
|   | 171ca612af | ||
|   | c3d64fc1b3 | ||
|   | 7c24ce225d | ||
|   | 08b38d5401 | ||
|   | 024c53694d | ||
|   | 7e6011101f | ||
|   | c40feaba77 | ||
|   | 5277f09dfc | ||
|   | 2d30521ab9 | ||
|   | 050fa43561 | ||
|   | f36f92f4da | ||
|   | 124f3bc67d | ||
|   | d304209a85 | ||
|   | 8367d3f3cb | ||
|   | c56d7d899d | ||
|   | ea5db8469e | ||
|   | 3811c567e7 | ||
|   | 8708d76425 | ||
|   | 054fe3cc40 | ||
|   | af0d11f244 | ||
|   | 9650885be9 | ||
|   | 596ac6e31f | ||
|   | 612ee37365 | ||
|   | 442c37b7a9 | ||
|   | 04bbe41330 | ||
|   | 8f84f57183 | ||
|   | 6a78740211 | ||
|   | c0e1a415fd | ||
|   | bf8f082a90 | ||
|   | 2f543a2142 | ||
|   | 7e5db8c930 | ||
|   | f7a211dcc8 | ||
|   | 845734773d | ||
|   | 347de4931c | ||
|   | 8829650513 | ||
|   | c73fae1e2e | ||
|   | 834bf069d2 | ||
|   | c06a9fa34f | ||
|   | 753fad4adc | ||
|   | 34814eb66e | ||
|   | 3a5bcd0326 | ||
|   | 99c2398bc6 | ||
|   | 28f1272870 | ||
|   | f18e3a2fc0 | ||
|   | c4c5dc27cb | ||
|   | 2caf182f37 | ||
|   | 43f244b6d5 | ||
|   | 1309b396d0 | ||
|   | ba61796458 | ||
|   | 3255fe7141 | ||
|   | e98b8e79ea | ||
|   | 196121c51b | ||
|   | 5269028951 | ||
|   | f7bc056b5a | ||
|   | a0f7198544 | ||
|   | dd8930684e | ||
|   | bdb186f3b0 | ||
|   | 64f9baa084 | ||
|   | b29231c040 | ||
|   | 6128bf07a9 | ||
|   | 2ec19e9558 | ||
|   | 9ddb6925bf | ||
|   | 12931e1c6e | ||
|   | 41c23b0da5 | ||
|   | 2578ab19e4 | ||
|   | d87ec897e9 | ||
|   | 3bd4bffb1c | ||
|   | c36b09a502 | ||
|   | 641eb10d34 | ||
|   | 955c5505e7 | ||
|   | 69319969de | ||
|   | a14292e848 | ||
|   | 5d678df64a | ||
|   | 8ca8cbe2bd | ||
|   | ba322d8209 | ||
|   | 2f38289b79 | ||
|   | f23a3ca699 | ||
|   | 77d2b106cc | ||
|   | c0e46412e9 | ||
|   | 0161353d7d | ||
|   | 2b4ecde2c8 | ||
|   | b3a286d69d | ||
|   | 467d3c9a0c | ||
|   | ad5747bad1 | ||
|   | d6eb66ed3c | ||
|   | 7f2a9f1b49 | ||
|   | 1e1896f2de | ||
|   | c831973366 | ||
|   | 1a2548d9e9 | ||
|   | 3900eec27c | ||
|   | a02d212638 | ||
|   | 9c91a8fa70 | ||
|   | 41469f335e | ||
|   | 67ce4f8820 | ||
|   | bc63d56cca | ||
|   | c893d70805 | ||
|   | 3ee6e02564 | ||
|   | e3aaace400 | ||
|   | 300753a069 | ||
|   | f13b88c616 | ||
|   | 60ca389c64 | ||
|   | 1b0f3919c1 | ||
|   | 6a348cf7d5 | ||
|   | 9e91449c8d | ||
|   | 25e5ebf382 | ||
|   | 7dfc356625 | ||
|   | 58ba6c0160 | ||
|   | f076b63821 | ||
|   | 12f0454cd6 | ||
|   | cd7342755f | ||
|   | 9bb8e0a3f9 | ||
|   | 1a6373ef39 | ||
|   | f6c24009be | ||
|   | d862042301 | ||
|   | 23d9ded655 | ||
|   | 4c1a017e69 | ||
|   | ee623d9247 | ||
|   | 330537d08a | ||
|   | 2cf0ecac7b | ||
|   | d200b11c7e | ||
|   | d0eca21021 | ||
|   | c1147c05e1 | ||
|   | 55898ad2cf | ||
|   | a465808592 | ||
|   | 5c4862bad4 | ||
|   | 995029a142 | ||
|   | a57b562cff | ||
|   | 531572578e | ||
|   | 3a4cca687f | ||
|   | 7d3d06a16c | ||
|   | c21b1fbeeb | ||
|   | f920ce295e | ||
|   | 7a7bd19c45 | ||
|   | 8f4b58d70e | ||
|   | 3fd45e03bf | ||
|   | 869b4aeff4 | ||
|   | cc9ca3ba6e | ||
|   | ea71034bd3 | ||
|   | 9fffd0469f | ||
|   | ae7773942e | ||
|   | 469a64cebf | ||
|   | aae3fdcfae | ||
|   | 6a66904f8e | ||
|   | 78271e3319 | ||
|   | 92bf0bcdf8 | ||
|   | 1283204917 | ||
|   | 6789defea9 | ||
|   | acf2a6e97b | ||
|   | 8cfb6efe6f | ||
|   | 04edb9caf5 | ||
|   | 044131ba21 | ||
|   | 0a7055c90d | ||
|   | 9e3f19919a | ||
|   | 4a3da4ebdb | ||
|   | 027008b14e | ||
|   | c6df692466 | ||
|   | acf757f42e | ||
|   | dd8982f19c | ||
|   | 654bd52f58 | ||
|   | a9551e9020 | ||
|   | 4e980275b5 | ||
|   | c172440ac5 | ||
|   | e332772531 | ||
|   | 437cac8cc1 | ||
|   | 9f281cacd2 | ||
|   | 748a0fab8a | ||
|   | c1f06d6307 | ||
|   | c4e817ce4a | ||
|   | 9a3e5e6955 | ||
|   | 228d30ed06 | ||
|   | 057c0609fc | ||
|   | 17d2712d9c | ||
|   | fc09240e24 | ||
|   | 146303136f | ||
|   | 96aded8d3d | ||
|   | 2886be15aa | ||
|   | ca0f500ecf | ||
|   | 29aef5a33c | ||
|   | 9158b2b301 | ||
|   | 0196149c5b | ||
|   | 8f9312c387 | ||
|   | 439b9a9e9b | ||
|   | 8c72beb25e | ||
|   | 1ee94db2d0 | ||
|   | e77d2975af | ||
|   | e41b1f7385 | ||
|   | cd596028d6 | ||
|   | cc57bd33a8 | ||
|   | 6d593c3276 | ||
|   | 91755ee384 | ||
|   | 0692ef86ef | ||
|   | 439d9be27d | ||
|   | b80505a409 | ||
|   | e4c17d7274 | ||
|   | 2c58674e0e | ||
|   | ef1269fb07 | ||
|   | e525d9a3df | ||
|   | 20b4492c71 | ||
|   | dee3f73787 | ||
|   | d543bdc351 | ||
|   | c7ff0c6422 | ||
|   | 01c46659c4 | ||
|   | b04b885271 | ||
|   | dc35bfd2d5 | ||
|   | 70fca8d694 | ||
|   | a52c633536 | ||
|   | 7b6c60393e | ||
|   | 83e7a314b4 | ||
|   | 749f2ca044 | ||
|   | 5468ff4d91 | ||
|   | 1d2daaea63 | ||
|   | 52585fd6dc | ||
|   | c03844a4ec | ||
|   | 6449cd807e | ||
|   | e2a08185c6 | ||
|   | 5d6677ca28 | ||
|   | 5a8a29cfea | ||
|   | c1708b89c0 | ||
|   | 83fddfd493 | ||
|   | 1798791df1 | ||
|   | 6ebb0dca9f | ||
|   | cf8d6ec865 | ||
|   | f452f72c6b | ||
|   | 3198291f26 | ||
|   | 02c1d5e285 | ||
|   | ec4161a57d | ||
|   | 03d8d4df38 | ||
|   | 03d2d6d51b | ||
|   | 83fda3c000 | ||
|   | 4fe8495a23 | ||
|   | a16f6643f0 | ||
|   | adc0ae3ceb | ||
|   | 7bb3ceb4c7 | ||
|   | 75a4fc5b72 | ||
|   | 87673cd438 | ||
|   | f345fe9db7 | ||
|   | e683a48d0e | ||
|   | a7a14d9586 | ||
|   | 219337990b | ||
|   | 376a770cc4 | ||
|   | 7e500dbd93 | ||
|   | affd04a45d | ||
|   | c84130e865 | ||
|   | 4f264c02c7 | ||
|   | d205476103 | ||
|   | 367cc95aa7 | ||
|   | 206dba27a4 | ||
|   | dcf53d4408 | ||
|   | 63be3b8989 | ||
|   | 18b4e9e79d | ||
|   | cb454b333d | ||
|   | e0d9f85aee | ||
|   | b04fbd789c | ||
|   | aad9556414 | ||
|   | 48a1e5141a | ||
|   | 0865f397ae | ||
|   | 796df3c631 | ||
|   | a28383834b | ||
|   | 3a0d2f520a | ||
|   | 6348ad12a0 | ||
|   | fe7710cbcc | ||
|   | 2103d038b3 | ||
|   | 6ca85be6f8 | ||
|   | 9f0df77ab1 | ||
|   | e72c7e4123 | ||
|   | 2b1bd292ae | ||
|   | 71e7da6533 | ||
|   | 80a49d3d7b | ||
|   | d862a4f94f | ||
|   | a57e8ce658 | ||
|   | 96a53167fa | ||
|   | 6d2749aac4 | ||
|   | b1b0b1ca30 | ||
|   | 3dee7826e7 | ||
|   | c9326b38b8 | ||
|   | d4f64cabf4 | ||
|   | fe41ddbb28 | ||
|   | ee69b99af6 | ||
|   | 767ff0a2d1 | ||
|   | 8604e882a8 | ||
|   | cc1237f484 | ||
|   | 37f4ce538a | ||
|   | 7d346331b5 | ||
|   | e1ccc04e9f | ||
|   | 881e6a1f5c | ||
|   | baeaeffce5 | ||
|   | c14e88f0f5 | ||
|   | 8940b8608e | ||
|   | ec82d85acd | ||
|   | cfb56d1af3 | ||
|   | 1e10802990 | ||
|   | 6695916045 | ||
|   | 7906d199a1 | ||
|   | 1070711d60 | ||
|   | 4b405cfc6e | ||
|   | e5660ee6ae | ||
|   | 8011fba3ae | ||
|   | 587a9c2749 | ||
|   | e1554a407d | ||
|   | 3fcfb8e9fa | ||
|   | 384b62028a | ||
|   | b95aab8482 | ||
|   | fc2d6abfe7 | ||
|   | 27de5625d4 | ||
|   | 6aa4f54d66 | ||
|   | 222516d97d | ||
|   | a055469faf | ||
|   | fdaaaaa878 | ||
|   | 12d1fb5aa9 | ||
|   | 48f00d15b1 | ||
|   | 3e055aa5c3 | ||
|   | 6896a52721 | ||
|   | 5779b3e1fe | ||
|   | 62cd676c74 | ||
|   | 0c17278843 | ||
|   | d229ee70da | ||
|   | 26e274666d | ||
|   | ebd46aed51 | ||
|   | e793f7671c | ||
|   | c2e64f71d0 | ||
|   | 0920e5830f | ||
|   | bf7fa94ec7 | ||
|   | 6f58db8982 | ||
|   | aa42e87340 | ||
|   | 649f7966f7 | ||
|   | 5f0d813d93 | ||
|   | 501f13fbf3 | ||
|   | 5a000b45b3 | ||
|   | 40b1cbafac | ||
|   | 4231235cda | ||
|   | ca7a9c1bf7 | ||
|   | 247a5da704 | ||
|   | d1b4617e1d | ||
|   | 74dcf42a85 | ||
|   | a42c921598 | ||
|   | f96252b913 | ||
|   | 04b89c9026 | ||
|   | 0c72eb9060 | ||
|   | f9f86b0c64 | ||
|   | 0aed8df2bf | ||
|   | 2f61fe4ccc | ||
|   | 03359e9864 | 
| @@ -4,6 +4,9 @@ python: | ||||
|   - "2.7" | ||||
|   - "3.3" | ||||
|   - "3.4" | ||||
| before_install: | ||||
|   - sudo apt-get update -qq | ||||
|   - sudo apt-get install -yqq rtmpdump | ||||
| script: nosetests test --verbose | ||||
| notifications: | ||||
|   email: | ||||
|   | ||||
							
								
								
									
										8
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -104,3 +104,11 @@ Ondřej Caletka | ||||
| Dinesh S | ||||
| Johan K. Jensen | ||||
| Yen Chi Hsuan | ||||
| Enam Mijbah Noor | ||||
| David Luhmer | ||||
| Shaya Goldberg | ||||
| Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
| Ryan Schmidt | ||||
| Leslie P. Polzer | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
| **Please include the full output of youtube-dl when run with `-v`**. | ||||
|  | ||||
| The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. | ||||
|  | ||||
| Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): | ||||
|  | ||||
| @@ -122,7 +124,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
|         $ git add youtube_dl/extractor/__init__.py | ||||
|   | ||||
							
								
								
									
										5
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,10 +1,7 @@ | ||||
| all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp | ||||
|  | ||||
| cleanall: clean | ||||
| 	rm -f youtube-dl youtube-dl.exe | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
|   | ||||
							
								
								
									
										113
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										113
									
								
								README.md
									
									
									
									
									
								
							| @@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, | ||||
|                                      only list them. | ||||
|     --no-color                       Do not emit color codes in output. | ||||
|  | ||||
| ## Network Options: | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in | ||||
| @@ -93,6 +94,14 @@ which means you can modify it, redistribute it or use it however you like. | ||||
| ## Video Selection: | ||||
|     --playlist-start NUMBER          playlist video to start at (default is 1) | ||||
|     --playlist-end NUMBER            playlist video to end at (default is last) | ||||
|     --playlist-items ITEM_SPEC       playlist video items to download. Specify | ||||
|                                      indices of the videos in the playlist | ||||
|                                      separated by commas like: "--playlist-items | ||||
|                                      1,2,5,8" if you want to download videos | ||||
|                                      indexed 1, 2, 5, 8 in the playlist. You can | ||||
|                                      specify range: "--playlist-items | ||||
|                                      1-3,7,10-13", it will download the videos | ||||
|                                      at index 1, 2, 3, 7, 10, 11, 12 and 13. | ||||
|     --match-title REGEX              download only matching titles (regex or | ||||
|                                      caseless sub-string) | ||||
|     --reject-title REGEX             skip download for matching titles (regex or | ||||
| @@ -111,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than | ||||
|                                      COUNT views | ||||
|     --match-filter FILTER            (Experimental) Generic video filter. | ||||
|                                      Specify any key (see help for -o for a list | ||||
|                                      of available keys) to match if the key is | ||||
|                                      present, !key to check if the key is not | ||||
|                                      present,key > NUMBER (like "comment_count > | ||||
|                                      12", also works with >=, <, <=, !=, =) to | ||||
|                                      compare against a number, and & to require | ||||
|                                      multiple matches. Values which are not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. For | ||||
|                                      example, to only match videos that have | ||||
|                                      been liked more than 100 times and disliked | ||||
|                                      less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given | ||||
|                                      service), but who also have a description, | ||||
|                                      use  --match-filter "like_count > 100 & | ||||
|                                      dislike_count <? 50 & description" . | ||||
|     --no-playlist                    If the URL refers to a video and a | ||||
|                                      playlist, download only the video. | ||||
|     --age-limit YEARS                download only videos suitable for the given | ||||
| @@ -124,7 +150,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
| ## Download Options: | ||||
|     -r, --rate-limit LIMIT           maximum download rate in bytes per second | ||||
|                                      (e.g. 50K or 4.2M) | ||||
|     -R, --retries RETRIES            number of retries (default is 10) | ||||
|     -R, --retries RETRIES            number of retries (default is 10), or | ||||
|                                      "infinite". | ||||
|     --buffer-size SIZE               size of download buffer (e.g. 1024 or 16K) | ||||
|                                      (default is 1024) | ||||
|     --no-resize-buffer               do not automatically adjust the buffer | ||||
| @@ -132,6 +159,13 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      automatically resized from an initial value | ||||
|                                      of SIZE. | ||||
|     --playlist-reverse               Download playlist videos in reverse order | ||||
|     --xattr-set-filesize             (experimental) set file xattribute | ||||
|                                      ytdl.filesize with expected filesize | ||||
|     --hls-prefer-native              (experimental) Use the native HLS | ||||
|                                      downloader instead of ffmpeg. | ||||
|     --external-downloader COMMAND    (experimental) Use the specified external | ||||
|                                      downloader. Currently supports | ||||
|                                      aria2c,curl,wget | ||||
|  | ||||
| ## Filesystem Options: | ||||
|     -a, --batch-file FILE            file containing URLs to download ('-' for | ||||
| @@ -191,7 +225,6 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --write-info-json                write video metadata to a .info.json file | ||||
|     --write-annotations              write video annotations to a .annotation | ||||
|                                      file | ||||
|     --write-thumbnail                write thumbnail image to disk | ||||
|     --load-info FILE                 json file containing the video information | ||||
|                                      (created with the "--write-json" option) | ||||
|     --cookies FILE                   file to read cookies from and dump cookie | ||||
| @@ -206,6 +239,12 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --no-cache-dir                   Disable filesystem caching | ||||
|     --rm-cache-dir                   Delete all filesystem cache files | ||||
|  | ||||
| ## Thumbnail images: | ||||
|     --write-thumbnail                write thumbnail image to disk | ||||
|     --write-all-thumbnails           write all thumbnail image formats to disk | ||||
|     --list-thumbnails                Simulate and list all available thumbnail | ||||
|                                      formats | ||||
|  | ||||
| ## Verbosity / Simulation Options: | ||||
|     -q, --quiet                      activates quiet mode | ||||
|     --no-warnings                    Ignore warnings | ||||
| @@ -259,6 +298,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --bidi-workaround                Work around terminals that lack | ||||
|                                      bidirectional text support. Requires bidiv | ||||
|                                      or fribidi executable in PATH | ||||
|     --sleep-interval SECONDS         Number of seconds to sleep before each | ||||
|                                      download. | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT              video format code, specify the order of | ||||
| @@ -271,18 +312,20 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      video results by putting a condition in | ||||
|                                      brackets, as in -f "best[height=720]" (or | ||||
|                                      -f "[filesize>10M]").  This works for | ||||
|                                      filesize, height, width, tbr, abr, and vbr | ||||
|                                      and the comparisons <, <=, >, >=, =, != . | ||||
|                                      Formats for which the value is not known | ||||
|                                      are excluded unless you put a question mark | ||||
|                                      (?) after the operator. You can combine | ||||
|                                      format filters, so  -f "[height <=? | ||||
|                                      720][tbr>500]" selects up to 720p videos | ||||
|                                      (or videos where the height is not known) | ||||
|                                      with a bitrate of at least 500 KBit/s. By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. Use commas to download multiple | ||||
|                                      audio formats, such as -f | ||||
|                                      filesize, height, width, tbr, abr, vbr, | ||||
|                                      asr, and fps and the comparisons <, <=, >, | ||||
|                                      >=, =, != and for ext, acodec, vcodec, | ||||
|                                      container, and protocol and the comparisons | ||||
|                                      =, != . Formats for which the value is not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. You | ||||
|                                      can combine format filters, so  -f "[height | ||||
|                                      <=? 720][tbr>500]" selects up to 720p | ||||
|                                      videos (or videos where the height is not | ||||
|                                      known) with a bitrate of at least 500 | ||||
|                                      KBit/s. By default, youtube-dl will pick | ||||
|                                      the best quality. Use commas to download | ||||
|                                      multiple audio formats, such as -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio. | ||||
|                                      You can merge the video and audio of two | ||||
|                                      formats into a single file using -f <video- | ||||
| @@ -347,15 +390,18 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --add-metadata                   write metadata to the video file | ||||
|     --xattrs                         write metadata to the video file's xattrs | ||||
|                                      (using dublin core and xdg standards) | ||||
|     --fixup POLICY                   (experimental) Automatically correct known | ||||
|                                      faults of the file. One of never (do | ||||
|                                      nothing), warn (only emit a warning), | ||||
|                                      detect_or_warn(check whether we can do | ||||
|                                      anything about it, warn otherwise | ||||
|     --fixup POLICY                   Automatically correct known faults of the | ||||
|                                      file. One of never (do nothing), warn (only | ||||
|                                      emit a warning), detect_or_warn(the | ||||
|                                      default; fix file if we can, warn | ||||
|                                      otherwise) | ||||
|     --prefer-avconv                  Prefer avconv over ffmpeg for running the | ||||
|                                      postprocessors (default) | ||||
|     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the | ||||
|                                      postprocessors | ||||
|     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; | ||||
|                                      either the path to the binary or its | ||||
|                                      containing directory. | ||||
|     --exec CMD                       Execute a command on the file after | ||||
|                                      downloading, similar to find's -exec | ||||
|                                      syntax. Example: --exec 'adb push {} | ||||
| @@ -469,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c | ||||
|  | ||||
| ### ERROR: no fmt_url_map or conn information found in video info | ||||
|  | ||||
| youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ERROR: unable to download video ### | ||||
|  | ||||
| youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ExtractorError: Could not find JS function u'OF' | ||||
|  | ||||
| In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### SyntaxError: Non-ASCII character ### | ||||
|  | ||||
| @@ -504,9 +554,24 @@ From then on, after restarting your shell, you will be able to access both youtu | ||||
|  | ||||
| Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration). | ||||
|  | ||||
| ### How do I download a video starting with a `-` ? | ||||
|  | ||||
| Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`: | ||||
|  | ||||
|     youtube-dl -- -wNyEUrxzFU | ||||
|     youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" | ||||
|  | ||||
| ### Can you add support for this anime video site, or site which shows current movies for free? | ||||
|  | ||||
| As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. | ||||
|  | ||||
| A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization. | ||||
|  | ||||
| Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content. | ||||
|  | ||||
| ### How can I detect whether a given URL is supported by youtube-dl? | ||||
|  | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
|  | ||||
| It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. | ||||
|  | ||||
| @@ -584,7 +649,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
|         $ git add youtube_dl/extractor/__init__.py | ||||
| @@ -700,7 +765,7 @@ In particular, every site support request issue should only pertain to services | ||||
|  | ||||
| ###  Is anyone going to need the feature? | ||||
|  | ||||
| Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
| Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. | ||||
|  | ||||
| ###  Is your question about youtube-dl? | ||||
|  | ||||
|   | ||||
| @@ -45,12 +45,12 @@ for test in get_testcases(): | ||||
|  | ||||
|         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) | ||||
|  | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] | ||||
|                    or test['info_dict']['age_limit'] != 18): | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or | ||||
|                    test['info_dict']['age_limit'] != 18): | ||||
|         print('\nPotential missing age_limit check: {0}'.format(test['name'])) | ||||
|  | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] | ||||
|                          and test['info_dict']['age_limit'] == 18): | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and | ||||
|                          test['info_dict']['age_limit'] == 18): | ||||
|         print('\nPotential false negative: {0}'.format(test['name'])) | ||||
|  | ||||
|     else: | ||||
|   | ||||
| @@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us | ||||
| if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi | ||||
|  | ||||
| /bin/echo -e "\n### First of all, testing..." | ||||
| make cleanall | ||||
| make clean | ||||
| if $skip_tests ; then | ||||
|     echo 'SKIPPING TESTS' | ||||
| else | ||||
| @@ -45,9 +45,9 @@ fi | ||||
| /bin/echo -e "\n### Changing version in version.py..." | ||||
| sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
|  | ||||
| /bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." | ||||
| make README.md | ||||
| git add README.md youtube_dl/version.py | ||||
| /bin/echo -e "\n### Committing documentation and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md supportedsites | ||||
| git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # Supported sites | ||||
|  - **1tv**: Первый канал | ||||
|  - **1up.com** | ||||
|  - **220.ro** | ||||
|  - **24video** | ||||
| @@ -9,16 +10,21 @@ | ||||
|  - **8tracks** | ||||
|  - **9gag** | ||||
|  - **abc.net.au** | ||||
|  - **Abc7News** | ||||
|  - **AcademicEarth:Course** | ||||
|  - **AddAnime** | ||||
|  - **AdobeTV** | ||||
|  - **AdultSwim** | ||||
|  - **Aftenposten** | ||||
|  - **Aftonbladet** | ||||
|  - **AlJazeera** | ||||
|  - **Allocine** | ||||
|  - **AlphaPorno** | ||||
|  - **anitube.se** | ||||
|  - **AnySex** | ||||
|  - **Aparat** | ||||
|  - **AppleDailyAnimationNews** | ||||
|  - **AppleDailyRealtimeNews** | ||||
|  - **AppleTrailers** | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
| @@ -30,8 +36,10 @@ | ||||
|  - **arte.tv:ddc** | ||||
|  - **arte.tv:embed** | ||||
|  - **arte.tv:future** | ||||
|  - **AtresPlayer** | ||||
|  - **ATTTechChannel** | ||||
|  - **audiomack** | ||||
|  - **AUEngine** | ||||
|  - **audiomack:album** | ||||
|  - **Azubu** | ||||
|  - **bambuser** | ||||
|  - **bambuser:channel** | ||||
| @@ -53,14 +61,19 @@ | ||||
|  - **Brightcove** | ||||
|  - **BuzzFeed** | ||||
|  - **BYUtv** | ||||
|  - **Camdemy** | ||||
|  - **CamdemyFolder** | ||||
|  - **Canal13cl** | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **CBS** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSSports** | ||||
|  - **CeskaTelevize** | ||||
|  - **channel9**: Channel 9 | ||||
|  - **Chilloutzone** | ||||
|  - **chirbit** | ||||
|  - **chirbit:profile** | ||||
|  - **Cinchcast** | ||||
|  - **Cinemassacre** | ||||
|  - **clipfish** | ||||
| @@ -71,8 +84,10 @@ | ||||
|  - **cmt.com** | ||||
|  - **CNET** | ||||
|  - **CNN** | ||||
|  - **CNNArticle** | ||||
|  - **CNNBlogs** | ||||
|  - **CollegeHumor** | ||||
|  - **CollegeRama** | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
| @@ -82,32 +97,38 @@ | ||||
|  - **Crunchyroll** | ||||
|  - **crunchyroll:playlist** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **dailymotion** | ||||
|  - **dailymotion:playlist** | ||||
|  - **dailymotion:user** | ||||
|  - **daum.net** | ||||
|  - **DBTV** | ||||
|  - **DctpTv** | ||||
|  - **DeezerPlaylist** | ||||
|  - **defense.gouv.fr** | ||||
|  - **Discovery** | ||||
|  - **divxstage**: DivxStage | ||||
|  - **Dotsub** | ||||
|  - **DRBonanza** | ||||
|  - **Dropbox** | ||||
|  - **DrTuber** | ||||
|  - **DRTV** | ||||
|  - **Dump** | ||||
|  - **dvtv**: http://video.aktualne.cz/ | ||||
|  - **EbaumsWorld** | ||||
|  - **EchoMsk** | ||||
|  - **eHow** | ||||
|  - **Einthusan** | ||||
|  - **eitb.tv** | ||||
|  - **EllenTV** | ||||
|  - **EllenTV:clips** | ||||
|  - **ElPais**: El País | ||||
|  - **Embedly** | ||||
|  - **EMPFlix** | ||||
|  - **Engadget** | ||||
|  - **Eporner** | ||||
|  - **EroProfile** | ||||
|  - **Escapist** | ||||
|  - **EveryonesMixtape** | ||||
|  - **exfm**: ex.fm | ||||
| @@ -120,7 +141,6 @@ | ||||
|  - **fernsehkritik.tv:postecke** | ||||
|  - **Firedrive** | ||||
|  - **Firstpost** | ||||
|  - **firsttv**: Видеоархив - Первый канал | ||||
|  - **Flickr** | ||||
|  - **Folketinget**: Folketinget (ft.dk; Danish parliament) | ||||
|  - **Foxgay** | ||||
| @@ -143,6 +163,7 @@ | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
|  - **GiantBomb** | ||||
|  - **Giga** | ||||
|  - **Glide**: Glide mobile video messages (glide.me) | ||||
|  - **Globo** | ||||
|  - **GodTube** | ||||
| @@ -153,9 +174,15 @@ | ||||
|  - **Grooveshark** | ||||
|  - **Groupon** | ||||
|  - **Hark** | ||||
|  - **HearThisAt** | ||||
|  - **Heise** | ||||
|  - **HellPorno** | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **History** | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
|  - **HostingBulk** | ||||
|  - **HotNewHipHop** | ||||
| @@ -167,6 +194,7 @@ | ||||
|  - **ign.com** | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
|  - **Ina** | ||||
|  - **InfoQ** | ||||
|  - **Instagram** | ||||
| @@ -182,6 +210,7 @@ | ||||
|  - **jpopsuki.tv** | ||||
|  - **Jukebox** | ||||
|  - **Kankan** | ||||
|  - **Karaoketv** | ||||
|  - **keek** | ||||
|  - **KeezMovies** | ||||
|  - **KhanAcademy** | ||||
| @@ -195,6 +224,7 @@ | ||||
|  - **LiveLeak** | ||||
|  - **livestream** | ||||
|  - **livestream:original** | ||||
|  - **LnkGo** | ||||
|  - **lrt.lt** | ||||
|  - **lynda**: lynda.com videos | ||||
|  - **lynda:course**: lynda.com online courses | ||||
| @@ -203,6 +233,7 @@ | ||||
|  - **mailru**: Видео@Mail.Ru | ||||
|  - **Malemotion** | ||||
|  - **MDR** | ||||
|  - **media.ccc.de** | ||||
|  - **metacafe** | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
| @@ -235,6 +266,8 @@ | ||||
|  - **MySpass** | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **NationalGeographic** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -242,11 +275,16 @@ | ||||
|  - **ndr**: NDR.de - Mediathek | ||||
|  - **NDTV** | ||||
|  - **NerdCubedFeed** | ||||
|  - **Nerdist** | ||||
|  - **Netzkino** | ||||
|  - **Newgrounds** | ||||
|  - **Newstube** | ||||
|  - **NextMedia** | ||||
|  - **NextMediaActionNews** | ||||
|  - **nfb**: National Film Board of Canada | ||||
|  - **nfl.com** | ||||
|  - **nhl.com** | ||||
|  - **nhl.com:news**: NHL news | ||||
|  - **nhl.com:videocenter**: NHL videocenter category | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
| @@ -257,18 +295,22 @@ | ||||
|  - **Nowness** | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **npo.nl** | ||||
|  - **npo.nl:live** | ||||
|  - **npo.nl:radio** | ||||
|  - **npo.nl:radio:fragment** | ||||
|  - **NRK** | ||||
|  - **NRKTV** | ||||
|  - **NTV** | ||||
|  - **ntv.ru** | ||||
|  - **Nuvid** | ||||
|  - **NYTimes** | ||||
|  - **ocw.mit.edu** | ||||
|  - **OktoberfestTV** | ||||
|  - **on.aol.com** | ||||
|  - **Ooyala** | ||||
|  - **OpenFilm** | ||||
|  - **orf:fm4**: radio FM4 | ||||
|  - **orf:oe1**: Radio Österreich 1 | ||||
|  - **orf:tvthek**: ORF TVthek | ||||
|  - **ORFFM4**: radio FM4 | ||||
|  - **parliamentlive.tv**: UK parliament videos | ||||
|  - **Patreon** | ||||
|  - **PBS** | ||||
| @@ -283,13 +325,16 @@ | ||||
|  - **podomatic** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHubPlaylist** | ||||
|  - **Pornotube** | ||||
|  - **PornoXO** | ||||
|  - **PromptFile** | ||||
|  - **prosiebensat1**: ProSiebenSat.1 Digital | ||||
|  - **Pyvideo** | ||||
|  - **QuickVid** | ||||
|  - **R7** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiofrance** | ||||
|  - **Rai** | ||||
|  - **RBMARadio** | ||||
| @@ -300,8 +345,10 @@ | ||||
|  - **RottenTomatoes** | ||||
|  - **Roxwel** | ||||
|  - **RTBF** | ||||
|  - **Rte** | ||||
|  - **rtl.nl**: rtl.nl and rtlxl.nl | ||||
|  - **RTL2** | ||||
|  - **RTLnow** | ||||
|  - **rtlxl.nl** | ||||
|  - **RTP** | ||||
|  - **RTS**: RTS.ch | ||||
|  - **rtve.es:alacarta**: RTVE a la carta | ||||
| @@ -309,9 +356,11 @@ | ||||
|  - **RUHD** | ||||
|  - **rutube**: Rutube videos | ||||
|  - **rutube:channel**: Rutube channels | ||||
|  - **rutube:embed**: Rutube embedded videos | ||||
|  - **rutube:movie**: Rutube movies | ||||
|  - **rutube:person**: Rutube person videos | ||||
|  - **RUTV**: RUTV.RU | ||||
|  - **Sandia**: Sandia National Laboratories | ||||
|  - **Sapo**: SAPO Vídeos | ||||
|  - **savefrom.net** | ||||
|  - **SBS**: sbs.com.au | ||||
| @@ -339,7 +388,8 @@ | ||||
|  - **soundcloud:playlist** | ||||
|  - **soundcloud:set** | ||||
|  - **soundcloud:user** | ||||
|  - **Soundgasm** | ||||
|  - **soundgasm** | ||||
|  - **soundgasm:profile** | ||||
|  - **southpark.cc.com** | ||||
|  - **southpark.de** | ||||
|  - **Space** | ||||
| @@ -351,12 +401,14 @@ | ||||
|  - **Sport5** | ||||
|  - **SportBox** | ||||
|  - **SportDeutschland** | ||||
|  - **SRMediathek**: Süddeutscher Rundfunk | ||||
|  - **SRMediathek**: Saarländischer Rundfunk | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
|  - **StreetVoice** | ||||
|  - **SunPorno** | ||||
|  - **SVTPlay** | ||||
|  - **SWRMediathek** | ||||
|  - **Syfy** | ||||
|  - **SztvHu** | ||||
| @@ -375,7 +427,9 @@ | ||||
|  - **TeleBruxelles** | ||||
|  - **telecinco.es** | ||||
|  - **TeleMB** | ||||
|  - **TeleTask** | ||||
|  - **TenPlay** | ||||
|  - **TestTube** | ||||
|  - **TF1** | ||||
|  - **TheOnion** | ||||
|  - **ThePlatform** | ||||
| @@ -401,10 +455,19 @@ | ||||
|  - **Turbo** | ||||
|  - **Tutv** | ||||
|  - **tv.dfb.de** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvp.pl** | ||||
|  - **tvp.pl:Series** | ||||
|  - **TVPlay**: TV3Play and related services | ||||
|  - **Twitch** | ||||
|  - **Tweakers** | ||||
|  - **twitch:bookmarks** | ||||
|  - **twitch:chapter** | ||||
|  - **twitch:past_broadcasts** | ||||
|  - **twitch:profile** | ||||
|  - **twitch:stream** | ||||
|  - **twitch:video** | ||||
|  - **twitch:vod** | ||||
|  - **Ubu** | ||||
|  - **udemy** | ||||
|  - **udemy:course** | ||||
| @@ -433,6 +496,8 @@ | ||||
|  - **videoweed**: VideoWeed | ||||
|  - **Vidme** | ||||
|  - **Vidzi** | ||||
|  - **vier** | ||||
|  - **vier:videos** | ||||
|  - **viki** | ||||
|  - **vimeo** | ||||
|  - **vimeo:album** | ||||
| @@ -460,11 +525,13 @@ | ||||
|  - **WDR** | ||||
|  - **wdr:mobile** | ||||
|  - **WDRMaus**: Sendung mit der Maus | ||||
|  - **WebOfStories** | ||||
|  - **Weibo** | ||||
|  - **Wimp** | ||||
|  - **Wistia** | ||||
|  - **WorldStarHipHop** | ||||
|  - **wrzuta.pl** | ||||
|  - **WSJ**: Wall Street Journal | ||||
|  - **XBef** | ||||
|  - **XboxClips** | ||||
|  - **XHamster** | ||||
| @@ -472,8 +539,11 @@ | ||||
|  - **XNXX** | ||||
|  - **XTube** | ||||
|  - **XTubeUser**: XTube user profile | ||||
|  - **Xuite** | ||||
|  - **XVideos** | ||||
|  - **XXXYMovies** | ||||
|  - **Yahoo**: Yahoo screen and movies | ||||
|  - **Yam** | ||||
|  - **YesJapan** | ||||
|  - **Ynet** | ||||
|  - **YouJizz** | ||||
| @@ -491,9 +561,9 @@ | ||||
|  - **youtube:search_url**: YouTube.com search URLs | ||||
|  - **youtube:show**: YouTube.com (multi-season) shows | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks") | ||||
|  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | ||||
|  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **Zapiks** | ||||
|  - **ZDF** | ||||
|  - **ZDFChannel** | ||||
|  - **zingmp3:album**: mp3.zing.vn albums | ||||
|   | ||||
| @@ -2,5 +2,5 @@ | ||||
| universal = True | ||||
|  | ||||
| [flake8] | ||||
| exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build | ||||
| ignore = E501 | ||||
| exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git | ||||
| ignore = E402,E501,E731 | ||||
|   | ||||
| @@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             self.assertTrue( | ||||
|                 match_rex.match(got), | ||||
|                 'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('startswith:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             start_str = expected[len('startswith:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 got.startswith(start_str), | ||||
|                 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('contains:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             contains_str = expected[len('contains:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 contains_str in got, | ||||
|                 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
| @@ -140,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|     # Are checkable fields missing from the test case definition? | ||||
|     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) | ||||
|                           for key, value in got_dict.items() | ||||
|                           if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||
|                           if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) | ||||
|     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) | ||||
|     if missing_keys: | ||||
|         def _repr(v): | ||||
| @@ -148,11 +168,19 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|                 return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') | ||||
|             else: | ||||
|                 return repr(v) | ||||
|         info_dict_str = ''.join( | ||||
|             '    %s: %s,\n' % (_repr(k), _repr(v)) | ||||
|             for k, v in test_info_dict.items()) | ||||
|         info_dict_str = '' | ||||
|         if len(missing_keys) != len(expected_dict): | ||||
|             info_dict_str += ''.join( | ||||
|                 '    %s: %s,\n' % (_repr(k), _repr(v)) | ||||
|                 for k, v in test_info_dict.items() if k not in missing_keys) | ||||
|  | ||||
|             if info_dict_str: | ||||
|                 info_dict_str += '\n' | ||||
|         info_dict_str += ''.join( | ||||
|             '    %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) | ||||
|             for k in missing_keys) | ||||
|         write_string( | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) | ||||
|         self.assertFalse( | ||||
|             missing_keys, | ||||
|             'Missing keys in test definition: %s' % ( | ||||
|   | ||||
| @@ -39,5 +39,6 @@ | ||||
|     "writesubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false, | ||||
|     "socket_timeout": 20 | ||||
|     "socket_timeout": 20, | ||||
|     "fixup": "never" | ||||
| } | ||||
|   | ||||
| @@ -13,6 +13,7 @@ import copy | ||||
| from test.helper import FakeYDL, assertRegexpMatches | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.postprocessor.common import PostProcessor | ||||
|  | ||||
|  | ||||
| class YDL(FakeYDL): | ||||
| @@ -370,5 +371,35 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             'vbr': 10, | ||||
|         }), '^\s*10k$') | ||||
|  | ||||
|     def test_postprocessors(self): | ||||
|         filename = 'post-processor-testfile.mp4' | ||||
|         audiofile = filename + '.mp3' | ||||
|  | ||||
|         class SimplePP(PostProcessor): | ||||
|             def run(self, info): | ||||
|                 with open(audiofile, 'wt') as f: | ||||
|                     f.write('EXAMPLE') | ||||
|                 info['filepath'] | ||||
|                 return False, info | ||||
|  | ||||
|         def run_pp(params): | ||||
|             with open(filename, 'wt') as f: | ||||
|                 f.write('EXAMPLE') | ||||
|             ydl = YoutubeDL(params) | ||||
|             ydl.add_post_processor(SimplePP()) | ||||
|             ydl.post_process(filename, {'filepath': filename}) | ||||
|  | ||||
|         run_pp({'keepvideo': True}) | ||||
|         self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(filename) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|         run_pp({'keepvideo': False}) | ||||
|         self.assertFalse(os.path.exists(filename), '%s exists' % filename) | ||||
|         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) | ||||
|         os.unlink(audiofile) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -89,7 +89,7 @@ def generator(test_case): | ||||
|  | ||||
|         for tc in test_cases: | ||||
|             info_dict = tc.get('info_dict', {}) | ||||
|             if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')): | ||||
|             if not (info_dict.get('id') and info_dict.get('ext')): | ||||
|                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') | ||||
|  | ||||
|         if 'skip' in test_case: | ||||
| @@ -116,7 +116,7 @@ def generator(test_case): | ||||
|         expect_warnings(ydl, test_case.get('expected_warnings', [])) | ||||
|  | ||||
|         def get_tc_filename(tc): | ||||
|             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) | ||||
|             return ydl.prepare_filename(tc.get('info_dict', {})) | ||||
|  | ||||
|         res_dict = None | ||||
|  | ||||
|   | ||||
							
								
								
									
										72
									
								
								test/test_http.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								test/test_http.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| #!/usr/bin/env python | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.compat import compat_http_server | ||||
| import ssl | ||||
| import threading | ||||
|  | ||||
| TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||
|  | ||||
|  | ||||
| class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||
|     def log_message(self, format, *args): | ||||
|         pass | ||||
|  | ||||
|     def do_GET(self): | ||||
|         if self.path == '/video.html': | ||||
|             self.send_response(200) | ||||
|             self.send_header('Content-Type', 'text/html; charset=utf-8') | ||||
|             self.end_headers() | ||||
|             self.wfile.write(b'<html><video src="/vid.mp4" /></html>') | ||||
|         elif self.path == '/vid.mp4': | ||||
|             self.send_response(200) | ||||
|             self.send_header('Content-Type', 'video/mp4') | ||||
|             self.end_headers() | ||||
|             self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') | ||||
|         else: | ||||
|             assert False | ||||
|  | ||||
|  | ||||
| class FakeLogger(object): | ||||
|     def debug(self, msg): | ||||
|         pass | ||||
|  | ||||
|     def warning(self, msg): | ||||
|         pass | ||||
|  | ||||
|     def error(self, msg): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| class TestHTTP(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         certfn = os.path.join(TEST_DIR, 'testcert.pem') | ||||
|         self.httpd = compat_http_server.HTTPServer( | ||||
|             ('localhost', 0), HTTPTestRequestHandler) | ||||
|         self.httpd.socket = ssl.wrap_socket( | ||||
|             self.httpd.socket, certfile=certfn, server_side=True) | ||||
|         self.port = self.httpd.socket.getsockname()[1] | ||||
|         self.server_thread = threading.Thread(target=self.httpd.serve_forever) | ||||
|         self.server_thread.daemon = True | ||||
|         self.server_thread.start() | ||||
|  | ||||
|     def test_nocheckcertificate(self): | ||||
|         if sys.version_info >= (2, 7, 9):  # No certificate checking anyways | ||||
|             ydl = YoutubeDL({'logger': FakeLogger()}) | ||||
|             self.assertRaises( | ||||
|                 Exception, | ||||
|                 ydl.extract_info, 'https://localhost:%d/video.html' % self.port) | ||||
|  | ||||
|         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) | ||||
|         r = ydl.extract_info('https://localhost:%d/video.html' % self.port) | ||||
|         self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
							
								
								
									
										106
									
								
								test/test_jsinterp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								test/test_jsinterp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.jsinterp import JSInterpreter | ||||
|  | ||||
|  | ||||
| class TestJSInterpreter(unittest.TestCase): | ||||
|     def test_basic(self): | ||||
|         jsi = JSInterpreter('function x(){;}') | ||||
|         self.assertEqual(jsi.call_function('x'), None) | ||||
|  | ||||
|         jsi = JSInterpreter('function x3(){return 42;}') | ||||
|         self.assertEqual(jsi.call_function('x3'), 42) | ||||
|  | ||||
|     def test_calc(self): | ||||
|         jsi = JSInterpreter('function x4(a){return 2*a+1;}') | ||||
|         self.assertEqual(jsi.call_function('x4', 3), 7) | ||||
|  | ||||
|     def test_empty_return(self): | ||||
|         jsi = JSInterpreter('function f(){return; y()}') | ||||
|         self.assertEqual(jsi.call_function('f'), None) | ||||
|  | ||||
|     def test_morespace(self): | ||||
|         jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }') | ||||
|         self.assertEqual(jsi.call_function('x', 3), 7) | ||||
|  | ||||
|         jsi = JSInterpreter('function f () { x =  2  ; return x; }') | ||||
|         self.assertEqual(jsi.call_function('f'), 2) | ||||
|  | ||||
|     def test_strange_chars(self): | ||||
|         jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }') | ||||
|         self.assertEqual(jsi.call_function('$_xY1', 20), 21) | ||||
|  | ||||
|     def test_operators(self): | ||||
|         jsi = JSInterpreter('function f(){return 1 << 5;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 32) | ||||
|  | ||||
|         jsi = JSInterpreter('function f(){return 19 & 21;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 17) | ||||
|  | ||||
|         jsi = JSInterpreter('function f(){return 11 >> 2;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 2) | ||||
|  | ||||
|     def test_array_access(self): | ||||
|         jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}') | ||||
|         self.assertEqual(jsi.call_function('f'), [5, 2, 7]) | ||||
|  | ||||
|     def test_parens(self): | ||||
|         jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}') | ||||
|         self.assertEqual(jsi.call_function('f'), 7) | ||||
|  | ||||
|         jsi = JSInterpreter('function f(){return (1 + 2) * 3;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 9) | ||||
|  | ||||
|     def test_assignments(self): | ||||
|         jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 31) | ||||
|  | ||||
|         jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}') | ||||
|         self.assertEqual(jsi.call_function('f'), 51) | ||||
|  | ||||
|         jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}') | ||||
|         self.assertEqual(jsi.call_function('f'), -11) | ||||
|  | ||||
|     def test_comments(self): | ||||
|         'Skipping: Not yet fully implemented' | ||||
|         return | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|             var x = /* 1 + */ 2; | ||||
|             var y = /* 30 | ||||
|             * 40 */ 50; | ||||
|             return x + y; | ||||
|         } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('x'), 52) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function f() { | ||||
|             var x = "/*"; | ||||
|             var y = 1 /* comment */ + 2; | ||||
|             return y; | ||||
|         } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('f'), 3) | ||||
|  | ||||
|     def test_precedence(self): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|             var a = [10, 20, 30, 40, 50]; | ||||
|             var b = 6; | ||||
|             a[0]=a[b%a.length]; | ||||
|             return a; | ||||
|         }''') | ||||
|         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 5) | ||||
|         self.assertTrue(len(subtitles.keys()) >= 6) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
| @@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['cs'])) | ||||
|         self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') | ||||
|         self.assertTrue(len(subtitles['cs']) > 20000) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
|   | ||||
| @@ -34,8 +34,8 @@ def _make_testfunc(testfile): | ||||
|     def test_func(self): | ||||
|         as_file = os.path.join(TEST_DIR, testfile) | ||||
|         swf_file = os.path.join(TEST_DIR, test_id + '.swf') | ||||
|         if ((not os.path.exists(swf_file)) | ||||
|                 or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|         if ((not os.path.exists(swf_file)) or | ||||
|                 os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|             # Recompile | ||||
|             try: | ||||
|                 subprocess.check_call([ | ||||
|   | ||||
| @@ -52,6 +52,8 @@ from youtube_dl.utils import ( | ||||
|     urlencode_postdata, | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     render_table, | ||||
|     match_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -155,6 +157,9 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             '20141126') | ||||
|         self.assertEqual( | ||||
|             unified_strdate('2/2/2015 6:47:40 PM', day_first=False), | ||||
|             '20150202') | ||||
|  | ||||
|     def test_find_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
| @@ -237,6 +242,8 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('5 s'), 5) | ||||
|         self.assertEqual(parse_duration('3 min'), 180) | ||||
|         self.assertEqual(parse_duration('2.5 hours'), 9000) | ||||
|         self.assertEqual(parse_duration('02:03:04'), 7384) | ||||
|         self.assertEqual(parse_duration('01:02:03:04'), 93784) | ||||
|  | ||||
|     def test_fix_xml_ampersands(self): | ||||
|         self.assertEqual( | ||||
| @@ -363,6 +370,10 @@ class TestUtil(unittest.TestCase): | ||||
|             "playlist":[{"controls":{"all":null}}] | ||||
|         }''') | ||||
|  | ||||
|         inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' | ||||
|         json_code = js_to_json(inp) | ||||
|         self.assertEqual(json.loads(json_code), json.loads(inp)) | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
| @@ -370,6 +381,16 @@ class TestUtil(unittest.TestCase): | ||||
|         on = js_to_json('{"abc": true}') | ||||
|         self.assertEqual(json.loads(on), {'abc': True}) | ||||
|  | ||||
|         # Ignore JavaScript code as well | ||||
|         on = js_to_json('''{ | ||||
|             "x": 1, | ||||
|             y: "a", | ||||
|             z: some.code | ||||
|         }''') | ||||
|         d = json.loads(on) | ||||
|         self.assertEqual(d['x'], 1) | ||||
|         self.assertEqual(d['y'], 'a') | ||||
|  | ||||
|     def test_clean_html(self): | ||||
|         self.assertEqual(clean_html('a:\nb'), 'a: b') | ||||
|         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"') | ||||
| @@ -434,5 +455,46 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') | ||||
|         self.assertTrue(is_html(  # UTF-32-LE | ||||
|             b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) | ||||
|  | ||||
|     def test_render_table(self): | ||||
|         self.assertEqual( | ||||
|             render_table( | ||||
|                 ['a', 'bcd'], | ||||
|                 [[123, 4], [9999, 51]]), | ||||
|             'a    bcd\n' | ||||
|             '123  4\n' | ||||
|             '9999 51') | ||||
|  | ||||
|     def test_match_str(self): | ||||
|         self.assertRaises(ValueError, match_str, 'xy>foobar', {}) | ||||
|         self.assertFalse(match_str('xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('!xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 1200})) | ||||
|         self.assertFalse(match_str('!x', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {})) | ||||
|         self.assertTrue(match_str('x>?0', {})) | ||||
|         self.assertTrue(match_str('x>1K', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>2K', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) | ||||
|         self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 90, 'description': 'foo'})) | ||||
|         self.assertTrue(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 10})) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -8,11 +8,11 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| import io | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.compat import compat_str, compat_urlretrieve | ||||
|  | ||||
| @@ -64,6 +64,12 @@ _TESTS = [ | ||||
|         'js', | ||||
|         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', | ||||
|         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' | ||||
|     ), | ||||
|     ( | ||||
|         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', | ||||
|         'js', | ||||
|         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', | ||||
|         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', | ||||
|     ) | ||||
| ] | ||||
|  | ||||
| @@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig): | ||||
|         if not os.path.exists(fn): | ||||
|             compat_urlretrieve(url, fn) | ||||
|  | ||||
|         ie = YoutubeIE() | ||||
|         ydl = FakeYDL() | ||||
|         ie = YoutubeIE(ydl) | ||||
|         if stype == 'js': | ||||
|             with io.open(fn, encoding='utf-8') as testf: | ||||
|                 jscode = testf.read() | ||||
|   | ||||
							
								
								
									
										52
									
								
								test/testcert.pem
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								test/testcert.pem
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| -----BEGIN PRIVATE KEY----- | ||||
| MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB | ||||
| HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp | ||||
| Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h | ||||
| A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev | ||||
| mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J | ||||
| aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP | ||||
| tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54 | ||||
| BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4 | ||||
| Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd | ||||
| 63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk | ||||
| ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE | ||||
| 8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw | ||||
| XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA | ||||
| G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o | ||||
| zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN | ||||
| 8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP | ||||
| gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ | ||||
| XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28 | ||||
| Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/ | ||||
| 98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ | ||||
| 1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S | ||||
| DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN | ||||
| Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB | ||||
| FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD | ||||
| pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z | ||||
| VJua1Wn8HKxnXMI61DhTCSo= | ||||
| -----END PRIVATE KEY----- | ||||
| -----BEGIN CERTIFICATE----- | ||||
| MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD | ||||
| VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG | ||||
| A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl | ||||
| c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw | ||||
| aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw | ||||
| CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb | ||||
| MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs | ||||
| IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh | ||||
| Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA | ||||
| zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU | ||||
| YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq | ||||
| O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8 | ||||
| A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh | ||||
| KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/ | ||||
| Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd | ||||
| ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD | ||||
| AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x | ||||
| 5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb | ||||
| PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6 | ||||
| cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu | ||||
| SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe | ||||
| Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK | ||||
| -----END CERTIFICATE----- | ||||
| @@ -25,6 +25,7 @@ if os.name == 'nt': | ||||
|     import ctypes | ||||
|  | ||||
| from .compat import ( | ||||
|     compat_basestring, | ||||
|     compat_cookiejar, | ||||
|     compat_expanduser, | ||||
|     compat_http_client, | ||||
| @@ -54,8 +55,10 @@ from .utils import ( | ||||
|     PostProcessingError, | ||||
|     platform_name, | ||||
|     preferredencoding, | ||||
|     render_table, | ||||
|     SameFileError, | ||||
|     sanitize_filename, | ||||
|     std_headers, | ||||
|     subtitles_filename, | ||||
|     takewhile_inclusive, | ||||
|     UnavailableVideoError, | ||||
| @@ -73,6 +76,7 @@ from .extractor import get_info_extractor, gen_extractors | ||||
| from .downloader import get_suitable_downloader | ||||
| from .downloader.rtmp import rtmpdump_version | ||||
| from .postprocessor import ( | ||||
|     FFmpegFixupM4aPP, | ||||
|     FFmpegFixupStretchedPP, | ||||
|     FFmpegMergerPP, | ||||
|     FFmpegPostProcessor, | ||||
| @@ -134,6 +138,7 @@ class YoutubeDL(object): | ||||
|     nooverwrites:      Prevent overwriting files. | ||||
|     playliststart:     Playlist item to start at. | ||||
|     playlistend:       Playlist item to end at. | ||||
|     playlist_items:    Specific indices of playlist to download. | ||||
|     playlistreverse:   Download playlist items in reverse order. | ||||
|     matchtitle:        Download only matching titles. | ||||
|     rejecttitle:       Reject downloads for matching titles. | ||||
| @@ -143,6 +148,7 @@ class YoutubeDL(object): | ||||
|     writeinfojson:     Write the video description to a .info.json file | ||||
|     writeannotations:  Write the video annotations to a .annotations.xml file | ||||
|     writethumbnail:    Write the thumbnail image to a file | ||||
|     write_all_thumbnails:  Write all thumbnail formats to files | ||||
|     writesubtitles:    Write the video subtitles to a file | ||||
|     writeautomaticsub: Write the automatic subtitles to a file | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
| @@ -193,17 +199,25 @@ class YoutubeDL(object): | ||||
|                        postprocessor. | ||||
|     progress_hooks:    A list of functions that get called on download | ||||
|                        progress, with a dictionary with the entries | ||||
|                        * filename: The final filename | ||||
|                        * status: One of "downloading" and "finished" | ||||
|  | ||||
|                        The dict may also have some of the following entries: | ||||
|                        * status: One of "downloading", "error", or "finished". | ||||
|                                  Check this first and ignore unknown values. | ||||
|  | ||||
|                        If status is one of "downloading", or "finished", the | ||||
|                        following properties may also be present: | ||||
|                        * filename: The final filename (always present) | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * downloaded_bytes: Bytes on disk | ||||
|                        * total_bytes: Size of the whole file, None if unknown | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * total_bytes_estimate: Guess of the eventual file size, | ||||
|                                                None if unavailable. | ||||
|                        * elapsed: The number of seconds since download started. | ||||
|                        * eta: The estimated time in seconds, None if unknown | ||||
|                        * speed: The download speed in bytes/second, None if | ||||
|                                 unknown | ||||
|                        * fragment_index: The counter of the currently | ||||
|                                          downloaded video fragment. | ||||
|                        * fragment_count: The number of fragments (= individual | ||||
|                                          files that will be merged) | ||||
|  | ||||
|                        Progress hooks are guaranteed to be called at least once | ||||
|                        (with status "finished") if the download is successful. | ||||
| @@ -213,16 +227,30 @@ class YoutubeDL(object): | ||||
|                        - "never": do nothing | ||||
|                        - "warn": only emit a warning | ||||
|                        - "detect_or_warn": check whether we can do anything | ||||
|                                            about it, warn otherwise | ||||
|                                            about it, warn otherwise (default) | ||||
|     source_address:    (Experimental) Client-side IP address to bind to. | ||||
|     call_home:         Boolean, true iff we are allowed to contact the | ||||
|                        youtube-dl servers for debugging. | ||||
|     sleep_interval:    Number of seconds to sleep before each download. | ||||
|     listformats:       Print an overview of available video formats and exit. | ||||
|     list_thumbnails:   Print a table of all thumbnails and exit. | ||||
|     match_filter:      A function that gets called with the info_dict of | ||||
|                        every video. | ||||
|                        If it returns a message, the video is ignored. | ||||
|                        If it returns None, the video is downloaded. | ||||
|                        match_filter_func in utils.py is one example for this. | ||||
|     no_color:          Do not emit color codes in output. | ||||
|  | ||||
|     The following options determine which downloader is picked: | ||||
|     external_downloader: Executable of the external downloader to call. | ||||
|                        None or unset for standard (built-in) downloader. | ||||
|     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
|     the FileDownloader: | ||||
|     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, | ||||
|     noresizebuffer, retries, continuedl, noprogress, consoletitle | ||||
|     noresizebuffer, retries, continuedl, noprogress, consoletitle, | ||||
|     xattr_set_filesize. | ||||
|  | ||||
|     The following options are used by the post processors: | ||||
|     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available, | ||||
| @@ -280,8 +308,8 @@ class YoutubeDL(object): | ||||
|                     raise | ||||
|  | ||||
|         if (sys.version_info >= (3,) and sys.platform != 'win32' and | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] | ||||
|                 and not params.get('restrictfilenames', False)): | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and | ||||
|                 not params.get('restrictfilenames', False)): | ||||
|             # On Python 3, the Unicode filesystem API will throw errors (#1474) | ||||
|             self.report_warning( | ||||
|                 'Assuming --restrict-filenames since file system encoding ' | ||||
| @@ -473,7 +501,7 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             if self.params.get('no_warnings'): | ||||
|                 return | ||||
|             if self._err_file.isatty() and os.name != 'nt': | ||||
|             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|             else: | ||||
|                 _msg_header = 'WARNING:' | ||||
| @@ -485,7 +513,7 @@ class YoutubeDL(object): | ||||
|         Do the same as trouble, but prefixes the message with 'ERROR:', colored | ||||
|         in red if stderr is a tty file. | ||||
|         ''' | ||||
|         if self._err_file.isatty() and os.name != 'nt': | ||||
|         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|             _msg_header = '\033[0;31mERROR:\033[0m' | ||||
|         else: | ||||
|             _msg_header = 'ERROR:' | ||||
| @@ -532,12 +560,17 @@ class YoutubeDL(object): | ||||
|             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|             tmpl = compat_expanduser(outtmpl) | ||||
|             filename = tmpl % template_dict | ||||
|             # Temporary fix for #4787 | ||||
|             # 'Treat' all problem characters by passing filename through preferredencoding | ||||
|             # to workaround encoding issues with subprocess on python2 @ Windows | ||||
|             if sys.version_info < (3, 0) and sys.platform == 'win32': | ||||
|                 filename = encodeFilename(filename, True).decode(preferredencoding()) | ||||
|             return filename | ||||
|         except ValueError as err: | ||||
|             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
|     def _match_entry(self, info_dict, incomplete): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         video_title = info_dict.get('title', info_dict.get('id', 'video')) | ||||
| @@ -566,9 +599,17 @@ class YoutubeDL(object): | ||||
|             if max_views is not None and view_count > max_views: | ||||
|                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) | ||||
|         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): | ||||
|             return 'Skipping "%s" because it is age restricted' % title | ||||
|             return 'Skipping "%s" because it is age restricted' % video_title | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return '%s has already been recorded in archive' % video_title | ||||
|  | ||||
|         if not incomplete: | ||||
|             match_filter = self.params.get('match_filter') | ||||
|             if match_filter is not None: | ||||
|                 ret = match_filter(info_dict) | ||||
|                 if ret is not None: | ||||
|                     return ret | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -695,24 +736,51 @@ class YoutubeDL(object): | ||||
|             if playlistend == -1: | ||||
|                 playlistend = None | ||||
|  | ||||
|             playlistitems_str = self.params.get('playlist_items', None) | ||||
|             playlistitems = None | ||||
|             if playlistitems_str is not None: | ||||
|                 def iter_playlistitems(format): | ||||
|                     for string_segment in format.split(','): | ||||
|                         if '-' in string_segment: | ||||
|                             start, end = string_segment.split('-') | ||||
|                             for item in range(int(start), int(end) + 1): | ||||
|                                 yield int(item) | ||||
|                         else: | ||||
|                             yield int(string_segment) | ||||
|                 playlistitems = iter_playlistitems(playlistitems_str) | ||||
|  | ||||
|             ie_entries = ie_result['entries'] | ||||
|             if isinstance(ie_entries, list): | ||||
|                 n_all_entries = len(ie_entries) | ||||
|                 entries = ie_entries[playliststart:playlistend] | ||||
|                 if playlistitems: | ||||
|                     entries = [ie_entries[i - 1] for i in playlistitems] | ||||
|                 else: | ||||
|                     entries = ie_entries[playliststart:playlistend] | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % | ||||
|                     (ie_result['extractor'], playlist, n_all_entries, n_entries)) | ||||
|             elif isinstance(ie_entries, PagedList): | ||||
|                 entries = ie_entries.getslice( | ||||
|                     playliststart, playlistend) | ||||
|                 if playlistitems: | ||||
|                     entries = [] | ||||
|                     for item in playlistitems: | ||||
|                         entries.extend(ie_entries.getslice( | ||||
|                             item - 1, item | ||||
|                         )) | ||||
|                 else: | ||||
|                     entries = ie_entries.getslice( | ||||
|                         playliststart, playlistend) | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Downloading %d videos" % | ||||
|                     (ie_result['extractor'], playlist, n_entries)) | ||||
|             else:  # iterable | ||||
|                 entries = list(itertools.islice( | ||||
|                     ie_entries, playliststart, playlistend)) | ||||
|                 if playlistitems: | ||||
|                     entry_list = list(ie_entries) | ||||
|                     entries = [entry_list[i - 1] for i in playlistitems] | ||||
|                 else: | ||||
|                     entries = list(itertools.islice( | ||||
|                         ie_entries, playliststart, playlistend)) | ||||
|                 n_entries = len(entries) | ||||
|                 self.to_screen( | ||||
|                     "[%s] playlist %s: Downloading %d videos" % | ||||
| @@ -735,7 +803,7 @@ class YoutubeDL(object): | ||||
|                     'extractor_key': ie_result['extractor_key'], | ||||
|                 } | ||||
|  | ||||
|                 reason = self._match_entry(entry) | ||||
|                 reason = self._match_entry(entry, incomplete=True) | ||||
|                 if reason is not None: | ||||
|                     self.to_screen('[download] ' + reason) | ||||
|                     continue | ||||
| @@ -782,27 +850,44 @@ class YoutubeDL(object): | ||||
|             '!=': operator.ne, | ||||
|         } | ||||
|         operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|             (?P<key>width|height|tbr|abr|vbr|filesize) | ||||
|             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps) | ||||
|             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | ||||
|             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) | ||||
|             \]$ | ||||
|             ''' % '|'.join(map(re.escape, OPERATORS.keys()))) | ||||
|         m = operator_rex.search(format_spec) | ||||
|         if m: | ||||
|             try: | ||||
|                 comparison_value = int(m.group('value')) | ||||
|             except ValueError: | ||||
|                 comparison_value = parse_filesize(m.group('value')) | ||||
|                 if comparison_value is None: | ||||
|                     comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|                 if comparison_value is None: | ||||
|                     raise ValueError( | ||||
|                         'Invalid value %r in format specification %r' % ( | ||||
|                             m.group('value'), format_spec)) | ||||
|             op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             STR_OPERATORS = { | ||||
|                 '=': operator.eq, | ||||
|                 '!=': operator.ne, | ||||
|             } | ||||
|             str_operator_rex = re.compile(r'''(?x)\s*\[ | ||||
|                 \s*(?P<key>ext|acodec|vcodec|container|protocol) | ||||
|                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)? | ||||
|                 \s*(?P<value>[a-zA-Z0-9_-]+) | ||||
|                 \s*\]$ | ||||
|                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) | ||||
|             m = str_operator_rex.search(format_spec) | ||||
|             if m: | ||||
|                 comparison_value = m.group('value') | ||||
|                 op = STR_OPERATORS[m.group('op')] | ||||
|  | ||||
|         if not m: | ||||
|             raise ValueError('Invalid format specification %r' % format_spec) | ||||
|  | ||||
|         try: | ||||
|             comparison_value = int(m.group('value')) | ||||
|         except ValueError: | ||||
|             comparison_value = parse_filesize(m.group('value')) | ||||
|             if comparison_value is None: | ||||
|                 comparison_value = parse_filesize(m.group('value') + 'B') | ||||
|             if comparison_value is None: | ||||
|                 raise ValueError( | ||||
|                     'Invalid value %r in format specification %r' % ( | ||||
|                         m.group('value'), format_spec)) | ||||
|         op = OPERATORS[m.group('op')] | ||||
|  | ||||
|         def _filter(f): | ||||
|             actual_value = f.get(m.group('key')) | ||||
|             if actual_value is None: | ||||
| @@ -862,6 +947,24 @@ class YoutubeDL(object): | ||||
|                 return matches[-1] | ||||
|         return None | ||||
|  | ||||
|     def _calc_headers(self, info_dict): | ||||
|         res = std_headers.copy() | ||||
|  | ||||
|         add_headers = info_dict.get('http_headers') | ||||
|         if add_headers: | ||||
|             res.update(add_headers) | ||||
|  | ||||
|         cookies = self._calc_cookies(info_dict) | ||||
|         if cookies: | ||||
|             res['Cookie'] = cookies | ||||
|  | ||||
|         return res | ||||
|  | ||||
|     def _calc_cookies(self, info_dict): | ||||
|         pr = compat_urllib_request.Request(info_dict['url']) | ||||
|         self.cookiejar.add_cookie_header(pr) | ||||
|         return pr.get_header('Cookie') | ||||
|  | ||||
|     def process_video_result(self, info_dict, download=True): | ||||
|         assert info_dict.get('_type', 'video') == 'video' | ||||
|  | ||||
| @@ -876,12 +979,19 @@ class YoutubeDL(object): | ||||
|             info_dict['playlist_index'] = None | ||||
|  | ||||
|         thumbnails = info_dict.get('thumbnails') | ||||
|         if thumbnails is None: | ||||
|             thumbnail = info_dict.get('thumbnail') | ||||
|             if thumbnail: | ||||
|                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] | ||||
|         if thumbnails: | ||||
|             thumbnails.sort(key=lambda t: ( | ||||
|                 t.get('width'), t.get('height'), t.get('url'))) | ||||
|             for t in thumbnails: | ||||
|                 t.get('preference'), t.get('width'), t.get('height'), | ||||
|                 t.get('id'), t.get('url'))) | ||||
|             for i, t in enumerate(thumbnails): | ||||
|                 if 'width' in t and 'height' in t: | ||||
|                     t['resolution'] = '%dx%d' % (t['width'], t['height']) | ||||
|                 if t.get('id') is None: | ||||
|                     t['id'] = '%d' % i | ||||
|  | ||||
|         if thumbnails and 'thumbnail' not in info_dict: | ||||
|             info_dict['thumbnail'] = thumbnails[-1]['url'] | ||||
| @@ -930,6 +1040,11 @@ class YoutubeDL(object): | ||||
|             # Automatically determine file extension if missing | ||||
|             if 'ext' not in format: | ||||
|                 format['ext'] = determine_ext(format['url']).lower() | ||||
|             # Add HTTP headers, so that external programs can use them from the | ||||
|             # json output | ||||
|             full_format_info = info_dict.copy() | ||||
|             full_format_info.update(format) | ||||
|             format['http_headers'] = self._calc_headers(full_format_info) | ||||
|  | ||||
|         format_limit = self.params.get('format_limit', None) | ||||
|         if format_limit: | ||||
| @@ -945,9 +1060,12 @@ class YoutubeDL(object): | ||||
|             # element in the 'formats' field in info_dict is info_dict itself, | ||||
|             # wich can't be exported to json | ||||
|             info_dict['formats'] = formats | ||||
|         if self.params.get('listformats', None): | ||||
|         if self.params.get('listformats'): | ||||
|             self.list_formats(info_dict) | ||||
|             return | ||||
|         if self.params.get('list_thumbnails'): | ||||
|             self.list_thumbnails(info_dict) | ||||
|             return | ||||
|  | ||||
|         req_format = self.params.get('format') | ||||
|         if req_format is None: | ||||
| @@ -981,8 +1099,10 @@ class YoutubeDL(object): | ||||
|                                 else self.params['merge_output_format']) | ||||
|                             selected_format = { | ||||
|                                 'requested_formats': formats_info, | ||||
|                                 'format': rf, | ||||
|                                 'ext': formats_info[0]['ext'], | ||||
|                                 'format': '%s+%s' % (formats_info[0].get('format'), | ||||
|                                                      formats_info[1].get('format')), | ||||
|                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'), | ||||
|                                                         formats_info[1].get('format_id')), | ||||
|                                 'width': formats_info[0].get('width'), | ||||
|                                 'height': formats_info[0].get('height'), | ||||
|                                 'resolution': formats_info[0].get('resolution'), | ||||
| @@ -1036,14 +1156,14 @@ class YoutubeDL(object): | ||||
|         if 'format' not in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         reason = self._match_entry(info_dict) | ||||
|         reason = self._match_entry(info_dict, incomplete=False) | ||||
|         if reason is not None: | ||||
|             self.to_screen('[download] ' + reason) | ||||
|             return | ||||
|  | ||||
|         self._num_downloads += 1 | ||||
|  | ||||
|         filename = self.prepare_filename(info_dict) | ||||
|         info_dict['_filename'] = filename = self.prepare_filename(info_dict) | ||||
|  | ||||
|         # Forced printings | ||||
|         if self.params.get('forcetitle', False): | ||||
| @@ -1068,10 +1188,7 @@ class YoutubeDL(object): | ||||
|         if self.params.get('forceformat', False): | ||||
|             self.to_stdout(info_dict['format']) | ||||
|         if self.params.get('forcejson', False): | ||||
|             info_dict['_filename'] = filename | ||||
|             self.to_stdout(json.dumps(info_dict)) | ||||
|         if self.params.get('dump_single_json', False): | ||||
|             info_dict['_filename'] = filename | ||||
|  | ||||
|         # Do nothing else if in simulate mode | ||||
|         if self.params.get('simulate', False): | ||||
| @@ -1154,40 +1271,23 @@ class YoutubeDL(object): | ||||
|                     self.report_error('Cannot write metadata to JSON file ' + infofn) | ||||
|                     return | ||||
|  | ||||
|         if self.params.get('writethumbnail', False): | ||||
|             if info_dict.get('thumbnail') is not None: | ||||
|                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') | ||||
|                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format | ||||
|                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): | ||||
|                     self.to_screen('[%s] %s: Thumbnail is already present' % | ||||
|                                    (info_dict['extractor'], info_dict['id'])) | ||||
|                 else: | ||||
|                     self.to_screen('[%s] %s: Downloading thumbnail ...' % | ||||
|                                    (info_dict['extractor'], info_dict['id'])) | ||||
|                     try: | ||||
|                         uf = self.urlopen(info_dict['thumbnail']) | ||||
|                         with open(thumb_filename, 'wb') as thumbf: | ||||
|                             shutil.copyfileobj(uf, thumbf) | ||||
|                         self.to_screen('[%s] %s: Writing thumbnail to: %s' % | ||||
|                                        (info_dict['extractor'], info_dict['id'], thumb_filename)) | ||||
|                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                         self.report_warning('Unable to download thumbnail "%s": %s' % | ||||
|                                             (info_dict['thumbnail'], compat_str(err))) | ||||
|         self._write_thumbnails(info_dict, filename) | ||||
|  | ||||
|         if not self.params.get('skip_download', False): | ||||
|             try: | ||||
|                 def dl(name, info): | ||||
|                     fd = get_suitable_downloader(info)(self, self.params) | ||||
|                     fd = get_suitable_downloader(info, self.params)(self, self.params) | ||||
|                     for ph in self._progress_hooks: | ||||
|                         fd.add_progress_hook(ph) | ||||
|                     if self.params.get('verbose'): | ||||
|                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) | ||||
|                     return fd.download(name, info) | ||||
|  | ||||
|                 if info_dict.get('requested_formats') is not None: | ||||
|                     downloaded = [] | ||||
|                     success = True | ||||
|                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) | ||||
|                     if not merger._executable: | ||||
|                     if not merger.available: | ||||
|                         postprocessors = [] | ||||
|                         self.report_warning('You have requested multiple ' | ||||
|                                             'formats but ffmpeg or avconv are not installed.' | ||||
| @@ -1218,11 +1318,12 @@ class YoutubeDL(object): | ||||
|  | ||||
|             if success: | ||||
|                 # Fixup content | ||||
|                 fixup_policy = self.params.get('fixup') | ||||
|                 if fixup_policy is None: | ||||
|                     fixup_policy = 'detect_or_warn' | ||||
|  | ||||
|                 stretched_ratio = info_dict.get('stretched_ratio') | ||||
|                 if stretched_ratio is not None and stretched_ratio != 1: | ||||
|                     fixup_policy = self.params.get('fixup') | ||||
|                     if fixup_policy is None: | ||||
|                         fixup_policy = 'detect_or_warn' | ||||
|                     if fixup_policy == 'warn': | ||||
|                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( | ||||
|                             info_dict['id'], stretched_ratio)) | ||||
| @@ -1236,7 +1337,23 @@ class YoutubeDL(object): | ||||
|                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( | ||||
|                                     info_dict['id'], stretched_ratio)) | ||||
|                     else: | ||||
|                         assert fixup_policy == 'ignore' | ||||
|                         assert fixup_policy in ('ignore', 'never') | ||||
|  | ||||
|                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': | ||||
|                     if fixup_policy == 'warn': | ||||
|                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( | ||||
|                             info_dict['id'])) | ||||
|                     elif fixup_policy == 'detect_or_warn': | ||||
|                         fixup_pp = FFmpegFixupM4aPP(self) | ||||
|                         if fixup_pp.available: | ||||
|                             info_dict.setdefault('__postprocessors', []) | ||||
|                             info_dict['__postprocessors'].append(fixup_pp) | ||||
|                         else: | ||||
|                             self.report_warning( | ||||
|                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( | ||||
|                                     info_dict['id'])) | ||||
|                     else: | ||||
|                         assert fixup_policy in ('ignore', 'never') | ||||
|  | ||||
|                 try: | ||||
|                     self.post_process(filename, info_dict) | ||||
| @@ -1249,8 +1366,8 @@ class YoutubeDL(object): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in outtmpl | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|                 '%' not in outtmpl and | ||||
|                 self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(outtmpl) | ||||
|  | ||||
|         for url in url_list: | ||||
| @@ -1417,29 +1534,35 @@ class YoutubeDL(object): | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def line(format, idlen=20): | ||||
|             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 self.format_resolution(format), | ||||
|                 self._format_note(format), | ||||
|             )) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|         idlen = max(len('format code'), | ||||
|                     max(len(f['format_id']) for f in formats)) | ||||
|         formats_s = [ | ||||
|             line(f, idlen) for f in formats | ||||
|         table = [ | ||||
|             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] | ||||
|             for f in formats | ||||
|             if f.get('preference') is None or f['preference'] >= -1000] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' | ||||
|             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' | ||||
|             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': 'format code', 'ext': 'extension', | ||||
|             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) | ||||
|         self.to_screen('[info] Available formats for %s:\n%s\n%s' % | ||||
|                        (info_dict['id'], header_line, '\n'.join(formats_s))) | ||||
|         header_line = ['format code', 'extension', 'resolution', 'note'] | ||||
|         self.to_screen( | ||||
|             '[info] Available formats for %s:\n%s' % | ||||
|             (info_dict['id'], render_table(header_line, table))) | ||||
|  | ||||
|     def list_thumbnails(self, info_dict): | ||||
|         thumbnails = info_dict.get('thumbnails') | ||||
|         if not thumbnails: | ||||
|             tn_url = info_dict.get('thumbnail') | ||||
|             if tn_url: | ||||
|                 thumbnails = [{'id': '0', 'url': tn_url}] | ||||
|             else: | ||||
|                 self.to_screen( | ||||
|                     '[info] No thumbnails present for %s' % info_dict['id']) | ||||
|                 return | ||||
|  | ||||
|         self.to_screen( | ||||
|             '[info] Thumbnails for %s:' % info_dict['id']) | ||||
|         self.to_screen(render_table( | ||||
|             ['ID', 'width', 'height', 'URL'], | ||||
|             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
| @@ -1450,7 +1573,7 @@ class YoutubeDL(object): | ||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||
|         # To work around aforementioned issue we will replace request's original URL with | ||||
|         # percent-encoded one | ||||
|         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) | ||||
|         req_is_string = isinstance(req, compat_basestring) | ||||
|         url = req if req_is_string else req.get_full_url() | ||||
|         url_escaped = escape_url(url) | ||||
|  | ||||
| @@ -1502,7 +1625,7 @@ class YoutubeDL(object): | ||||
|         self._write_string('[debug] Python version %s - %s\n' % ( | ||||
|             platform.python_version(), platform_name())) | ||||
|  | ||||
|         exe_versions = FFmpegPostProcessor.get_versions() | ||||
|         exe_versions = FFmpegPostProcessor.get_versions(self) | ||||
|         exe_versions['rtmpdump'] = rtmpdump_version() | ||||
|         exe_str = ', '.join( | ||||
|             '%s %s' % (exe, v) | ||||
| @@ -1585,3 +1708,39 @@ class YoutubeDL(object): | ||||
|         if encoding is None: | ||||
|             encoding = preferredencoding() | ||||
|         return encoding | ||||
|  | ||||
|     def _write_thumbnails(self, info_dict, filename): | ||||
|         if self.params.get('writethumbnail', False): | ||||
|             thumbnails = info_dict.get('thumbnails') | ||||
|             if thumbnails: | ||||
|                 thumbnails = [thumbnails[-1]] | ||||
|         elif self.params.get('write_all_thumbnails', False): | ||||
|             thumbnails = info_dict.get('thumbnails') | ||||
|         else: | ||||
|             return | ||||
|  | ||||
|         if not thumbnails: | ||||
|             # No thumbnails present, so return immediately | ||||
|             return | ||||
|  | ||||
|         for t in thumbnails: | ||||
|             thumb_ext = determine_ext(t['url'], 'jpg') | ||||
|             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' | ||||
|             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' | ||||
|             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext | ||||
|  | ||||
|             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): | ||||
|                 self.to_screen('[%s] %s: Thumbnail %sis already present' % | ||||
|                                (info_dict['extractor'], info_dict['id'], thumb_display_id)) | ||||
|             else: | ||||
|                 self.to_screen('[%s] %s: Downloading thumbnail %s...' % | ||||
|                                (info_dict['extractor'], info_dict['id'], thumb_display_id)) | ||||
|                 try: | ||||
|                     uf = self.urlopen(t['url']) | ||||
|                     with open(thumb_filename, 'wb') as thumbf: | ||||
|                         shutil.copyfileobj(uf, thumbf) | ||||
|                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % | ||||
|                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) | ||||
|                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                     self.report_warning('Unable to download thumbnail "%s": %s' % | ||||
|                                         (t['url'], compat_str(err))) | ||||
|   | ||||
| @@ -23,9 +23,10 @@ from .compat import ( | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
| @@ -143,10 +144,13 @@ def _real_main(argv=None): | ||||
|             parser.error('invalid max_filesize specified') | ||||
|         opts.max_filesize = numeric_limit | ||||
|     if opts.retries is not None: | ||||
|         try: | ||||
|             opts.retries = int(opts.retries) | ||||
|         except (TypeError, ValueError): | ||||
|             parser.error('invalid retry count specified') | ||||
|         if opts.retries in ('inf', 'infinite'): | ||||
|             opts_retries = float('inf') | ||||
|         else: | ||||
|             try: | ||||
|                 opts_retries = int(opts.retries) | ||||
|             except (TypeError, ValueError): | ||||
|                 parser.error('invalid retry count specified') | ||||
|     if opts.buffersize is not None: | ||||
|         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) | ||||
|         if numeric_buffersize is None: | ||||
| @@ -185,14 +189,14 @@ def _real_main(argv=None): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
|         if opts.outtmpl is not None: | ||||
|             opts.outtmpl = opts.outtmpl.decode(preferredencoding()) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) | ||||
|                or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.useid and '%(id)s.%(ext)s') | ||||
|                or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') | ||||
|                or DEFAULT_OUTTMPL) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or | ||||
|                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.useid and '%(id)s.%(ext)s') or | ||||
|                (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or | ||||
|                DEFAULT_OUTTMPL) | ||||
|     if not os.path.splitext(outtmpl)[1] and opts.extractaudio: | ||||
|         parser.error('Cannot download a video and extract audio into the same' | ||||
|                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' | ||||
| @@ -238,6 +242,15 @@ def _real_main(argv=None): | ||||
|             'verboseOutput': opts.verbose, | ||||
|             'exec_cmd': opts.exec_cmd, | ||||
|         }) | ||||
|     if opts.xattr_set_filesize: | ||||
|         try: | ||||
|             import xattr | ||||
|             xattr  # Confuse flake8 | ||||
|         except ImportError: | ||||
|             parser.error('setting filesize xattr requested but python-xattr is not available') | ||||
|     match_filter = ( | ||||
|         None if opts.match_filter is None | ||||
|         else match_filter_func(opts.match_filter)) | ||||
|  | ||||
|     ydl_opts = { | ||||
|         'usenetrc': opts.usenetrc, | ||||
| @@ -268,7 +281,7 @@ def _real_main(argv=None): | ||||
|         'ignoreerrors': opts.ignoreerrors, | ||||
|         'ratelimit': opts.ratelimit, | ||||
|         'nooverwrites': opts.nooverwrites, | ||||
|         'retries': opts.retries, | ||||
|         'retries': opts_retries, | ||||
|         'buffersize': opts.buffersize, | ||||
|         'noresizebuffer': opts.noresizebuffer, | ||||
|         'continuedl': opts.continue_dl, | ||||
| @@ -286,6 +299,7 @@ def _real_main(argv=None): | ||||
|         'writeannotations': opts.writeannotations, | ||||
|         'writeinfojson': opts.writeinfojson, | ||||
|         'writethumbnail': opts.writethumbnail, | ||||
|         'write_all_thumbnails': opts.write_all_thumbnails, | ||||
|         'writesubtitles': opts.writesubtitles, | ||||
|         'writeautomaticsub': opts.writeautomaticsub, | ||||
|         'allsubtitles': opts.allsubtitles, | ||||
| @@ -329,6 +343,15 @@ def _real_main(argv=None): | ||||
|         'fixup': opts.fixup, | ||||
|         'source_address': opts.source_address, | ||||
|         'call_home': opts.call_home, | ||||
|         'sleep_interval': opts.sleep_interval, | ||||
|         'external_downloader': opts.external_downloader, | ||||
|         'list_thumbnails': opts.list_thumbnails, | ||||
|         'playlist_items': opts.playlist_items, | ||||
|         'xattr_set_filesize': opts.xattr_set_filesize, | ||||
|         'match_filter': match_filter, | ||||
|         'no_color': opts.no_color, | ||||
|         'ffmpeg_location': opts.ffmpeg_location, | ||||
|         'hls_prefer_native': opts.hls_prefer_native, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
| @@ -346,7 +369,9 @@ def _real_main(argv=None): | ||||
|                 sys.exit() | ||||
|  | ||||
|             ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) | ||||
|             parser.error('you must provide at least one URL') | ||||
|             parser.error( | ||||
|                 'You must provide at least one URL.\n' | ||||
|                 'Type youtube-dl --help to see a list of all options.') | ||||
|  | ||||
|         try: | ||||
|             if opts.load_info_filename is not None: | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|  | ||||
| import base64 | ||||
| from math import ceil | ||||
|  | ||||
| @@ -329,3 +327,5 @@ def inc(data): | ||||
|             data[i] = data[i] + 1 | ||||
|             break | ||||
|     return data | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|   | ||||
| @@ -71,6 +71,11 @@ try: | ||||
| except ImportError: | ||||
|     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') | ||||
|  | ||||
| try: | ||||
|     import http.server as compat_http_server | ||||
| except ImportError: | ||||
|     import BaseHTTPServer as compat_http_server | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import unquote as compat_urllib_parse_unquote | ||||
| except ImportError: | ||||
| @@ -109,6 +114,26 @@ except ImportError: | ||||
|             string += pct_sequence.decode(encoding, errors) | ||||
|         return string | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode  # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     compat_basestring = basestring  # Python 2 | ||||
| except NameError: | ||||
|     compat_basestring = str | ||||
|  | ||||
| try: | ||||
|     compat_chr = unichr  # Python 2 | ||||
| except NameError: | ||||
|     compat_chr = chr | ||||
|  | ||||
| try: | ||||
|     from xml.etree.ElementTree import ParseError as compat_xml_parse_error | ||||
| except ImportError:  # Python 2.6 | ||||
|     from xml.parsers.expat import ExpatError as compat_xml_parse_error | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import parse_qs as compat_parse_qs | ||||
| @@ -118,7 +143,7 @@ except ImportError:  # Python 2 | ||||
|  | ||||
|     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, | ||||
|                    encoding='utf-8', errors='replace'): | ||||
|         qs, _coerce_result = qs, unicode | ||||
|         qs, _coerce_result = qs, compat_str | ||||
|         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] | ||||
|         r = [] | ||||
|         for name_value in pairs: | ||||
| @@ -157,21 +182,6 @@ except ImportError:  # Python 2 | ||||
|                 parsed_result[name] = [value] | ||||
|         return parsed_result | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode  # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     compat_chr = unichr  # Python 2 | ||||
| except NameError: | ||||
|     compat_chr = chr | ||||
|  | ||||
| try: | ||||
|     from xml.etree.ElementTree import ParseError as compat_xml_parse_error | ||||
| except ImportError:  # Python 2.6 | ||||
|     from xml.parsers.expat import ExpatError as compat_xml_parse_error | ||||
|  | ||||
| try: | ||||
|     from shlex import quote as shlex_quote | ||||
| except ImportError:  # Python < 3.3 | ||||
| @@ -357,6 +367,7 @@ def workaround_optparse_bug9161(): | ||||
|  | ||||
| __all__ = [ | ||||
|     'compat_HTTPError', | ||||
|     'compat_basestring', | ||||
|     'compat_chr', | ||||
|     'compat_cookiejar', | ||||
|     'compat_expanduser', | ||||
| @@ -365,6 +376,7 @@ __all__ = [ | ||||
|     'compat_html_entities', | ||||
|     'compat_html_parser', | ||||
|     'compat_http_client', | ||||
|     'compat_http_server', | ||||
|     'compat_kwargs', | ||||
|     'compat_ord', | ||||
|     'compat_parse_qs', | ||||
|   | ||||
| @@ -1,35 +1,44 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .external import get_external_downloader | ||||
| from .f4m import F4mFD | ||||
| from .hls import HlsFD | ||||
| from .hls import NativeHlsFD | ||||
| from .http import HttpFD | ||||
| from .mplayer import MplayerFD | ||||
| from .rtmp import RtmpFD | ||||
| from .f4m import F4mFD | ||||
|  | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     determine_protocol, | ||||
| ) | ||||
|  | ||||
| PROTOCOL_MAP = { | ||||
|     'rtmp': RtmpFD, | ||||
|     'm3u8_native': NativeHlsFD, | ||||
|     'm3u8': HlsFD, | ||||
|     'mms': MplayerFD, | ||||
|     'rtsp': MplayerFD, | ||||
|     'f4m': F4mFD, | ||||
| } | ||||
|  | ||||
| def get_suitable_downloader(info_dict): | ||||
|  | ||||
| def get_suitable_downloader(info_dict, params={}): | ||||
|     """Get the downloader class that can handle the info dict.""" | ||||
|     url = info_dict['url'] | ||||
|     protocol = info_dict.get('protocol') | ||||
|     protocol = determine_protocol(info_dict) | ||||
|     info_dict['protocol'] = protocol | ||||
|  | ||||
|     if url.startswith('rtmp'): | ||||
|         return RtmpFD | ||||
|     if protocol == 'm3u8_native': | ||||
|     external_downloader = params.get('external_downloader') | ||||
|     if external_downloader is not None: | ||||
|         ed = get_external_downloader(external_downloader) | ||||
|         if ed.supports(info_dict): | ||||
|             return ed | ||||
|  | ||||
|     if protocol == 'm3u8' and params.get('hls_prefer_native'): | ||||
|         return NativeHlsFD | ||||
|     if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): | ||||
|         return HlsFD | ||||
|     if url.startswith('mms') or url.startswith('rtsp'): | ||||
|         return MplayerFD | ||||
|     if determine_ext(url) == 'f4m': | ||||
|         return F4mFD | ||||
|     else: | ||||
|         return HttpFD | ||||
|  | ||||
|     return PROTOCOL_MAP.get(protocol, HttpFD) | ||||
|  | ||||
|  | ||||
| __all__ = [ | ||||
|     'get_suitable_downloader', | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| @@ -25,21 +25,23 @@ class FileDownloader(object): | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     verbose:           Print additional info to stdout. | ||||
|     quiet:             Do not print messages to stdout. | ||||
|     ratelimit:         Download speed limit, in bytes/sec. | ||||
|     retries:           Number of times to retry for HTTP error 5xx | ||||
|     buffersize:        Size of download buffer in bytes. | ||||
|     noresizebuffer:    Do not automatically resize the download buffer. | ||||
|     continuedl:        Try to continue downloads if possible. | ||||
|     noprogress:        Do not print the progress bar. | ||||
|     logtostderr:       Log messages to stderr instead of stdout. | ||||
|     consoletitle:      Display progress in console window's titlebar. | ||||
|     nopart:            Do not use temporary .part files. | ||||
|     updatetime:        Use the Last-modified header to set output file timestamps. | ||||
|     test:              Download only first bytes to test the downloader. | ||||
|     min_filesize:      Skip files smaller than this size | ||||
|     max_filesize:      Skip files larger than this size | ||||
|     verbose:            Print additional info to stdout. | ||||
|     quiet:              Do not print messages to stdout. | ||||
|     ratelimit:          Download speed limit, in bytes/sec. | ||||
|     retries:            Number of times to retry for HTTP error 5xx | ||||
|     buffersize:         Size of download buffer in bytes. | ||||
|     noresizebuffer:     Do not automatically resize the download buffer. | ||||
|     continuedl:         Try to continue downloads if possible. | ||||
|     noprogress:         Do not print the progress bar. | ||||
|     logtostderr:        Log messages to stderr instead of stdout. | ||||
|     consoletitle:       Display progress in console window's titlebar. | ||||
|     nopart:             Do not use temporary .part files. | ||||
|     updatetime:         Use the Last-modified header to set output file timestamps. | ||||
|     test:               Download only first bytes to test the downloader. | ||||
|     min_filesize:       Skip files smaller than this size | ||||
|     max_filesize:       Skip files larger than this size | ||||
|     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. | ||||
|                         (experimenatal) | ||||
|  | ||||
|     Subclasses of this one must re-define the real_download method. | ||||
|     """ | ||||
| @@ -52,6 +54,7 @@ class FileDownloader(object): | ||||
|         self.ydl = ydl | ||||
|         self._progress_hooks = [] | ||||
|         self.params = params | ||||
|         self.add_progress_hook(self.report_progress) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
| @@ -224,42 +227,64 @@ class FileDownloader(object): | ||||
|             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) | ||||
|         self.to_console_title('youtube-dl ' + msg) | ||||
|  | ||||
|     def report_progress(self, percent, data_len_str, speed, eta): | ||||
|         """Report download progress.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|     def report_progress(self, s): | ||||
|         if s['status'] == 'finished': | ||||
|             if self.params.get('noprogress', False): | ||||
|                 self.to_screen('[download] Download completed') | ||||
|             else: | ||||
|                 s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|                 if s.get('elapsed') is not None: | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' | ||||
|                 else: | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s' | ||||
|                 self._report_progress_status( | ||||
|                     msg_template % s, is_last_line=True) | ||||
|  | ||||
|         if self.params.get('noprogress'): | ||||
|             return | ||||
|         if eta is not None: | ||||
|             eta_str = self.format_eta(eta) | ||||
|         else: | ||||
|             eta_str = 'Unknown ETA' | ||||
|         if percent is not None: | ||||
|             percent_str = self.format_percent(percent) | ||||
|         else: | ||||
|             percent_str = 'Unknown %' | ||||
|         speed_str = self.format_speed(speed) | ||||
|  | ||||
|         msg = ('%s of %s at %s ETA %s' % | ||||
|                (percent_str, data_len_str, speed_str, eta_str)) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): | ||||
|         if self.params.get('noprogress', False): | ||||
|         if s['status'] != 'downloading': | ||||
|             return | ||||
|         downloaded_str = format_bytes(downloaded_data_len) | ||||
|         speed_str = self.format_speed(speed) | ||||
|         elapsed_str = FileDownloader.format_seconds(elapsed) | ||||
|         msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_finish(self, data_len_str, tot_time): | ||||
|         """Report download finished.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             self.to_screen('[download] Download completed') | ||||
|         if s.get('eta') is not None: | ||||
|             s['_eta_str'] = self.format_eta(s['eta']) | ||||
|         else: | ||||
|             self._report_progress_status( | ||||
|                 ('100%% of %s in %s' % | ||||
|                  (data_len_str, self.format_seconds(tot_time))), | ||||
|                 is_last_line=True) | ||||
|             s['_eta_str'] = 'Unknown ETA' | ||||
|  | ||||
|         if s.get('total_bytes') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) | ||||
|         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') == 0: | ||||
|                 s['_percent_str'] = self.format_percent(0) | ||||
|             else: | ||||
|                 s['_percent_str'] = 'Unknown %' | ||||
|  | ||||
|         if s.get('speed') is not None: | ||||
|             s['_speed_str'] = self.format_speed(s['speed']) | ||||
|         else: | ||||
|             s['_speed_str'] = 'Unknown speed' | ||||
|  | ||||
|         if s.get('total_bytes') is not None: | ||||
|             s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         elif s.get('total_bytes_estimate') is not None: | ||||
|             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) | ||||
|             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') is not None: | ||||
|                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) | ||||
|                 if s.get('elapsed'): | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' | ||||
|                 else: | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' | ||||
|             else: | ||||
|                 msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' | ||||
|  | ||||
|         self._report_progress_status(msg_template % s) | ||||
|  | ||||
|     def report_resuming_byte(self, resume_len): | ||||
|         """Report attempt to resume at given byte.""" | ||||
| @@ -284,15 +309,16 @@ class FileDownloader(object): | ||||
|         """Download to a filename using the info from info_dict | ||||
|         Return True on success and False otherwise | ||||
|         """ | ||||
|  | ||||
|         nooverwrites_and_exists = ( | ||||
|             self.params.get('nooverwrites', False) | ||||
|             and os.path.exists(encodeFilename(filename)) | ||||
|             self.params.get('nooverwrites', False) and | ||||
|             os.path.exists(encodeFilename(filename)) | ||||
|         ) | ||||
|  | ||||
|         continuedl_and_exists = ( | ||||
|             self.params.get('continuedl', False) | ||||
|             and os.path.isfile(encodeFilename(filename)) | ||||
|             and not self.params.get('nopart', False) | ||||
|             self.params.get('continuedl', False) and | ||||
|             os.path.isfile(encodeFilename(filename)) and | ||||
|             not self.params.get('nopart', False) | ||||
|         ) | ||||
|  | ||||
|         # Check file already present | ||||
| @@ -305,6 +331,11 @@ class FileDownloader(object): | ||||
|             }) | ||||
|             return True | ||||
|  | ||||
|         sleep_interval = self.params.get('sleep_interval') | ||||
|         if sleep_interval: | ||||
|             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) | ||||
|             time.sleep(sleep_interval) | ||||
|  | ||||
|         return self.real_download(filename, info_dict) | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
| @@ -319,3 +350,24 @@ class FileDownloader(object): | ||||
|         # See YoutubeDl.py (search for progress_hooks) for a description of | ||||
|         # this interface | ||||
|         self._progress_hooks.append(ph) | ||||
|  | ||||
|     def _debug_cmd(self, args, subprocess_encoding, exe=None): | ||||
|         if not self.params.get('verbose', False): | ||||
|             return | ||||
|  | ||||
|         if exe is None: | ||||
|             exe = os.path.basename(args[0]) | ||||
|  | ||||
|         if subprocess_encoding: | ||||
|             str_args = [ | ||||
|                 a.decode(subprocess_encoding) if isinstance(a, bytes) else a | ||||
|                 for a in args] | ||||
|         else: | ||||
|             str_args = args | ||||
|         try: | ||||
|             import pipes | ||||
|             shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||
|         except ImportError: | ||||
|             shell_quote = repr | ||||
|         self.to_screen('[debug] %s command line: %s' % ( | ||||
|             exe, shell_quote(str_args))) | ||||
|   | ||||
							
								
								
									
										126
									
								
								youtube_dl/downloader/external.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								youtube_dl/downloader/external.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,126 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import subprocess | ||||
| import sys | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ExternalFD(FileDownloader):
    """Base class for downloaders that delegate the actual transfer to an
    external command-line program (curl, wget, aria2c, ...).

    Subclasses must either implement _make_cmd() (build an argument
    vector) or override _call_downloader() entirely.
    """

    def real_download(self, filename, info_dict):
        """Download info_dict['url'] to filename; return True on success."""
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        retval = self._call_downloader(tmpfilename, info_dict)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        # CurlFD -> 'curl', Aria2cFD -> 'aria2c', ...
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        # Path/name of the external program, from --external-downloader.
        return self.params.get('external_downloader')

    @classmethod
    def supports(cls, info_dict):
        """Whether this downloader can handle info_dict's protocol."""
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

    def _source_address(self, command_option):
        # [option, address] for the client source address, or [] if unset.
        source_address = self.params.get('source_address')
        if source_address is None:
            return []
        return [command_option, source_address]

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = self._make_cmd(tmpfilename, info_dict)

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
        else:
            subprocess_encoding = None
        self._debug_cmd(cmd, subprocess_encoding)

        p = subprocess.Popen(
            cmd, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        if p.returncode != 0:
            # BUG FIX: communicate() yields bytes; decode before writing
            # to the text error stream.
            self.to_stderr(stderr.decode('utf-8', 'replace'))
        return p.returncode
|  | ||||
|  | ||||
class CurlFD(ExternalFD):
    """Build the curl invocation for plain HTTP(S)/FTP downloads."""

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '--location', '-o', tmpfilename]
        for name, value in info_dict['http_headers'].items():
            cmd.extend(['--header', '%s: %s' % (name, value)])
        cmd.extend(self._source_address('--interface'))
        cmd.extend(['--', info_dict['url']])
        return cmd
|  | ||||
|  | ||||
class WgetFD(ExternalFD):
    """Build the wget invocation for plain HTTP(S)/FTP downloads."""

    def _make_cmd(self, tmpfilename, info_dict):
        header_args = []
        for name, value in info_dict['http_headers'].items():
            header_args.extend(['--header', '%s: %s' % (name, value)])
        return ([self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] +
                header_args +
                self._source_address('--bind-address') +
                ['--', info_dict['url']])
|  | ||||
|  | ||||
class Aria2cFD(ExternalFD):
    """Build the aria2c invocation for plain HTTP(S)/FTP downloads."""

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [
            self.exe, '-c',
            '--min-split-size', '1M', '--max-connection-per-server', '4']
        # aria2c takes the output location as directory + filename.
        target_dir = os.path.dirname(tmpfilename)
        if target_dir:
            cmd.extend(['--dir', target_dir])
        cmd.extend(['--out', os.path.basename(tmpfilename)])
        for name, value in info_dict['http_headers'].items():
            cmd.extend(['--header', '%s: %s' % (name, value)])
        cmd.extend(self._source_address('--interface'))
        cmd.extend(['--', info_dict['url']])
        return cmd
|  | ||||
# Registry of the available external downloaders, keyed by basename
# ('curl', 'wget', 'aria2c', ...), built from the *FD classes above.
_BY_NAME = dict(
    (klass.get_basename(), klass)
    for name, klass in globals().items()
    if name.endswith('FD') and name != 'ExternalFD'
)


def list_external_downloaders():
    """Return the names of all supported external downloaders, sorted."""
    return sorted(_BY_NAME.keys())


def get_external_downloader(external_downloader):
    """ Given the name of the executable, see whether we support the given
        downloader. Raises KeyError for unsupported downloaders. """
    # Generalized: strip both directory and extension so that e.g.
    # '/usr/bin/wget' or 'wget.exe' resolve to the 'wget' downloader.
    bn = os.path.splitext(os.path.basename(external_downloader))[0]
    return _BY_NAME[bn]
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import io | ||||
| @@ -15,7 +15,6 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     xpath_text, | ||||
| @@ -177,13 +176,12 @@ def build_fragments_list(boot_info): | ||||
|     """ Return a list of (segment, fragment) for each fragment in the video """ | ||||
|     res = [] | ||||
|     segment_run_table = boot_info['segments'][0] | ||||
|     # I've only found videos with one segment | ||||
|     segment_run_entry = segment_run_table['segment_run'][0] | ||||
|     n_frags = segment_run_entry[1] | ||||
|     fragment_run_entry_table = boot_info['fragments'][0]['fragments'] | ||||
|     first_frag_number = fragment_run_entry_table[0]['first'] | ||||
|     for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)): | ||||
|         res.append((1, frag_number)) | ||||
|     fragments_counter = itertools.count(first_frag_number) | ||||
|     for segment, fragments_count in segment_run_table['segment_run']: | ||||
|         for _ in range(fragments_count): | ||||
|             res.append((segment, next(fragments_counter))) | ||||
|     return res | ||||
|  | ||||
|  | ||||
| @@ -231,25 +229,32 @@ class F4mFD(FileDownloader): | ||||
|     A downloader for f4m manifests or AdobeHDS. | ||||
|     """ | ||||
|  | ||||
|     def _get_unencrypted_media(self, doc): | ||||
|         media = doc.findall(_add_ns('media')) | ||||
|         if not media: | ||||
|             self.report_error('No media found') | ||||
|         for e in (doc.findall(_add_ns('drmAdditionalHeader')) + | ||||
|                   doc.findall(_add_ns('drmAdditionalHeaderSet'))): | ||||
|             # If id attribute is missing it's valid for all media nodes | ||||
|             # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute | ||||
|             if 'id' not in e.attrib: | ||||
|                 self.report_error('Missing ID in f4m DRM') | ||||
|         media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and | ||||
|                                       'drmAdditionalHeaderSetId' not in e.attrib, | ||||
|                             media)) | ||||
|         if not media: | ||||
|             self.report_error('Unsupported DRM') | ||||
|         return media | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
|         requested_bitrate = info_dict.get('tbr') | ||||
|         self.to_screen('[download] Downloading f4m manifest') | ||||
|         manifest = self.ydl.urlopen(man_url).read() | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         doc = etree.fromstring(manifest) | ||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] | ||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) | ||||
|                    for f in self._get_unencrypted_media(doc)] | ||||
|         if requested_bitrate is None: | ||||
|             # get the best format | ||||
|             formats = sorted(formats, key=lambda f: f[0]) | ||||
| @@ -281,39 +286,65 @@ class F4mFD(FileDownloader): | ||||
|         # For some akamai manifests we'll need to add a query to the fragment url | ||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') | ||||
|  | ||||
|         write_flv_header(dest_stream) | ||||
|         write_metadata_tag(dest_stream, metadata) | ||||
|  | ||||
|         # This dict stores the download progress, it's updated by the progress | ||||
|         # hook | ||||
|         state = { | ||||
|             'status': 'downloading', | ||||
|             'downloaded_bytes': 0, | ||||
|             'frag_counter': 0, | ||||
|             'frag_index': 0, | ||||
|             'frag_count': total_frags, | ||||
|             'filename': filename, | ||||
|             'tmpfilename': tmpfilename, | ||||
|         } | ||||
|         start = time.time() | ||||
|  | ||||
|         def frag_progress_hook(status): | ||||
|             frag_total_bytes = status.get('total_bytes', 0) | ||||
|             estimated_size = (state['downloaded_bytes'] + | ||||
|                               (total_frags - state['frag_counter']) * frag_total_bytes) | ||||
|             if status['status'] == 'finished': | ||||
|         def frag_progress_hook(s): | ||||
|             if s['status'] not in ('downloading', 'finished'): | ||||
|                 return | ||||
|  | ||||
|             frag_total_bytes = s.get('total_bytes', 0) | ||||
|             if s['status'] == 'finished': | ||||
|                 state['downloaded_bytes'] += frag_total_bytes | ||||
|                 state['frag_counter'] += 1 | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 byte_counter = state['downloaded_bytes'] | ||||
|                 state['frag_index'] += 1 | ||||
|  | ||||
|             estimated_size = ( | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||
|                 (state['frag_index'] + 1) * total_frags) | ||||
|             time_now = time.time() | ||||
|             state['total_bytes_estimate'] = estimated_size | ||||
|             state['elapsed'] = time_now - start | ||||
|  | ||||
|             if s['status'] == 'finished': | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|             else: | ||||
|                 frag_downloaded_bytes = status['downloaded_bytes'] | ||||
|                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes | ||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] | ||||
|                 frag_progress = self.calc_percent(frag_downloaded_bytes, | ||||
|                                                   frag_total_bytes) | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|                 progress += frag_progress / float(total_frags) | ||||
|  | ||||
|             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) | ||||
|             self.report_progress(progress, format_bytes(estimated_size), | ||||
|                                  status.get('speed'), eta) | ||||
|                 state['eta'] = self.calc_eta( | ||||
|                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) | ||||
|                 state['speed'] = s.get('speed') | ||||
|             self._hook_progress(state) | ||||
|  | ||||
|         http_dl.add_progress_hook(frag_progress_hook) | ||||
|  | ||||
|         frags_filenames = [] | ||||
| @@ -337,8 +368,8 @@ class F4mFD(FileDownloader): | ||||
|             frags_filenames.append(frag_filename) | ||||
|  | ||||
|         dest_stream.close() | ||||
|         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) | ||||
|  | ||||
|         elapsed = time.time() - start | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
| @@ -349,6 +380,7 @@ class F4mFD(FileDownloader): | ||||
|             'total_bytes': fsize, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': elapsed, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -11,6 +11,7 @@ from ..compat import ( | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeArgument, | ||||
|     encodeFilename, | ||||
| ) | ||||
|  | ||||
| @@ -21,23 +22,21 @@ class HlsFD(FileDownloader): | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         args = [ | ||||
|             '-y', '-i', url, '-f', 'mp4', '-c', 'copy', | ||||
|             '-bsf:a', 'aac_adtstoasc', | ||||
|             encodeFilename(tmpfilename, for_subprocess=True)] | ||||
|  | ||||
|         ffpp = FFmpegPostProcessor(downloader=self) | ||||
|         program = ffpp._executable | ||||
|         if program is None: | ||||
|         if not ffpp.available: | ||||
|             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') | ||||
|             return False | ||||
|         ffpp.check_version() | ||||
|         cmd = [program] + args | ||||
|  | ||||
|         retval = subprocess.call(cmd) | ||||
|         args = [ | ||||
|             encodeArgument(opt) | ||||
|             for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|         args.append(encodeFilename(tmpfilename, True)) | ||||
|  | ||||
|         retval = subprocess.call(args) | ||||
|         if retval == 0: | ||||
|             fsize = os.path.getsize(encodeFilename(tmpfilename)) | ||||
|             self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize)) | ||||
|             self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) | ||||
|             self.try_rename(tmpfilename, filename) | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': fsize, | ||||
| @@ -48,7 +47,7 @@ class HlsFD(FileDownloader): | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr('\n') | ||||
|             self.report_error('%s exited with code %d' % (program, retval)) | ||||
|             self.report_error('%s exited with code %d' % (ffpp.basename, retval)) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import errno | ||||
| import os | ||||
| import socket | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| @@ -12,7 +14,6 @@ from ..utils import ( | ||||
|     ContentTooShortError, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     format_bytes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -24,10 +25,6 @@ class HttpFD(FileDownloader): | ||||
|  | ||||
|         # Do not include the Accept-Encoding header | ||||
|         headers = {'Youtubedl-no-compression': 'True'} | ||||
|         if 'user_agent' in info_dict: | ||||
|             headers['Youtubedl-user-agent'] = info_dict['user_agent'] | ||||
|         if 'http_referer' in info_dict: | ||||
|             headers['Referer'] = info_dict['http_referer'] | ||||
|         add_headers = info_dict.get('http_headers') | ||||
|         if add_headers: | ||||
|             headers.update(add_headers) | ||||
| @@ -103,6 +100,11 @@ class HttpFD(FileDownloader): | ||||
|                             resume_len = 0 | ||||
|                             open_mode = 'wb' | ||||
|                             break | ||||
|             except socket.error as e: | ||||
|                 if e.errno != errno.ECONNRESET: | ||||
|                     # Connection reset is no problem, just retry | ||||
|                     raise | ||||
|  | ||||
|             # Retry | ||||
|             count += 1 | ||||
|             if count <= retries: | ||||
| @@ -133,7 +135,6 @@ class HttpFD(FileDownloader): | ||||
|                 self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | ||||
|                 return False | ||||
|  | ||||
|         data_len_str = format_bytes(data_len) | ||||
|         byte_counter = 0 + resume_len | ||||
|         block_size = self.params.get('buffersize', 1024) | ||||
|         start = time.time() | ||||
| @@ -161,6 +162,14 @@ class HttpFD(FileDownloader): | ||||
|                 except (OSError, IOError) as err: | ||||
|                     self.report_error('unable to open for writing: %s' % str(err)) | ||||
|                     return False | ||||
|  | ||||
|                 if self.params.get('xattr_set_filesize', False) and data_len is not None: | ||||
|                     try: | ||||
|                         import xattr | ||||
|                         xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) | ||||
|                     except(OSError, IOError, ImportError) as err: | ||||
|                         self.report_error('unable to set filesize xattr: %s' % str(err)) | ||||
|  | ||||
|             try: | ||||
|                 stream.write(data_block) | ||||
|             except (IOError, OSError) as err: | ||||
| @@ -184,20 +193,19 @@ class HttpFD(FileDownloader): | ||||
|             # Progress message | ||||
|             speed = self.calc_speed(start, now, byte_counter - resume_len) | ||||
|             if data_len is None: | ||||
|                 eta = percent = None | ||||
|                 eta = None | ||||
|             else: | ||||
|                 percent = self.calc_percent(byte_counter, data_len) | ||||
|                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | ||||
|             self.report_progress(percent, data_len_str, speed, eta) | ||||
|  | ||||
|             self._hook_progress({ | ||||
|                 'status': 'downloading', | ||||
|                 'downloaded_bytes': byte_counter, | ||||
|                 'total_bytes': data_len, | ||||
|                 'tmpfilename': tmpfilename, | ||||
|                 'filename': filename, | ||||
|                 'status': 'downloading', | ||||
|                 'eta': eta, | ||||
|                 'speed': speed, | ||||
|                 'elapsed': now - start, | ||||
|             }) | ||||
|  | ||||
|             if is_test and byte_counter == data_len: | ||||
| @@ -209,7 +217,13 @@ class HttpFD(FileDownloader): | ||||
|             return False | ||||
|         if tmpfilename != '-': | ||||
|             stream.close() | ||||
|         self.report_finish(data_len_str, (time.time() - start)) | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': data_len, | ||||
|             'tmpfilename': tmpfilename, | ||||
|             'status': 'error', | ||||
|         }) | ||||
|         if data_len is not None and byte_counter != data_len: | ||||
|             raise ContentTooShortError(byte_counter, int(data_len)) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
| @@ -223,6 +237,7 @@ class HttpFD(FileDownloader): | ||||
|             'total_bytes': byte_counter, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': time.time() - start, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -11,7 +11,6 @@ from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     get_exe_version, | ||||
| ) | ||||
|  | ||||
| @@ -51,23 +50,23 @@ class RtmpFD(FileDownloader): | ||||
|                     if not resume_percent: | ||||
|                         resume_percent = percent | ||||
|                         resume_downloaded_data_len = downloaded_data_len | ||||
|                     eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) | ||||
|                     time_now = time.time() | ||||
|                     eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = '~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
|                         'status': 'downloading', | ||||
|                         'downloaded_bytes': downloaded_data_len, | ||||
|                         'total_bytes': data_len, | ||||
|                         'total_bytes_estimate': data_len, | ||||
|                         'tmpfilename': tmpfilename, | ||||
|                         'filename': filename, | ||||
|                         'status': 'downloading', | ||||
|                         'eta': eta, | ||||
|                         'elapsed': time_now - start, | ||||
|                         'speed': speed, | ||||
|                     }) | ||||
|                     cursor_in_new_line = False | ||||
|                 else: | ||||
|                     # no percent for live streams | ||||
|                     mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) | ||||
| @@ -75,15 +74,15 @@ class RtmpFD(FileDownloader): | ||||
|                         downloaded_data_len = int(float(mobj.group(1)) * 1024) | ||||
|                         time_now = time.time() | ||||
|                         speed = self.calc_speed(start, time_now, downloaded_data_len) | ||||
|                         self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) | ||||
|                         cursor_in_new_line = False | ||||
|                         self._hook_progress({ | ||||
|                             'downloaded_bytes': downloaded_data_len, | ||||
|                             'tmpfilename': tmpfilename, | ||||
|                             'filename': filename, | ||||
|                             'status': 'downloading', | ||||
|                             'elapsed': time_now - start, | ||||
|                             'speed': speed, | ||||
|                         }) | ||||
|                         cursor_in_new_line = False | ||||
|                     elif self.params.get('verbose', False): | ||||
|                         if not cursor_in_new_line: | ||||
|                             self.to_screen('') | ||||
| @@ -104,6 +103,9 @@ class RtmpFD(FileDownloader): | ||||
|         live = info_dict.get('rtmp_live', False) | ||||
|         conn = info_dict.get('rtmp_conn', None) | ||||
|         protocol = info_dict.get('rtmp_protocol', None) | ||||
|         real_time = info_dict.get('rtmp_real_time', False) | ||||
|         no_resume = info_dict.get('no_resume', False) | ||||
|         continue_dl = info_dict.get('continuedl', False) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
| @@ -141,7 +143,14 @@ class RtmpFD(FileDownloader): | ||||
|             basic_args += ['--conn', conn] | ||||
|         if protocol is not None: | ||||
|             basic_args += ['--protocol', protocol] | ||||
|         args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] | ||||
|         if real_time: | ||||
|             basic_args += ['--realtime'] | ||||
|  | ||||
|         args = basic_args | ||||
|         if not no_resume and continue_dl and not live: | ||||
|             args += ['--resume'] | ||||
|         if not live and continue_dl: | ||||
|             args += ['--skip', '1'] | ||||
|  | ||||
|         if sys.platform == 'win32' and sys.version_info < (3, 0): | ||||
|             # Windows subprocess module does not actually support Unicode | ||||
| @@ -152,19 +161,7 @@ class RtmpFD(FileDownloader): | ||||
|         else: | ||||
|             subprocess_encoding = None | ||||
|  | ||||
|         if self.params.get('verbose', False): | ||||
|             if subprocess_encoding: | ||||
|                 str_args = [ | ||||
|                     a.decode(subprocess_encoding) if isinstance(a, bytes) else a | ||||
|                     for a in args] | ||||
|             else: | ||||
|                 str_args = args | ||||
|             try: | ||||
|                 import pipes | ||||
|                 shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) | ||||
|             except ImportError: | ||||
|                 shell_quote = repr | ||||
|             self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args)) | ||||
|         self._debug_cmd(args, subprocess_encoding, exe='rtmpdump') | ||||
|  | ||||
|         RD_SUCCESS = 0 | ||||
|         RD_FAILED = 1 | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE | ||||
| from .addanime import AddAnimeIE | ||||
| from .adobetv import AdobeTVIE | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aftenposten import AftenpostenIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .aljazeera import AlJazeeraIE | ||||
| from .alphaporno import AlphaPornoIE | ||||
| @@ -29,7 +30,6 @@ from .arte import ( | ||||
| from .atresplayer import AtresPlayerIE | ||||
| from .atttechchannel import ATTTechChannelIE | ||||
| from .audiomack import AudiomackIE, AudiomackAlbumIE | ||||
| from .auengine import AUEngineIE | ||||
| from .azubu import AzubuIE | ||||
| from .bambuser import BambuserIE, BambuserChannelIE | ||||
| from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| @@ -49,14 +49,24 @@ from .brightcove import BrightcoveIE | ||||
| from .buzzfeed import BuzzFeedIE | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .camdemy import ( | ||||
|     CamdemyIE, | ||||
|     CamdemyFolderIE | ||||
| ) | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .cbssports import CBSSportsIE | ||||
| from .ccc import CCCIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .chirbit import ( | ||||
|     ChirbitIE, | ||||
|     ChirbitProfileIE, | ||||
| ) | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| @@ -74,7 +84,7 @@ from .collegehumor import CollegeHumorIE | ||||
| from .collegerama import CollegeRamaIE | ||||
| from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE | ||||
| from .comcarcoff import ComCarCoffIE | ||||
| from .commonmistakes import CommonMistakesIE | ||||
| from .commonmistakes import CommonMistakesIE, UnicodeBOMIE | ||||
| from .condenast import CondeNastIE | ||||
| from .cracked import CrackedIE | ||||
| from .criterion import CriterionIE | ||||
| @@ -83,6 +93,7 @@ from .crunchyroll import ( | ||||
|     CrunchyrollShowPlaylistIE | ||||
| ) | ||||
| from .cspan import CSpanIE | ||||
| from .ctsnews import CtsNewsIE | ||||
| from .dailymotion import ( | ||||
|     DailymotionIE, | ||||
|     DailymotionPlaylistIE, | ||||
| @@ -90,6 +101,7 @@ from .dailymotion import ( | ||||
| ) | ||||
| from .daum import DaumIE | ||||
| from .dbtv import DBTVIE | ||||
| from .dctp import DctpTvIE | ||||
| from .deezer import DeezerPlaylistIE | ||||
| from .dfb import DFBIE | ||||
| from .dotsub import DotsubIE | ||||
| @@ -114,6 +126,7 @@ from .ellentv import ( | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .embedly import EmbedlyIE | ||||
| from .empflix import EMPFlixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .eporner import EpornerIE | ||||
| @@ -181,6 +194,8 @@ from .heise import HeiseIE | ||||
| from .hellporno import HellPornoIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .historicfilms import HistoricFilmsIE | ||||
| from .history import HistoryIE | ||||
| from .hitbox import HitboxIE, HitboxLiveIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| from .hostingbulk import HostingBulkIE | ||||
| @@ -195,6 +210,7 @@ from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| ) | ||||
| from .imgur import ImgurIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE, InstagramUserIE | ||||
| @@ -273,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import NationalGeographicIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import ( | ||||
| @@ -283,11 +300,22 @@ from .ndr import NDRIE | ||||
| from .ndtv import NDTVIE | ||||
| from .netzkino import NetzkinoIE | ||||
| from .nerdcubed import NerdCubedFeedIE | ||||
| from .nerdist import NerdistIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .newstube import NewstubeIE | ||||
| from .nextmedia import ( | ||||
|     NextMediaIE, | ||||
|     NextMediaActionNewsIE, | ||||
|     AppleDailyRealtimeNewsIE, | ||||
|     AppleDailyAnimationNewsIE | ||||
| ) | ||||
| from .nfb import NFBIE | ||||
| from .nfl import NFLIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .nhl import ( | ||||
|     NHLIE, | ||||
|     NHLNewsIE, | ||||
|     NHLVideocenterIE, | ||||
| ) | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| @@ -299,13 +327,16 @@ from .nowvideo import NowVideoIE | ||||
| from .npo import ( | ||||
|     NPOIE, | ||||
|     NPOLiveIE, | ||||
|     NPORadioIE, | ||||
|     NPORadioFragmentIE, | ||||
|     TegenlichtVproIE, | ||||
| ) | ||||
| from .nrk import ( | ||||
|     NRKIE, | ||||
|     NRKTVIE, | ||||
| ) | ||||
| from .ntv import NTVIE | ||||
| from .ntvde import NTVDeIE | ||||
| from .ntvru import NTVRuIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oktoberfesttv import OktoberfestTVIE | ||||
| @@ -327,13 +358,17 @@ from .playfm import PlayFMIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
|     PornHubPlaylistIE, | ||||
| ) | ||||
| from .pornotube import PornotubeIE | ||||
| from .pornoxo import PornoXOIE | ||||
| from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .quickvid import QuickVidIE | ||||
| from .r7 import R7IE | ||||
| from .radiode import RadioDeIE | ||||
| from .radiobremen import RadioBremenIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| @@ -348,8 +383,9 @@ from .rottentomatoes import RottenTomatoesIE | ||||
| from .roxwel import RoxwelIE | ||||
| from .rtbf import RTBFIE | ||||
| from .rte import RteIE | ||||
| from .rtlnl import RtlXlIE | ||||
| from .rtlnl import RtlNlIE | ||||
| from .rtlnow import RTLnowIE | ||||
| from .rtl2 import RTL2IE | ||||
| from .rtp import RTPIE | ||||
| from .rts import RTSIE | ||||
| from .rtve import RTVEALaCartaIE, RTVELiveIE | ||||
| @@ -362,6 +398,7 @@ from .rutube import ( | ||||
|     RutubePersonIE, | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .sandia import SandiaIE | ||||
| from .sapo import SapoIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .sbs import SBSIE | ||||
| @@ -392,7 +429,10 @@ from .soundcloud import ( | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .soundgasm import SoundgasmIE | ||||
| from .soundgasm import ( | ||||
|     SoundgasmIE, | ||||
|     SoundgasmProfileIE | ||||
| ) | ||||
| from .southpark import ( | ||||
|     SouthParkIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -412,6 +452,7 @@ from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .streetvoice import StreetVoiceIE | ||||
| from .sunporno import SunPornoIE | ||||
| from .svtplay import SVTPlayIE | ||||
| from .swrmediathek import SWRMediathekIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| @@ -457,9 +498,11 @@ from .tumblr import TumblrIE | ||||
| from .tunein import TuneInIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tv4 import TV4IE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE, TvpSeriesIE | ||||
| from .tvplay import TVPlayIE | ||||
| from .tweakers import TweakersIE | ||||
| from .twentyfourvideo import TwentyFourVideoIE | ||||
| from .twitch import ( | ||||
|     TwitchVideoIE, | ||||
| @@ -467,6 +510,7 @@ from .twitch import ( | ||||
|     TwitchVodIE, | ||||
|     TwitchProfileIE, | ||||
|     TwitchPastBroadcastsIE, | ||||
|     TwitchBookmarksIE, | ||||
|     TwitchStreamIE, | ||||
| ) | ||||
| from .ubu import UbuIE | ||||
| @@ -538,6 +582,7 @@ from .wimp import WimpIE | ||||
| from .wistia import WistiaIE | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wrzuta import WrzutaIE | ||||
| from .wsj import WSJIE | ||||
| from .xbef import XBefIE | ||||
| from .xboxclips import XboxClipsIE | ||||
| from .xhamster import XHamsterIE | ||||
| @@ -545,11 +590,13 @@ from .xminus import XMinusIE | ||||
| from .xnxx import XNXXIE | ||||
| from .xvideos import XVideosIE | ||||
| from .xtube import XTubeUserIE, XTubeIE | ||||
| from .xuite import XuiteIE | ||||
| from .xxxymovies import XXXYMoviesIE | ||||
| from .yahoo import ( | ||||
|     YahooIE, | ||||
|     YahooSearchIE, | ||||
| ) | ||||
| from .yam import YamIE | ||||
| from .yesjapan import YesJapanIE | ||||
| from .ynet import YnetIE | ||||
| from .youjizz import YouJizzIE | ||||
| @@ -573,6 +620,7 @@ from .youtube import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|   | ||||
| @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player = self._parse_json( | ||||
| @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): | ||||
|             self._html_search_meta('datepublished', webpage, 'upload date')) | ||||
|  | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration') | ||||
|             or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) | ||||
|             self._html_search_meta('duration', webpage, 'duration') or | ||||
|             self._search_regex( | ||||
|                 r'Runtime:\s*(\d{2}:\d{2}:\d{2})', | ||||
|                 webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', | ||||
|   | ||||
| @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             }, | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': 'rQxZvXQ4ROaSOqq-or2Mow', | ||||
|             'title': 'Rick and Morty - Pilot', | ||||
|             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|         } | ||||
| @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             } | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': '-t8CamQlQ2aYZ49ItZCFog', | ||||
|             'title': 'American Dad - Putting Francine Out of Business', | ||||
|             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' | ||||
|         }, | ||||
|   | ||||
							
								
								
									
										103
									
								
								youtube_dl/extractor/aftenposten.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								youtube_dl/extractor/aftenposten.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AftenpostenIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish', | ||||
|         'md5': 'fd828cd29774a729bf4d4425fe192972', | ||||
|         'info_dict': { | ||||
|             'id': '21039', | ||||
|             'ext': 'mov', | ||||
|             'title': 'TRAILER: "Sweatshop" - I can´t take any more', | ||||
|             'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', | ||||
|             'timestamp': 1416927969, | ||||
|             'upload_date': '20141125', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._html_search_regex( | ||||
|             r'data-xs-id="(\d+)"', webpage, 'video id') | ||||
|  | ||||
|         data = self._download_xml( | ||||
|             'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'atom': 'http://www.w3.org/2005/Atom', | ||||
|             'xt': 'http://xstream.dk/', | ||||
|             'media': 'http://search.yahoo.com/mrss/', | ||||
|         } | ||||
|  | ||||
|         entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) | ||||
|  | ||||
|         title = xpath_text( | ||||
|             entry, xpath_with_ns('./atom:title', NS_MAP), 'title') | ||||
|         description = xpath_text( | ||||
|             entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') | ||||
|         timestamp = parse_iso8601(xpath_text( | ||||
|             entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) | ||||
|  | ||||
|         formats = [] | ||||
|         media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) | ||||
|         for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): | ||||
|             media_url = media_content.get('url') | ||||
|             if not media_url: | ||||
|                 continue | ||||
|             tbr = int_or_none(media_content.get('bitrate')) | ||||
|             mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url) | ||||
|             if mobj: | ||||
|                 formats.append({ | ||||
|                     'url': mobj.group('url'), | ||||
|                     'play_path': 'mp4:%s' % mobj.group('playpath'), | ||||
|                     'app': mobj.group('app'), | ||||
|                     'ext': 'flv', | ||||
|                     'tbr': tbr, | ||||
|                     'format_id': 'rtmp-%d' % tbr, | ||||
|                 }) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': media_url, | ||||
|                     'tbr': tbr, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         link = find_xpath_attr( | ||||
|             entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') | ||||
|         if link is not None: | ||||
|             formats.append({ | ||||
|                 'url': link.get('href'), | ||||
|                 'format_id': link.get('rel'), | ||||
|             }) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'url': splash.get('url'), | ||||
|             'width': int_or_none(splash.get('width')), | ||||
|             'height': int_or_none(splash.get('height')), | ||||
|         } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.search(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('video_id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|   | ||||
| @@ -20,6 +20,7 @@ class AparatIE(InfoExtractor): | ||||
|             'id': 'wP8On', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'تیم گلکسی 11 - زومیت', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         # 'skip': 'Extremely unreliable', | ||||
|     } | ||||
| @@ -34,7 +35,8 @@ class AparatIE(InfoExtractor): | ||||
|                      video_id + '/vt/frame') | ||||
|         webpage = self._download_webpage(embed_url, video_id) | ||||
|  | ||||
|         video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) | ||||
|         video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( | ||||
|             r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] | ||||
|         for i, video_url in enumerate(video_urls): | ||||
|             req = HEADRequest(video_url) | ||||
|             res = self._request_webpage( | ||||
| @@ -46,7 +48,7 @@ class AparatIE(InfoExtractor): | ||||
|  | ||||
|         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -54,4 +56,5 @@ class AparatIE(InfoExtractor): | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'thumbnail': thumbnail, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -11,9 +11,12 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AppleTrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|         }, | ||||
|         "playlist": [ | ||||
|             { | ||||
|                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", | ||||
| @@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 }, | ||||
|             }, | ||||
|         ] | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _JSON_RE = r'iTunes.playURL\((.*?)\);' | ||||
|  | ||||
| @@ -122,14 +128,15 @@ class AppleTrailersIE(InfoExtractor): | ||||
|             playlist.append({ | ||||
|                 '_type': 'video', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'formats': formats, | ||||
|                 'title': title, | ||||
|                 'duration': duration, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'upload_date': upload_date, | ||||
|                 'uploader_id': uploader_id, | ||||
|                 'user_agent': 'QuickTime compatible (youtube-dl)', | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'QuickTime compatible (youtube-dl)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', | ||||
|         'file': '22429276.mp4', | ||||
|         'md5': '469751912f1de0816a9fc9df8336476c', | ||||
|         'info_dict': { | ||||
|             'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?', | ||||
|             'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014', | ||||
|         }, | ||||
|         'skip': 'Blocked outside of Germany', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', | ||||
|         'info_dict': { | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import time | ||||
| import hmac | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
| @@ -17,7 +17,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AtresPlayerIE(InfoExtractor): | ||||
| class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -95,7 +95,7 @@ class AtresPlayerIE(InfoExtractor): | ||||
|         for fmt in ['windows', 'android_tablet']: | ||||
|             request = compat_urllib_request.Request( | ||||
|                 self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) | ||||
|             request.add_header('Youtubedl-user-agent', self._USER_AGENT) | ||||
|             request.add_header('User-Agent', self._USER_AGENT) | ||||
|  | ||||
|             fmt_json = self._download_json( | ||||
|                 request, video_id, 'Downloading %s video JSON' % fmt) | ||||
| @@ -105,13 +105,22 @@ class AtresPlayerIE(InfoExtractor): | ||||
|                 raise ExtractorError( | ||||
|                     '%s returned error: %s' % (self.IE_NAME, result), expected=True) | ||||
|  | ||||
|             for _, video_url in fmt_json['resultObject'].items(): | ||||
|             for format_id, video_url in fmt_json['resultObject'].items(): | ||||
|                 if format_id == 'token' or not video_url.startswith('http'): | ||||
|                     continue | ||||
|                 if video_url.endswith('/Manifest'): | ||||
|                     formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id)) | ||||
|                     if 'geodeswowsmpra3player' in video_url: | ||||
|                         f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] | ||||
|                         f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) | ||||
|                         # this videos are protected by DRM, the f4m downloader doesn't support them | ||||
|                         continue | ||||
|                     else: | ||||
|                         f4m_url = video_url[:-9] + '/manifest.f4m' | ||||
|                     formats.extend(self._extract_f4m_formats(f4m_url, video_id)) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'url': video_url, | ||||
|                         'format_id': 'android', | ||||
|                         'format_id': 'android-%s' % format_id, | ||||
|                         'preference': 1, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
| @@ -134,6 +143,15 @@ class AtresPlayerIE(InfoExtractor): | ||||
|         description = xpath_text(art, './description', 'description') | ||||
|         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') | ||||
|         if subtitle: | ||||
|             subtitles['es'] = subtitle | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -141,4 +159,5 @@ class AtresPlayerIE(InfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles), | ||||
|         } | ||||
|   | ||||
| @@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor): | ||||
|         # Album playlist ripped from fakeshoredrive with no metadata | ||||
|         { | ||||
|             'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', | ||||
|             'info_dict': { | ||||
|                 'title': 'PPP (Pistol P Project)', | ||||
|                 'id': '837572', | ||||
|             }, | ||||
|             'playlist': [{ | ||||
|                 'info_dict': { | ||||
|                     'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', | ||||
|                     'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', | ||||
|                     'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', | ||||
|                     'id': '837577', | ||||
|                     'ext': 'mp3', | ||||
|                     'uploader': 'Lil Herb a.k.a. G Herbo', | ||||
|                 } | ||||
|             }], | ||||
|             'params': { | ||||
|                 'playliststart': 8, | ||||
|                 'playlistend': 8, | ||||
|                 'playliststart': 9, | ||||
|                 'playlistend': 9, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|   | ||||
| @@ -1,50 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AUEngineIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', | ||||
|         'md5': '48972bdbcf1a3a2f5533e62425b41d4f', | ||||
|         'info_dict': { | ||||
|             'id': 'lfvlytY6', | ||||
|             'ext': 'mp4', | ||||
|             'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title') | ||||
|         video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) | ||||
|         video_url = compat_urllib_parse.unquote(video_urls[0]) | ||||
|         thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) | ||||
|         thumbnail = compat_urllib_parse.unquote(thumbnails[0]) | ||||
|  | ||||
|         if not video_url: | ||||
|             raise ExtractorError('Could not find video URL') | ||||
|  | ||||
|         ext = '.' + determine_ext(video_url) | ||||
|         title = remove_end(title, ext) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', | ||||
|         } | ||||
| @@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor): | ||||
|             'duration': int(info['length']), | ||||
|             'view_count': int(info['views_total']), | ||||
|             'uploader': info['username'], | ||||
|             'uploader_id': info['uid'], | ||||
|             'uploader_id': info['owner']['uid'], | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
|         download_link = m_download.group(1) | ||||
|         video_id = self._search_regex( | ||||
|             r'var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id', flags=re.MULTILINE | re.DOTALL) | ||||
|             r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) | ||||
|         info = json.loads(info)[0] | ||||
|         all_info = self._parse_json(self._search_regex( | ||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) | ||||
|         info = all_info[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
|         # If we try to use this url it says the link has expired | ||||
|         initial_url = mp3_info['url'] | ||||
|         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' | ||||
|         m_url = re.match(re_url, initial_url) | ||||
|         m_url = re.match( | ||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', | ||||
|             initial_url) | ||||
|         # We build the url we will use to get the final track url | ||||
|         # This url is build in Bandcamp in the script download_bunde_*.js | ||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) | ||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') | ||||
|         # If we could correctly generate the .rand field the url would be | ||||
|         # in the "download_url" key | ||||
|         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) | ||||
|         final_url = self._search_regex( | ||||
|             r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'Bandcamp:album' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
| @@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'Jazz Format Mixtape vol.1', | ||||
|             'id': 'jazz-format-mixtape-vol-1', | ||||
|             'uploader_id': 'blazo', | ||||
|         }, | ||||
|         'params': { | ||||
|             'playlistend': 2 | ||||
|         }, | ||||
|         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|         'skip': 'Bandcamp imposes download limits.' | ||||
|     }, { | ||||
|         'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', | ||||
|         'info_dict': { | ||||
|             'title': 'Hierophany of the Open Grave', | ||||
|             'uploader_id': 'nightbringer', | ||||
|             'id': 'hierophany-of-the-open-grave', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     }, { | ||||
|         'url': 'http://dotscale.bandcamp.com', | ||||
|         'info_dict': { | ||||
|             'title': 'Loom', | ||||
|             'id': 'dotscale', | ||||
|             'uploader_id': 'dotscale', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('subdomain') | ||||
|         title = mobj.group('title') | ||||
|         display_id = title or playlist_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         uploader_id = mobj.group('subdomain') | ||||
|         album_id = mobj.group('album_id') | ||||
|         playlist_id = album_id or uploader_id | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError('The page doesn\'t contain any tracks') | ||||
| @@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|             r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'uploader_id': uploader_id, | ||||
|             'id': playlist_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from ..compat import compat_HTTPError | ||||
| class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|     IE_NAME = 'bbc.co.uk' | ||||
|     IE_DESC = 'BBC iPlayer' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|         }, { | ||||
|             'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', | ||||
|             'only_matching': True, | ||||
|         }, { | ||||
|             'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -270,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|                     formats, subtitles = self._download_media_selector(programme_id) | ||||
|                 return programme_id, title, description, duration, formats, subtitles | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: | ||||
|             if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): | ||||
|                 raise | ||||
|  | ||||
|         # fallback to legacy playlist | ||||
|   | ||||
| @@ -9,7 +9,7 @@ class BeegIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://beeg.com/5416503', | ||||
|         'md5': '634526ae978711f6b748fe0dd6c11f57', | ||||
|         'md5': '1bff67111adb785c51d1b42959ec10e5', | ||||
|         'info_dict': { | ||||
|             'id': '5416503', | ||||
|             'ext': 'mp4', | ||||
|   | ||||
| @@ -1,40 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_start | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlinkxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     IE_NAME = 'blinkx' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', | ||||
|         'md5': '2e9a07364af40163a908edbf10bb2492', | ||||
|         'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', | ||||
|         'md5': '337cf7a344663ec79bf93a526a2e06c7', | ||||
|         'info_dict': { | ||||
|             'id': '8aQUy7GV', | ||||
|             'id': 'Da0Gw3xc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Police Car Rolls Away', | ||||
|             'uploader': 'stupidvideos.com', | ||||
|             'upload_date': '20131215', | ||||
|             'timestamp': 1387068000, | ||||
|             'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', | ||||
|             'duration': 14.886, | ||||
|             'thumbnails': [{ | ||||
|                 'width': 100, | ||||
|                 'height': 76, | ||||
|                 'resolution': '100x76', | ||||
|                 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', | ||||
|             }], | ||||
|             'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', | ||||
|             'uploader': 'IGN News', | ||||
|             'upload_date': '20150217', | ||||
|             'timestamp': 1424215740, | ||||
|             'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', | ||||
|             'duration': 47.743333, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, rl): | ||||
|         m = re.match(self._VALID_URL, rl) | ||||
|         video_id = m.group('id') | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = video_id[:8] | ||||
|  | ||||
|         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
| @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): | ||||
|             elif m['type'] in ('flv', 'mp4'): | ||||
|                 vcodec = remove_start(m['vcodec'], 'ff') | ||||
|                 acodec = remove_start(m['acodec'], 'ff') | ||||
|                 tbr = (int(m['vbr']) + int(m['abr'])) // 1000 | ||||
|                 vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) | ||||
|                 abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) | ||||
|                 tbr = vbr + abr if vbr and abr else None | ||||
|                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': m['link'], | ||||
|                     'vcodec': vcodec, | ||||
|                     'acodec': acodec, | ||||
|                     'abr': int(m['abr']) // 1000, | ||||
|                     'vbr': int(m['vbr']) // 1000, | ||||
|                     'abr': abr, | ||||
|                     'vbr': vbr, | ||||
|                     'tbr': tbr, | ||||
|                     'width': int(m['w']), | ||||
|                     'height': int(m['h']), | ||||
|                     'width': int_or_none(m.get('w')), | ||||
|                     'height': int_or_none(m.get('h')), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
| @@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|         # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|         # when we request with a common UA | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Youtubedl-user-agent', 'youtube-dl') | ||||
|         req.add_header('User-Agent', 'youtube-dl') | ||||
|         return self._download_webpage(req, None, note=False) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', | ||||
|             'info_dict': { | ||||
|                 'title': 'Sealife', | ||||
|                 'id': '3550319591001', | ||||
|             }, | ||||
|             'playlist_mincount': 7, | ||||
|         }, | ||||
| @@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         """ | ||||
|  | ||||
|         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 | ||||
|         object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', | ||||
|         object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>', | ||||
|                             lambda m: m.group(1) + '/>', object_str) | ||||
|         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 | ||||
|         object_str = object_str.replace('<--', '<!--') | ||||
| @@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         playlist_info = json_data['videoList'] | ||||
|         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] | ||||
|  | ||||
|         return self.playlist_result(videos, playlist_id=playlist_info['id'], | ||||
|         return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], | ||||
|                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName']) | ||||
|  | ||||
|     def _extract_video_info(self, video_info): | ||||
|   | ||||
| @@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor): | ||||
|             'skip_download': True,  # Got enough YouTube download tests | ||||
|         }, | ||||
|         'info_dict': { | ||||
|             'id': 'look-at-this-cute-dog-omg', | ||||
|             'description': 're:Munchkin the Teddy Bear is back ?!', | ||||
|             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', | ||||
|         }, | ||||
| @@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20141124', | ||||
|                 'uploader_id': 'CindysMunchkin', | ||||
|                 'description': 're:© 2014 Munchkin the Shih Tzu', | ||||
|                 'uploader': 'Munchkin the Shih Tzu', | ||||
|                 'description': 're:© 2014 Munchkin the', | ||||
|                 'uploader': 're:^Munchkin the', | ||||
|                 'title': 're:Munchkin the Teddy Bear gets her exercise', | ||||
|             }, | ||||
|         }] | ||||
|   | ||||
							
								
								
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,153 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CamdemyIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # single file | ||||
|         'url': 'http://www.camdemy.com/media/5181/', | ||||
|         'md5': '5a5562b6a98b37873119102e052e311b', | ||||
|         'info_dict': { | ||||
|             'id': '5181', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': '', | ||||
|             'creator': 'ss11spring', | ||||
|             'upload_date': '20130114', | ||||
|             'timestamp': 1358154556, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # With non-empty description | ||||
|         'url': 'http://www.camdemy.com/media/13885', | ||||
|         'md5': '4576a3bb2581f86c61044822adbd1249', | ||||
|         'info_dict': { | ||||
|             'id': '13885', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EverCam + Camdemy QuickStart', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', | ||||
|             'creator': 'evercam', | ||||
|             'upload_date': '20140620', | ||||
|             'timestamp': 1403271569, | ||||
|         } | ||||
|     }, { | ||||
|         # External source | ||||
|         'url': 'http://www.camdemy.com/media/14842', | ||||
|         'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7', | ||||
|         'info_dict': { | ||||
|             'id': '2vsYQzNIsJo', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20130211', | ||||
|             'uploader': 'Hun Kim', | ||||
|             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', | ||||
|             'uploader_id': 'hunkimtutorials', | ||||
|             'title': 'Excel 2013 Tutorial - How to add Password Protection', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         src_from = self._html_search_regex( | ||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||
|             'external source', default=None) | ||||
|         if src_from: | ||||
|             return self.url_result(src_from) | ||||
|  | ||||
|         oembed_obj = self._download_json( | ||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||
|  | ||||
|         thumb_url = oembed_obj['thumbnail_url'] | ||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||
|         file_list_doc = self._download_xml( | ||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||
|             video_id, 'Filelist XML') | ||||
|         file_name = file_list_doc.find('./video/item/fileName').text | ||||
|         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||
|  | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', fatal=False), | ||||
|             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': oembed_obj['title'], | ||||
|             'thumbnail': thumb_url, | ||||
|             'description': self._html_search_meta('description', page), | ||||
|             'creator': oembed_obj['author_name'], | ||||
|             'duration': oembed_obj['duration'], | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CamdemyFolderIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # links with trailing slash | ||||
|         'url': 'http://www.camdemy.com/folder/450', | ||||
|         'info_dict': { | ||||
|             'id': '450', | ||||
|             'title': '信號與系統 2012 & 2011 (Signals and Systems)', | ||||
|         }, | ||||
|         'playlist_mincount': 145 | ||||
|     }, { | ||||
|         # links without trailing slash | ||||
|         # and multi-page | ||||
|         'url': 'http://www.camdemy.com/folder/853', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }, { | ||||
|         # with displayMode parameter. For testing the codes to add parameters | ||||
|         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         folder_id = self._match_id(url) | ||||
|  | ||||
|         # Add displayMode=list so that all links are displayed in a single page | ||||
|         parsed_url = list(compat_urlparse.urlparse(url)) | ||||
|         query = dict(compat_urlparse.parse_qsl(parsed_url[4])) | ||||
|         query.update({'displayMode': 'list'}) | ||||
|         parsed_url[4] = compat_urllib_parse.urlencode(query) | ||||
|         final_url = compat_urlparse.urlunparse(parsed_url) | ||||
|  | ||||
|         page = self._download_webpage(final_url, folder_id) | ||||
|         matches = re.findall(r"href='(/media/\d+/?)'", page) | ||||
|  | ||||
|         entries = [self.url_result('http://www.camdemy.com' + media_path) | ||||
|                    for media_path in matches] | ||||
|  | ||||
|         folder_title = self._html_search_meta('keywords', page) | ||||
|  | ||||
|         return self.playlist_result(entries, folder_id, folder_title) | ||||
| @@ -15,12 +15,13 @@ from ..utils import ( | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' | ||||
|     _SITE_ID_MAP = { | ||||
|         'canalplus.fr': 'cplus', | ||||
|         'piwiplus.fr': 'teletoon', | ||||
|         'd8.tv': 'd8', | ||||
|         'itele.fr': 'itele', | ||||
|     } | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor): | ||||
|             'upload_date': '20131108', | ||||
|         }, | ||||
|         'skip': 'videos get deleted after a while', | ||||
|     }, { | ||||
|         'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', | ||||
|         'md5': '65aa83ad62fe107ce29e564bb8712580', | ||||
|         'info_dict': { | ||||
|             'id': '1213714', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', | ||||
|             'description': 'md5:8216206ec53426ea6321321f3b3c16db', | ||||
|             'upload_date': '20150211', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         real_id = self._search_regex( | ||||
|             r"video\.settings\.pid\s*=\s*'([^']+)';", | ||||
|   | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
							
								
								
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CCCIE(InfoExtractor): | ||||
|     IE_NAME = 'media.ccc.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', | ||||
|         'md5': '205a365d0d57c0b1e43a12c9ffe8f9be', | ||||
|         'info_dict': { | ||||
|             'id': '20131228183', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introduction to Processor Design', | ||||
|             'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'view_count': int, | ||||
|             'upload_date': '20131229', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if self._downloader.params.get('prefer_free_formats'): | ||||
|             preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd']) | ||||
|         else: | ||||
|             preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd']) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1>(.*?)</h1>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r"(?s)<p class='description'>(.*?)</p>", | ||||
|             webpage, 'description', fatal=False) | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         matches = re.finditer(r'''(?xs) | ||||
|             <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* | ||||
|             <a\s+href='(?P<http_url>[^']+)'>\s* | ||||
|             (?: | ||||
|                 .*? | ||||
|                 <a\s+href='(?P<torrent_url>[^']+\.torrent)' | ||||
|             )?''', webpage) | ||||
|         formats = [] | ||||
|         for m in matches: | ||||
|             format = m.group('format') | ||||
|             format_id = self._search_regex( | ||||
|                 r'.*/([a-z0-9_-]+)/[^/]*$', | ||||
|                 m.group('http_url'), 'format id', default=None) | ||||
|             vcodec = 'h264' if 'h264' in format_id else ( | ||||
|                 'none' if format_id in ('mp3', 'opus') else None | ||||
|             ) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format': format, | ||||
|                 'url': m.group('http_url'), | ||||
|                 'vcodec': vcodec, | ||||
|                 'preference': preference(format_id), | ||||
|             }) | ||||
|  | ||||
|             if m.group('torrent_url'): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'torrent-%s' % (format if format_id is None else format_id), | ||||
|                     'format': '%s (torrent)' % format, | ||||
|                     'proto': 'torrent', | ||||
|                     'format_note': '(unsupported; will just download the .torrent file)', | ||||
|                     'vcodec': vcodec, | ||||
|                     'preference': -100 + preference(format_id), | ||||
|                     'url': m.group('torrent_url'), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ChirbitIE(InfoExtractor):
    """Extractor for single audio clips hosted on chirb.it."""
    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'duration': 52,
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a chirb.it URL to a single audio entry.

        Returns a standard info dict with the direct media URL plus
        optional duration / listen count / comment count metadata.
        """
        audio_id = self._match_id(url)

        # Every URL variant (wp/, pl/, swf player) resolves to the same
        # canonical clip page on chirb.it.
        page = self._download_webpage(
            'http://chirb.it/%s' % audio_id, audio_id)

        # Direct media URL handed to the JW player via setFile(); this is
        # the only mandatory piece of data besides the title.
        media_url = self._search_regex(
            r'"setFile"\s*,\s*"([^"]+)"', page, 'audio url')
        # Title is exposed through schema.org microdata on the page.
        clip_title = self._search_regex(
            r'itemprop="name">([^<]+)', page, 'title')

        # Remaining metadata is optional; extraction failures are non-fatal.
        clip_duration = parse_duration(self._html_search_meta(
            'duration', page, 'duration', fatal=False))
        listens = int_or_none(self._search_regex(
            r'itemprop="playCount"\s*>(\d+)', page,
            'listen count', fatal=False))
        comments = int_or_none(self._search_regex(
            r'>(\d+) Comments?:', page,
            'comment count', fatal=False))

        return {
            'id': audio_id,
            'url': media_url,
            'title': clip_title,
            'duration': clip_duration,
            'view_count': listens,
            'comment_count': comments,
        }
|  | ||||
|  | ||||
class ChirbitProfileIE(InfoExtractor):
    """Playlist extractor for all chirbits posted by one user profile.

    Fetches the profile's RSS feed and yields one URL-result entry per
    item link, delegating the actual audio extraction to ChirbitIE.
    """
    IE_NAME = 'chirbit:profile'
    # Fix: the dot in the hostname was previously unescaped ("chirbit.com"),
    # so it matched any character; escape it so only chirbit.com URLs match.
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
            'title': 'Chirbits by ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        profile_id = self._match_id(url)

        # The RSS feed lists every clip with a link back to chirb.it,
        # regardless of whether the input URL already pointed at /rss/.
        rss = self._download_xml(
            'http://chirbit.com/rss/%s' % profile_id, profile_id)

        entries = [
            self.url_result(audio_url.text, 'Chirbit')
            for audio_url in rss.findall('./channel/item/link')]

        # Feed title, e.g. "Chirbits by <username>".
        title = rss.find('./channel/title').text

        return self.playlist_result(entries, profile_id, title)
| @@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor): | ||||
|             item, './{http://developer.longtailvideo.com/trac/}date') | ||||
|         upload_date = unified_strdate(date_str, day_first=False) | ||||
|         # duration is present but wrong | ||||
|         formats = [] | ||||
|         formats.append({ | ||||
|         formats = [{ | ||||
|             'format_id': 'main', | ||||
|             'url': item.find( | ||||
|                 './{http://search.yahoo.com/mrss/}content').attrib['url'], | ||||
|         }) | ||||
|             'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], | ||||
|         }] | ||||
|         backup_url = xpath_text( | ||||
|             item, './{http://developer.longtailvideo.com/trac/}backupContent') | ||||
|         if backup_url: | ||||
|   | ||||
| @@ -1,9 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| _translation_table = { | ||||
| @@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor): | ||||
|     ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', | ||||
|         'md5': 'a2ba71eebf523859fe527a61018f723e', | ||||
|         'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', | ||||
|         'info_dict': { | ||||
|             'id': '1012420', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Fun Jynx Maze solo', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'age_limit': 18, | ||||
| @@ -44,39 +42,31 @@ class CliphunterIE(InfoExtractor): | ||||
|         video_title = self._search_regex( | ||||
|             r'mediaTitle = "([^"]+)"', webpage, 'title') | ||||
|  | ||||
|         pl_fiji = self._search_regex( | ||||
|             r'pl_fiji = \'([^\']+)\'', webpage, 'video data') | ||||
|         pl_c_qual = self._search_regex( | ||||
|             r'pl_c_qual = "(.)"', webpage, 'video quality') | ||||
|         video_url = _decode(pl_fiji) | ||||
|         formats = [{ | ||||
|             'url': video_url, | ||||
|             'format_id': 'default-%s' % pl_c_qual, | ||||
|         }] | ||||
|         fmts = {} | ||||
|         for fmt in ('mp4', 'flv'): | ||||
|             fmt_list = self._parse_json(self._search_regex( | ||||
|                 r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id) | ||||
|             for f in fmt_list: | ||||
|                 fmts[f['fname']] = _decode(f['sUrl']) | ||||
|  | ||||
|         qualities_json = self._search_regex( | ||||
|             r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info') | ||||
|         qualities_data = json.loads(qualities_json) | ||||
|         qualities = self._parse_json(self._search_regex( | ||||
|             r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id) | ||||
|  | ||||
|         for i, t in enumerate( | ||||
|                 re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)): | ||||
|             quality_id, crypted_url = t | ||||
|             video_url = _decode(crypted_url) | ||||
|         formats = [] | ||||
|         for fname, url in fmts.items(): | ||||
|             f = { | ||||
|                 'format_id': quality_id, | ||||
|                 'url': video_url, | ||||
|                 'quality': i, | ||||
|                 'url': url, | ||||
|             } | ||||
|             if quality_id in qualities_data: | ||||
|                 qd = qualities_data[quality_id] | ||||
|                 m = re.match( | ||||
|                     r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b> | ||||
|                         \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd) | ||||
|                 if m: | ||||
|                     f['width'] = int(m.group('width')) | ||||
|                     f['height'] = int(m.group('height')) | ||||
|                     f['tbr'] = int(m.group('tbr')) | ||||
|             if fname in qualities: | ||||
|                 qual = qualities[fname] | ||||
|                 f.update({ | ||||
|                     'format_id': '%s_%sp' % (determine_ext(url), qual['h']), | ||||
|                     'width': qual['w'], | ||||
|                     'height': qual['h'], | ||||
|                     'tbr': qual['br'], | ||||
|                 }) | ||||
|             formats.append(f) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._search_regex( | ||||
|   | ||||
| @@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): | ||||
|                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) | ||||
|                           )| | ||||
|                           (?P<interview> | ||||
|                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) | ||||
|                               extended-interviews/(?P<interID>[0-9a-z]+)/ | ||||
|                               (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?) | ||||
|                               (?:/[^/?#]?|[?#]|$)))) | ||||
|                      ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', | ||||
| @@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): | ||||
|             'uploader': 'thedailyshow', | ||||
|             'title': 'thedailyshow kristen-stewart part 1', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview', | ||||
|         'info_dict': { | ||||
|             'id': 'sarah-chayes-extended-interview', | ||||
|             'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|             'title': 'thedailyshow Sarah Chayes Extended Interview', | ||||
|         }, | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'info_dict': { | ||||
|                     'id': '0baad492-cbec-4ec1-9e50-ad91c291127f', | ||||
|                     'ext': 'mp4', | ||||
|                     'upload_date': '20150129', | ||||
|                     'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|                     'uploader': 'thedailyshow', | ||||
|                     'title': 'thedailyshow sarah-chayes-extended-interview part 1', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 'info_dict': { | ||||
|                     'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283', | ||||
|                     'ext': 'mp4', | ||||
|                     'upload_date': '20150129', | ||||
|                     'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."', | ||||
|                     'uploader': 'thedailyshow', | ||||
|                     'title': 'thedailyshow sarah-chayes-extended-interview part 2', | ||||
|                 }, | ||||
|             }, | ||||
|         ], | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', | ||||
|         'only_matching': True, | ||||
| @@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': epTitle, | ||||
|             'entries': entries, | ||||
|             'title': show_name + ' ' + title, | ||||
|             'description': description, | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import xml.etree.ElementTree | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_cookiejar, | ||||
|     compat_HTTPError, | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
| @@ -87,7 +88,8 @@ class InfoExtractor(object): | ||||
|                     * player_url SWF Player URL (used for rtmpdump). | ||||
|                     * protocol   The protocol that will be used for the actual | ||||
|                                  download, lower-case. | ||||
|                                  "http", "https", "rtsp", "rtmp", "m3u8" or so. | ||||
|                                  "http", "https", "rtsp", "rtmp", "rtmpe", | ||||
|                                  "m3u8", or "m3u8_native". | ||||
|                     * preference Order number of this format. If this field is | ||||
|                                  present and not None, the formats get sorted | ||||
|                                  by this field, regardless of all other values. | ||||
| @@ -108,15 +110,17 @@ class InfoExtractor(object): | ||||
|                                   (quality takes higher priority) | ||||
|                                  -1 for default (order by other properties), | ||||
|                                  -2 or smaller for less than default. | ||||
|                     * http_referer  HTTP Referer header value to set. | ||||
|                     * http_method  HTTP method to use for the download. | ||||
|                     * http_headers  A dictionary of additional HTTP headers | ||||
|                                  to add to the request. | ||||
|                     * http_post_data  Additional data to send with a POST | ||||
|                                  request. | ||||
|                     * stretched_ratio  If given and not 1, indicates that the | ||||
|                                        video's pixels are not square. | ||||
|                                        width : height ratio as float. | ||||
|                                  video's pixels are not square. | ||||
|                                  width : height ratio as float. | ||||
|                     * no_resume  The server does not support resuming the | ||||
|                                  (HTTP or RTMP) download. Boolean. | ||||
|  | ||||
|     url:            Final video URL. | ||||
|     ext:            Video filename extension. | ||||
|     format:         The video format, defaults to ext (used for --get-format) | ||||
| @@ -130,7 +134,9 @@ class InfoExtractor(object): | ||||
|                     something like "4234987", title "Dancing naked mole rats", | ||||
|                     and display_id "dancing-naked-mole-rats" | ||||
|     thumbnails:     A list of dictionaries, with the following entries: | ||||
|                         * "id" (optional, string) - Thumbnail format ID | ||||
|                         * "url" | ||||
|                         * "preference" (optional, int) - quality of the image | ||||
|                         * "width" (optional, int) | ||||
|                         * "height" (optional, int) | ||||
|                         * "resolution" (optional, string "{width}x{height"}, | ||||
| @@ -138,6 +144,7 @@ class InfoExtractor(object): | ||||
|     thumbnail:      Full URL to a video thumbnail image. | ||||
|     description:    Full video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     creator:        The main artist who created the video. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
| @@ -149,6 +156,7 @@ class InfoExtractor(object): | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     like_count:     Number of positive ratings of the video | ||||
|     dislike_count:  Number of negative ratings of the video | ||||
|     average_rating: Average rating give by users, the scale used depends on the webpage | ||||
|     comment_count:  Number of comments on the video | ||||
|     comments:       A list of comments, each with one or more of the following | ||||
|                     properties (all but one of text or html optional): | ||||
| @@ -256,8 +264,15 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         self.initialize() | ||||
|         return self._real_extract(url) | ||||
|         try: | ||||
|             self.initialize() | ||||
|             return self._real_extract(url) | ||||
|         except ExtractorError: | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
|             raise ExtractorError('A network error has occured.', cause=e, expected=True) | ||||
|         except (KeyError, StopIteration) as e: | ||||
|             raise ExtractorError('An extractor error has occured.', cause=e) | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
|         """Sets the downloader for this IE.""" | ||||
| @@ -376,6 +391,16 @@ class InfoExtractor(object): | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in content[:512]: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         return content | ||||
|  | ||||
| @@ -499,7 +524,7 @@ class InfoExtractor(object): | ||||
|                 if mobj: | ||||
|                     break | ||||
|  | ||||
|         if os.name != 'nt' and sys.stderr.isatty(): | ||||
|         if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty(): | ||||
|             _name = '\033[0;34m%s\033[0m' % name | ||||
|         else: | ||||
|             _name = name | ||||
| @@ -648,6 +673,21 @@ class InfoExtractor(object): | ||||
|         } | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|     def _family_friendly_search(self, html): | ||||
|         # See http://schema.org/VideoObject | ||||
|         family_friendly = self._html_search_meta('isFamilyFriendly', html) | ||||
|  | ||||
|         if not family_friendly: | ||||
|             return None | ||||
|  | ||||
|         RATING_TABLE = { | ||||
|             '1': 0, | ||||
|             'true': 0, | ||||
|             '0': 18, | ||||
|             'false': 18, | ||||
|         } | ||||
|         return RATING_TABLE.get(family_friendly.lower(), None) | ||||
|  | ||||
|     def _twitter_search_player(self, html): | ||||
|         return self._html_search_meta('twitter:player', html, | ||||
|                                       'twitter card player') | ||||
| @@ -697,21 +737,40 @@ class InfoExtractor(object): | ||||
|                 preference, | ||||
|                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||
|                 f.get('quality') if f.get('quality') is not None else -1, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('height') if f.get('height') is not None else -1, | ||||
|                 f.get('width') if f.get('width') is not None else -1, | ||||
|                 ext_preference, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('abr') if f.get('abr') is not None else -1, | ||||
|                 audio_ext_preference, | ||||
|                 f.get('fps') if f.get('fps') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, | ||||
|                 f.get('source_preference') if f.get('source_preference') is not None else -1, | ||||
|                 f.get('format_id'), | ||||
|             ) | ||||
|         formats.sort(key=_formats_key) | ||||
|  | ||||
|     def _check_formats(self, formats, video_id): | ||||
|         if formats: | ||||
|             formats[:] = filter( | ||||
|                 lambda f: self._is_valid_url( | ||||
|                     f['url'], video_id, | ||||
|                     item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), | ||||
|                 formats) | ||||
|  | ||||
|     def _is_valid_url(self, url, video_id, item='video'): | ||||
|         try: | ||||
|             self._request_webpage(url, video_id, 'Checking %s URL' % item) | ||||
|             return True | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 self.report_warning( | ||||
|                     '%s URL is invalid, skipping' % item, video_id) | ||||
|                 return False | ||||
|             raise | ||||
|  | ||||
|     def http_scheme(self): | ||||
|         """ Either "http:" or "https:", depending on the user's preferences """ | ||||
|         return ( | ||||
| @@ -736,7 +795,7 @@ class InfoExtractor(object): | ||||
|         self.to_screen(msg) | ||||
|         time.sleep(timeout) | ||||
|  | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id): | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): | ||||
|         manifest = self._download_xml( | ||||
|             manifest_url, video_id, 'Downloading f4m manifest', | ||||
|             'Unable to download f4m manifest') | ||||
| @@ -749,30 +808,32 @@ class InfoExtractor(object): | ||||
|             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') | ||||
|         for i, media_el in enumerate(media_nodes): | ||||
|             if manifest_version == '2.0': | ||||
|                 manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + | ||||
|                                 (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             format_id = 'f4m-%d' % (i if tbr is None else tbr) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), | ||||
|                 'url': manifest_url, | ||||
|                 'ext': 'flv', | ||||
|                 'tbr': tbr, | ||||
|                 'width': int_or_none(media_el.attrib.get('width')), | ||||
|                 'height': int_or_none(media_el.attrib.get('height')), | ||||
|                 'preference': preference, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None): | ||||
|                               entry_protocol='m3u8', preference=None, | ||||
|                               m3u8_id=None): | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'm3u8-meta', | ||||
|             'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
|             'preference': -1, | ||||
|             'preference': preference - 1 if preference else -1, | ||||
|             'resolution': 'multiple', | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
| @@ -787,6 +848,7 @@ class InfoExtractor(object): | ||||
|             note='Downloading m3u8 information', | ||||
|             errnote='Failed to download m3u8 information') | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
| @@ -797,6 +859,13 @@ class InfoExtractor(object): | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
| @@ -804,9 +873,8 @@ class InfoExtractor(object): | ||||
|                     formats.append({'url': format_url(line)}) | ||||
|                     continue | ||||
|                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) | ||||
|  | ||||
|                 f = { | ||||
|                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), | ||||
|                     'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), | ||||
|                     'url': format_url(line.strip()), | ||||
|                     'tbr': tbr, | ||||
|                     'ext': ext, | ||||
| @@ -826,16 +894,22 @@ class InfoExtractor(object): | ||||
|                     width_str, height_str = resolution.split('x') | ||||
|                     f['width'] = int(width_str) | ||||
|                     f['height'] = int(height_str) | ||||
|                 if last_media is not None: | ||||
|                     f['m3u8_media'] = last_media | ||||
|                     last_media = None | ||||
|                 formats.append(f) | ||||
|                 last_info = {} | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|     # TODO: improve extraction | ||||
|     def _extract_smil_formats(self, smil_url, video_id): | ||||
|     def _extract_smil_formats(self, smil_url, video_id, fatal=True): | ||||
|         smil = self._download_xml( | ||||
|             smil_url, video_id, 'Downloading SMIL file', | ||||
|             'Unable to download SMIL file') | ||||
|             'Unable to download SMIL file', fatal=fatal) | ||||
|         if smil is False: | ||||
|             assert not fatal | ||||
|             return [] | ||||
|  | ||||
|         base = smil.find('./head/meta').get('base') | ||||
|  | ||||
|   | ||||
| @@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor): | ||||
|             'That doesn\'t make any sense. ' | ||||
|             'Simply remove the parameter in your command or configuration.' | ||||
|         ) % url | ||||
|         if self._downloader.params.get('verbose'): | ||||
|         if not self._downloader.params.get('verbose'): | ||||
|             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.' | ||||
|         raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|  | ||||
class UnicodeBOMIE(InfoExtractor):
    """Handles URLs pasted with a leading Unicode byte order mark.

    Strips the BOM, warns the user, and re-dispatches the cleaned URL
    through the normal extractor machinery.
    """
    IE_DESC = False
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The (?P<id>...) group captures everything after the BOM.
        real_url = self._match_id(url)
        self.report_warning(
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % real_url)
        return self.url_result(real_url)
|   | ||||
							
								
								
									
										93
									
								
								youtube_dl/extractor/ctsnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								youtube_dl/extractor/ctsnews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_iso8601, ExtractorError | ||||
|  | ||||
|  | ||||
class CtsNewsIE(InfoExtractor):
    """Extractor for news clips from CTS (Chinese Television System, Taiwan)."""
    # https connection failed (Connection reset)
    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
        'md5': 'a9875cb790252b08431186d741beaabe',
        'info_dict': {
            'id': '201501291578109',
            'ext': 'mp4',
            'title': '以色列.真主黨交火 3人死亡',
            'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
            'timestamp': 1422528540,
            'upload_date': '20150129',
        }
    }, {
        # News count not appear on page but still available in database
        'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
        'md5': '3aee7e0df7cdff94e43581f54c22619e',
        'info_dict': {
            'id': '201309031304098',
            'ext': 'mp4',
            'title': '韓國31歲童顏男 貌如十多歲小孩',
            'description': 'md5:f183feeba3752b683827aab71adad584',
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1378205880,
            'upload_date': '20130903',
        }
    }, {
        # With Youtube embedded video
        'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
        'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': 'OVbfO7d0_hQ',
            'ext': 'mp4',
            'title': 'iPhone6熱銷 蘋果財報亮眼',
            'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20150128',
            'uploader_id': 'TBSCTS',
            'uploader': '中華電視公司',
        }
    }]

    def _real_extract(self, url):
        news_id = self._match_id(url)
        webpage = self._download_webpage(url, news_id)

        # Articles without the CTSPlayer2 widget may embed a YouTube video
        # instead; in that case hand the URL off to the YouTube extractor.
        if not self._search_regex(r'(CTSPlayer2)', webpage, 'CTSPlayer2 identifier', default=None):
            self.to_screen('Not CTSPlayer video, trying Youtube...')
            youtube_url = self._search_regex(
                r'src="(//www\.youtube\.com/embed/[^"]+)"', webpage, 'youtube url',
                default=None)
            if not youtube_url:
                raise ExtractorError('The news includes no videos!', expected=True)

            return {
                '_type': 'url',
                'url': youtube_url,
                'ie_key': 'Youtube',
            }

        # The mp4feed endpoint returns the bare media URL as the response body.
        feed_url = self._html_search_regex(
            r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
            webpage, 'feed url')
        video_url = self._download_webpage(
            feed_url, news_id, note='Fetching feed')

        title = self._html_search_meta('title', webpage)
        description = self._html_search_meta('description', webpage)
        thumbnail = self._html_search_meta('image', webpage)

        # Page timestamps are local Taiwan time (UTC+8); rewrite into
        # ISO 8601 with an explicit timezone before parsing.
        date_str = self._html_search_regex(
            r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', webpage, 'date and time')
        timestamp = parse_iso8601(
            date_str.replace('/', '-') + ':00+0800', delimiter=' ')

        return {
            'id': news_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
        }
| @@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', | ||||
|         'info_dict': { | ||||
|             'title': 'SPORT', | ||||
|             'id': 'xv4bw_nqtv_sport', | ||||
|         }, | ||||
|         'playlist_mincount': 20, | ||||
|     }] | ||||
|   | ||||
							
								
								
									
										61
									
								
								youtube_dl/extractor/dctp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								youtube_dl/extractor/dctp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,61 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
|  | ||||
|  | ||||
| class DctpTvIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', | ||||
|         'info_dict': { | ||||
|             'id': '1324', | ||||
|             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Videoinstallation für eine Kaufhausfassade' | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/' | ||||
|         version_json = self._download_json( | ||||
|             base_url + 'version.json', | ||||
|             video_id, note='Determining file version') | ||||
|         version = version_json['version_name'] | ||||
|         info_json = self._download_json( | ||||
|             '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), | ||||
|             video_id, note='Fetching object ID') | ||||
|         object_id = compat_str(info_json['object_id']) | ||||
|         meta_json = self._download_json( | ||||
|             '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), | ||||
|             video_id, note='Downloading metadata') | ||||
|         uuid = meta_json['uuid'] | ||||
|         title = meta_json['title'] | ||||
|         wide = meta_json['is_wide'] | ||||
|         if wide: | ||||
|             ratio = '16x9' | ||||
|         else: | ||||
|             ratio = '4x3' | ||||
|         play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio) | ||||
|  | ||||
|         servers_json = self._download_json( | ||||
|             'http://www.dctp.tv/streaming_servers/', | ||||
|             video_id, note='Downloading server list') | ||||
|         url = servers_json[0]['endpoint'] | ||||
|  | ||||
|         return { | ||||
|             'id': object_id, | ||||
|             'title': title, | ||||
|             'format': 'rtmp', | ||||
|             'url': url, | ||||
|             'play_path': play_path, | ||||
|             'rtmp_real_time': True, | ||||
|             'ext': 'flv', | ||||
|             'display_id': video_id | ||||
|         } | ||||
| @@ -1,40 +1,39 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class DefenseGouvFrIE(InfoExtractor): | ||||
|     IE_NAME = 'defense.gouv.fr' | ||||
|     _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' | ||||
|                   r'ligthboxvideo/base-de-medias/webtv/(.*)') | ||||
|     _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', | ||||
|         'file': '11213.mp4', | ||||
|         'md5': '75bba6124da7e63d2d60b5244ec9430c', | ||||
|         "info_dict": { | ||||
|             "title": "attaque-chimique-syrienne-du-21-aout-2013-1" | ||||
|         'info_dict': { | ||||
|             'id': '11213', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'attaque-chimique-syrienne-du-21-aout-2013-1' | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         title = re.match(self._VALID_URL, url).group(1) | ||||
|         title = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, title) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r"flashvars.pvg_id=\"(\d+)\";", | ||||
|             webpage, 'ID') | ||||
|  | ||||
|         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' | ||||
|                     + video_id) | ||||
|         info = self._download_webpage(json_url, title, | ||||
|                                       'Downloading JSON config') | ||||
|         video_url = json.loads(info)['renditions'][0]['url'] | ||||
|         json_url = ( | ||||
|             'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % | ||||
|             video_id) | ||||
|         info = self._download_json(json_url, title, 'Downloading JSON config') | ||||
|         video_url = info['renditions'][0]['url'] | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'url': video_url, | ||||
|                 'title': title, | ||||
|                 } | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|         } | ||||
|   | ||||
| @@ -1,13 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DotsubIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|         'md5': '0914d4d69605090f623b7ac329fea66e', | ||||
| @@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor): | ||||
|             'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', | ||||
|             'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', | ||||
|             'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'duration': 3169, | ||||
|             'uploader': '4v4l0n42', | ||||
|             'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', | ||||
|             'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'timestamp': 1292248482.625, | ||||
|             'upload_date': '20101213', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_url = "https://dotsub.com/api/media/%s/metadata" % video_id | ||||
|         info = self._download_json(info_url, video_id) | ||||
|         date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'https://dotsub.com/api/media/%s/metadata' % video_id, video_id) | ||||
|         video_url = info.get('mediaURI') | ||||
|  | ||||
|         if not video_url: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             video_url = self._search_regex( | ||||
|                 r'"file"\s*:\s*\'([^\']+)', webpage, 'video url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['mediaURI'], | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': info['title'], | ||||
|             'thumbnail': info['screenshotURI'], | ||||
|             'description': info['description'], | ||||
|             'uploader': info['user'], | ||||
|             'view_count': info['numberOfViews'], | ||||
|             'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info.get('screenshotURI'), | ||||
|             'duration': int_or_none(info.get('duration'), 1000), | ||||
|             'uploader': info.get('user'), | ||||
|             'timestamp': float_or_none(info.get('dateCreated'), 1000), | ||||
|             'view_count': int_or_none(info.get('numberOfViews')), | ||||
|         } | ||||
|   | ||||
| @@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor): | ||||
|             'id': '1740434', | ||||
|             'display_id': 'hot-perky-blonde-naked-golf', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hot Perky Blonde Naked Golf', | ||||
|             'title': 'hot perky blonde naked golf', | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
| @@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor): | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)\s*-\s*Free', webpage, 'title') | ||||
|             [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'], | ||||
|             webpage, 'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'poster="([^"]+)"', | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from ..utils import parse_iso8601 | ||||
|  | ||||
|  | ||||
| class DRTVIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', | ||||
| @@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         programcard = self._download_json( | ||||
|             'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-(?:material-identifier|episode-slug)="([^"]+)"', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         programcard = self._download_json( | ||||
|             'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|         data = programcard['Data'][0] | ||||
|  | ||||
|         title = data['Title'] | ||||
| @@ -48,14 +54,20 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|             elif asset['Kind'] == 'VideoResource': | ||||
|                 duration = asset['DurationInMilliseconds'] / 1000.0 | ||||
|                 restricted_to_denmark = asset['RestrictedToDenmark'] | ||||
|                 spoken_subtitles = asset['Target'] == 'SpokenSubtitles' | ||||
|                 for link in asset['Links']: | ||||
|                     target = link['Target'] | ||||
|                     uri = link['Uri'] | ||||
|                     format_id = target | ||||
|                     preference = -1 if target == 'HDS' else -2 | ||||
|                     if spoken_subtitles: | ||||
|                         preference -= 2 | ||||
|                         format_id += '-spoken-subtitles' | ||||
|                     formats.append({ | ||||
|                         'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, | ||||
|                         'format_id': target, | ||||
|                         'format_id': format_id, | ||||
|                         'ext': link['FileFormat'], | ||||
|                         'preference': -1 if target == 'HDS' else -2, | ||||
|                         'preference': preference, | ||||
|                     }) | ||||
|                 subtitles_list = asset.get('SubtitlesList') | ||||
|                 if isinstance(subtitles_list, list): | ||||
|   | ||||
							
								
								
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class EmbedlyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) | ||||
| @@ -1,18 +1,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EscapistIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' | ||||
|     _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', | ||||
|         'md5': 'ab3a706c681efca53f0a35f1415cf0d1', | ||||
| @@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor): | ||||
|             'id': '6618', | ||||
|             'ext': 'mp4', | ||||
|             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", | ||||
|             'uploader': 'the-escapist-presents', | ||||
|             'uploader_id': 'the-escapist-presents', | ||||
|             'uploader': 'The Escapist Presents', | ||||
|             'title': "Breaking Down Baldur's Gate", | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         showName = mobj.group('showname') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         videoDesc = self._html_search_regex( | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, 'description', fatal=False) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", | ||||
|             webpage, 'uploader ID', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r"<h1\s+class='headline'>(.*?)</a>", | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         playerUrl = self._og_search_video_url(webpage, name='player URL') | ||||
|         raw_title = self._html_search_meta('title', webpage, fatal=True) | ||||
|         title = raw_title.partition(' : ')[2] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta name="title" content="([^"]*)"', | ||||
|             webpage, 'title').split(' : ')[-1] | ||||
|  | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL') | ||||
|         configUrl = compat_urllib_parse.unquote(configUrl) | ||||
|         config_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
| @@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor): | ||||
|                 cfgurl, video_id, | ||||
|                 'Downloading ' + name + ' configuration', | ||||
|                 'Unable to download ' + name + ' configuration', | ||||
|                 transform_source=lambda s: s.replace("'", '"')) | ||||
|                 transform_source=js_to_json) | ||||
|  | ||||
|             playlist = config['playlist'] | ||||
|             video_url = next( | ||||
|                 p['url'] for p in playlist | ||||
|                 if p.get('eventCategory') == 'Video') | ||||
|             formats.append({ | ||||
|                 'url': playlist[1]['url'], | ||||
|                 'url': video_url, | ||||
|                 'format_id': name, | ||||
|                 'quality': quality, | ||||
|             }) | ||||
|  | ||||
|         _add_format('normal', configUrl, quality=0) | ||||
|         hq_url = (configUrl + | ||||
|                   ('&hq=1' if '?' in configUrl else configUrl + '?hq=1')) | ||||
|         _add_format('normal', config_url, quality=0) | ||||
|         hq_url = (config_url + | ||||
|                   ('&hq=1' if '?' in config_url else config_url + '?hq=1')) | ||||
|         try: | ||||
|             _add_format('hq', hq_url, quality=1) | ||||
|         except ExtractorError: | ||||
| @@ -75,9 +76,9 @@ class EscapistIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'uploader': showName, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': videoDesc, | ||||
|             'player_url': playerUrl, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| @@ -16,7 +17,8 @@ from ..utils import ( | ||||
| class FC2IE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)' | ||||
|     IE_NAME = 'fc2' | ||||
|     _TEST = { | ||||
|     _NETRC_MACHINE = 'fc2' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', | ||||
|         'md5': 'a6ebe8ebe0396518689d963774a54eb7', | ||||
|         'info_dict': { | ||||
| @@ -24,12 +26,57 @@ class FC2IE(InfoExtractor): | ||||
|             'ext': 'flv', | ||||
|             'title': 'Boxing again with Puff', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/', | ||||
|         'info_dict': { | ||||
|             'id': '20150125cEva0hDn', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': { | ||||
|             'username': 'ytdl@yt-dl.org', | ||||
|             'password': '(snip)', | ||||
|             'skip': 'requires actual password' | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None or password is None: | ||||
|             return False | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
|             'email': username, | ||||
|             'password': password, | ||||
|             'done': 'video', | ||||
|             'Submit': ' Login ', | ||||
|         } | ||||
|  | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') | ||||
|         request = compat_urllib_request.Request( | ||||
|             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) | ||||
|  | ||||
|         login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') | ||||
|         if 'mode=redirect&login=done' not in login_results: | ||||
|             self.report_warning('unable to log in: bad username or password') | ||||
|             return False | ||||
|  | ||||
|         # this is also needed | ||||
|         login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') | ||||
|         self._download_webpage( | ||||
|             login_redir, None, note='Login redirect', errnote='Login redirect failed') | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         self._login() | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         self._downloader.cookiejar.clear_session_cookies()  # must clear | ||||
|         self._login() | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
| @@ -46,7 +93,12 @@ class FC2IE(InfoExtractor): | ||||
|         info = compat_urlparse.parse_qs(info_webpage) | ||||
|  | ||||
|         if 'err_code' in info: | ||||
|             raise ExtractorError('Error code: %s' % info['err_code'][0]) | ||||
|             # most of the time we can still download wideo even if err_code is 403 or 602 | ||||
|             self.report_warning( | ||||
|                 'Error code was: %s... but still trying' % info['err_code'][0]) | ||||
|  | ||||
|         if 'filepath' not in info: | ||||
|             raise ExtractorError('Cannot download file. Are you logged in?') | ||||
|  | ||||
|         video_url = info['filepath'][0] + '?mid=' + info['mid'][0] | ||||
|         title_info = info.get('title') | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_meta('twitter:title', page, 'title') | ||||
|  | ||||
|         title = self._html_search_meta('twitter:title', page, 'title', fatal=True) | ||||
|         description = self._html_search_meta('twitter:description', page, 'title') | ||||
|  | ||||
|         data = self._download_xml( | ||||
| @@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor): | ||||
|                 'height': int(details.find('./height').text.strip()), | ||||
|             } for details in item.findall('./source/file_details') if details.find('./file').text | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,52 +1,71 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class FirstTVIE(InfoExtractor): | ||||
|     IE_NAME = 'firsttv' | ||||
|     IE_DESC = 'Видеоархив - Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' | ||||
|     IE_NAME = '1tv' | ||||
|     IE_DESC = 'Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.1tv.ru/videoarchive/73390', | ||||
|         'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', | ||||
|         'md5': '777f525feeec4806130f4f764bc18a4f', | ||||
|         'info_dict': { | ||||
|             'id': '73390', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Олимпийские канатные дороги', | ||||
|             'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', | ||||
|             'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 149, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', | ||||
|         'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', | ||||
|         'info_dict': { | ||||
|             'id': '35930', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Наедине со всеми. Людмила Сенчина', | ||||
|             'description': 'md5:89553aed1d641416001fe8d450f06cb9', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 2694, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') | ||||
|             r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', | ||||
|             webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') | ||||
|             [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', | ||||
|              r"'title'\s*:\s*'([^']+)'"], webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', | ||||
|             webpage, 'description', default=None) or self._html_search_meta( | ||||
|                 'description', webpage, 'description') | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) | ||||
|         duration = self._og_search_property( | ||||
|             'video:duration', webpage, | ||||
|             'video duration', fatal=False) | ||||
|  | ||||
|         like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|                                              webpage, 'like count', fatal=False) | ||||
|         dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|                                                 webpage, 'dislike count', fatal=False) | ||||
|         like_count = self._html_search_regex( | ||||
|             r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'like count', default=None) | ||||
|         dislike_count = self._html_search_regex( | ||||
|             r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'dislike count', default=None) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| | ||||
|             https?://(?:(?:massively|www)\.)?joystiq\.com/video/| | ||||
|             5min:) | ||||
|         (?P<id>\d+) | ||||
|         ''' | ||||
|   | ||||
| @@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player', | ||||
|         'md5': '6269e8626fa1a891bf5369b386ae996a', | ||||
|         'info_dict': { | ||||
|             'id': '1165642', | ||||
|             'ext': 'mp4', | ||||
| @@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor): | ||||
|             'upload_date': '20141120', | ||||
|             'duration': 3960, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'rtmpdump required', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,77 +1,69 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FranceCultureIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', | ||||
|         'info_dict': { | ||||
|             'id': '4795174', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Rendez-vous au pays des geeks', | ||||
|             'alt_title': 'Carnet nomade | 13-14', | ||||
|             'vcodec': 'none', | ||||
|             'uploader': 'Colette Fellous', | ||||
|             'upload_date': '20140301', | ||||
|             'duration': 3601, | ||||
|             'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', | ||||
|             'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...', | ||||
|             'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats', | ||||
|             'timestamp': 1393700400, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         baseurl = mobj.group('baseurl') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         params_code = self._search_regex( | ||||
|             r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />", | ||||
|             webpage, 'parameter code') | ||||
|         params = compat_parse_qs(params_code) | ||||
|         video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0]) | ||||
|  | ||||
|         video_path = self._search_regex( | ||||
|             r'<a id="player".*?href="([^"]+)"', webpage, 'video path') | ||||
|         video_url = compat_urlparse.urljoin(url, video_path) | ||||
|         timestamp = int_or_none(self._search_regex( | ||||
|             r'<a id="player".*?data-date="([0-9]+)"', | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<a id="player".*?>\s+<img src="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title') | ||||
|             r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') | ||||
|         alt_title = self._html_search_regex( | ||||
|             r'<span class="title">(.*?)</span>', | ||||
|             webpage, 'alt_title', fatal=False) | ||||
|         description = self._html_search_regex( | ||||
|             r'<span class="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         uploader = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail_part = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage, | ||||
|             'thumbnail', fatal=False) | ||||
|         if thumbnail_part is None: | ||||
|             thumbnail = None | ||||
|         else: | ||||
|             thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<p class="desc">(.*?)</p>', webpage, 'description') | ||||
|  | ||||
|         info = json.loads(params['infoData'][0])[0] | ||||
|         duration = info.get('media_length') | ||||
|         upload_date_candidate = info.get('media_section5') | ||||
|         upload_date = ( | ||||
|             upload_date_candidate | ||||
|             if (upload_date_candidate is not None and | ||||
|                 re.match(r'[0-9]{8}$', upload_date_candidate)) | ||||
|             else None) | ||||
|             webpage, 'uploader', default=None) | ||||
|         vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'vcodec': 'none' if video_url.lower().endswith('.mp3') else None, | ||||
|             'duration': duration, | ||||
|             'vcodec': vcodec, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': timestamp, | ||||
|             'title': title, | ||||
|             'alt_title': alt_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|         } | ||||
|   | ||||
| @@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|  | ||||
| class GenerationQuoiIE(InfoExtractor): | ||||
|     IE_NAME = 'france2.fr:generation-quoi' | ||||
|     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' | ||||
|     _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', | ||||
|         'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', | ||||
|         'info_dict': { | ||||
|             'id': 'k7FJX8VBcvvLmX4wA5Q', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Génération Quoi - Garde à Vous', | ||||
|             'uploader': 'Génération Quoi', | ||||
|         }, | ||||
| @@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor): | ||||
|             # It uses Dailymotion | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Only available from France', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) | ||||
|         info_json = self._download_webpage(info_url, name) | ||||
|         display_id = self._match_id(url) | ||||
|         info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id) | ||||
|         info_json = self._download_webpage(info_url, display_id) | ||||
|         info = json.loads(info_json) | ||||
|         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], | ||||
|                                ie='Dailymotion') | ||||
|   | ||||
| @@ -1,41 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GamekingsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', | ||||
|         # MD5 is flaky, seems to change regularly | ||||
|         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', | ||||
|         'info_dict': { | ||||
|             'id': '20130811', | ||||
|             'id': 'phoenix-wright-ace-attorney-dual-destinies-review', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', | ||||
|             'description': 'md5:36fd701e57e8c15ac8682a2374c99731', | ||||
|         } | ||||
|     } | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         # vimeo video | ||||
|         'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', | ||||
|         'md5': '12bf04dfd238e70058046937657ea68d', | ||||
|         'info_dict': { | ||||
|             'id': 'the-legend-of-zelda-majoras-mask', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Legend of Zelda: Majora’s Mask', | ||||
|             'description': 'md5:9917825fe0e9f4057601fe1e38860de3', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         video_url = self._og_search_video_url(webpage) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video = re.search(r'[0-9]+', video_url) | ||||
|         video_id = video.group(0) | ||||
|         playlist_id = self._search_regex( | ||||
|             r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') | ||||
|  | ||||
|         # Todo: add medium format | ||||
|         video_url = video_url.replace(video_id, 'large/' + video_id) | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, | ||||
|             video_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') | ||||
|         video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
| @@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', | ||||
|             'play_path': remove_end(slide_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
| @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', | ||||
|             'play_path': remove_end(speaker_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|   | ||||
| @@ -140,6 +140,19 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # multiple ooyala embeds on SBN network websites | ||||
|         { | ||||
|             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|             'info_dict': { | ||||
|                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', | ||||
|                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', | ||||
|             }, | ||||
|             'playlist_mincount': 3, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'add_ie': ['Ooyala'], | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
| @@ -362,7 +375,7 @@ class GenericIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', | ||||
|                 'title': 'Zero Punctuation', | ||||
|                 'description': 're:' | ||||
|                 'description': 're:.*groundbreaking video review series.*' | ||||
|             }, | ||||
|             'playlist_mincount': 11, | ||||
|         }, | ||||
| @@ -460,6 +473,7 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', | ||||
|             'info_dict': { | ||||
|                 'id': '1986', | ||||
|                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', | ||||
|             }, | ||||
|             'playlist_mincount': 2, | ||||
| @@ -489,6 +503,60 @@ class GenericIE(InfoExtractor): | ||||
|                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', | ||||
|             } | ||||
|         }, | ||||
|         # Cinerama player | ||||
|         { | ||||
|             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', | ||||
|             'info_dict': { | ||||
|                 'id': '730m_DandD_1901_512k', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'www.abc.net.au', | ||||
|                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', | ||||
|             } | ||||
|         }, | ||||
|         # embedded viddler video | ||||
|         { | ||||
|             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597', | ||||
|             'info_dict': { | ||||
|                 'id': '4d03aad9', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'deadspin', | ||||
|                 'title': 'WALL-TO-GORTAT', | ||||
|                 'timestamp': 1422285291, | ||||
|                 'upload_date': '20150126', | ||||
|             }, | ||||
|             'add_ie': ['Viddler'], | ||||
|         }, | ||||
|         # jwplayer YouTube | ||||
|         { | ||||
|             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', | ||||
|             'info_dict': { | ||||
|                 'id': 'Mrj4DVp2zeA', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20150212', | ||||
|                 'uploader': 'The National Archives UK', | ||||
|                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', | ||||
|                 'uploader_id': 'NationalArchives08', | ||||
|                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', | ||||
|             }, | ||||
|         }, | ||||
|         # rtl.nl embed | ||||
|         { | ||||
|             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', | ||||
|             'playlist_mincount': 5, | ||||
|             'info_dict': { | ||||
|                 'id': 'aanslagen-kopenhagen', | ||||
|                 'title': 'Aanslagen Kopenhagen | RTL Nieuws', | ||||
|             } | ||||
|         }, | ||||
|         # Zapiks embed | ||||
|         { | ||||
|             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', | ||||
|             'info_dict': { | ||||
|                 'id': '118046', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -733,6 +801,13 @@ class GenericIE(InfoExtractor): | ||||
|                 'entries': entries, | ||||
|             } | ||||
|  | ||||
|         # Look for embedded rtl.nl player | ||||
|         matches = re.findall( | ||||
|             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"', | ||||
|             webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, ie='RtlNl') | ||||
|  | ||||
|         # Look for embedded (iframe) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) | ||||
| @@ -740,7 +815,6 @@ class GenericIE(InfoExtractor): | ||||
|             player_url = unescapeHTML(mobj.group('url')) | ||||
|             surl = smuggle_url(player_url, {'Referer': url}) | ||||
|             return self.url_result(surl) | ||||
|  | ||||
|         # Look for embedded (swf embed) Vimeo player | ||||
|         mobj = re.search( | ||||
|             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) | ||||
| @@ -850,12 +924,28 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url')) | ||||
|  | ||||
|         # Look for embedded Viddler player | ||||
|         mobj = re.search( | ||||
|             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url')) | ||||
|  | ||||
|         # Look for Ooyala videos | ||||
|         mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or | ||||
|                 re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)) | ||||
|         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or | ||||
|                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or | ||||
|                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)) | ||||
|         if mobj is not None: | ||||
|             return OoyalaIE._build_url_result(mobj.group('ec')) | ||||
|  | ||||
|         # Look for multiple Ooyala embeds on SBN network websites | ||||
|         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) | ||||
|         if mobj is not None: | ||||
|             embeds = self._parse_json(mobj.group(1), video_id, fatal=False) | ||||
|             if embeds: | ||||
|                 return _playlist_from_matches( | ||||
|                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') | ||||
|  | ||||
|         # Look for Aparat videos | ||||
|         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
|         if mobj is not None: | ||||
| @@ -982,7 +1072,12 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded sbs.com.au player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1', | ||||
|             r'''(?x) | ||||
|             (?: | ||||
|                 <meta\s+property="og:video"\s+content=| | ||||
|                 <iframe[^>]+?src= | ||||
|             ) | ||||
|             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'SBS') | ||||
| @@ -1012,7 +1107,15 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Livestream') | ||||
|  | ||||
|         # Look for Zapiks embed | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|             vpath = compat_urlparse.urlparse(vurl).path | ||||
|             vext = determine_ext(vpath) | ||||
|             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') | ||||
| @@ -1030,7 +1133,8 @@ class GenericIE(InfoExtractor): | ||||
|                     JWPlayerOptions| | ||||
|                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup | ||||
|                 ) | ||||
|                 .*?file\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|                 .*? | ||||
|                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) | ||||
|         if not found: | ||||
|             # Broaden the search a little bit | ||||
|             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) | ||||
| @@ -1043,9 +1147,13 @@ class GenericIE(InfoExtractor): | ||||
|             found = filter_video(re.findall(r'''(?xs) | ||||
|                 flowplayer\("[^"]+",\s* | ||||
|                     \{[^}]+?\}\s*, | ||||
|                     \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s* | ||||
|                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s* | ||||
|                         ["']?url["']?\s*:\s*["']([^"']+)["'] | ||||
|             ''', webpage)) | ||||
|         if not found: | ||||
|             # Cinerama player | ||||
|             found = re.findall( | ||||
|                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) | ||||
|         if not found: | ||||
|             # Try to find twitter cards info | ||||
|             found = filter_video(re.findall( | ||||
|   | ||||
| @@ -70,6 +70,19 @@ class GloboIE(InfoExtractor): | ||||
|                 'like_count': int, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', | ||||
|             'md5': 'c1defca721ce25b2354e927d3e4b3dec', | ||||
|             'info_dict': { | ||||
|                 'id': '3928201', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', | ||||
|                 'duration': 1472.906, | ||||
|                 'uploader': 'Canal Brasil', | ||||
|                 'uploader_id': 705, | ||||
|                 'like_count': int, | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     class MD5(): | ||||
| @@ -381,11 +394,16 @@ class GloboIE(InfoExtractor): | ||||
|             signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding) | ||||
|             signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5 | ||||
|  | ||||
|             formats.append({ | ||||
|                 'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'), | ||||
|                 'format_id': resource_id, | ||||
|                 'height': resource['height'] | ||||
|             }) | ||||
|             resource_url = resource['url'] | ||||
|             signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') | ||||
|             if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4')) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': signed_url, | ||||
|                     'format_id': resource_id, | ||||
|                     'height': resource.get('height'), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor): | ||||
|         duration = parse_duration(self._html_search_regex( | ||||
|             r'<span class="duration">\s*-?\s*(.*?)</span>', | ||||
|             webpage, 'duration', fatal=False)) | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, default='false') | ||||
|  | ||||
|         flashvars = compat_parse_qs(self._html_search_regex( | ||||
|             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', | ||||
| @@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': 0 if family_friendly == 'true' else 18, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor): | ||||
|         return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None)) | ||||
|  | ||||
|     def _transform_bootstrap(self, js): | ||||
|         return re.split('(?m)^\s*try\s*{', js)[0] \ | ||||
|         return re.split('(?m)^\s*try\s*\{', js)[0] \ | ||||
|                  .split(' = ', 1)[1].strip().rstrip(';') | ||||
|  | ||||
|     def _transform_meta(self, js): | ||||
|   | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/historicfilms.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/historicfilms.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_duration | ||||
|  | ||||
|  | ||||
| class HistoricFilmsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.historicfilms.com/tapes/4728', | ||||
|         'md5': 'd4a437aec45d8d796a38a215db064e9a', | ||||
|         'info_dict': { | ||||
|             'id': '4728', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Historic Films: GP-7', | ||||
|             'description': 'md5:1a86a0f3ac54024e419aba97210d959a', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 2096, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         tape_id = self._search_regex( | ||||
|             r'class="tapeId">([^<]+)<', webpage, 'tape id') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         thumbnail = self._html_search_meta( | ||||
|             'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage) | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration')) | ||||
|  | ||||
|         video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|         } | ||||
							
								
								
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
|  | ||||
|  | ||||
| class HistoryIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', | ||||
|         'md5': '6fe632d033c92aa10b8d4a9be047a7c5', | ||||
|         'info_dict': { | ||||
|             'id': 'bLx5Dv5Aka1G', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Bet You Didn't Know: Valentine's Day", | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, | ||||
|             webpage, 'video url') | ||||
|  | ||||
|         return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}})) | ||||
| @@ -34,6 +34,9 @@ class IGNIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             'info_dict': { | ||||
|                 'id': '100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             }, | ||||
|             'playlist': [ | ||||
|                 { | ||||
|                     'info_dict': { | ||||
|   | ||||
							
								
								
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ImgurIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://i.imgur.com/A61SaA1.gifv', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://imgur.com/A61SaA1', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         width = int_or_none(self._search_regex( | ||||
|             r'<param name="width" value="([0-9]+)"', | ||||
|             webpage, 'width', fatal=False)) | ||||
|         height = int_or_none(self._search_regex( | ||||
|             r'<param name="height" value="([0-9]+)"', | ||||
|             webpage, 'height', fatal=False)) | ||||
|  | ||||
|         video_elements = self._search_regex( | ||||
|             r'(?s)<div class="video-elements">(.*?)</div>', | ||||
|             webpage, 'video elements', default=None) | ||||
|         if not video_elements: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s. Maybe an image?' % video_id, | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): | ||||
|             formats.append({ | ||||
|                 'format_id': m.group('type').partition('/')[2], | ||||
|                 'url': self._proto_relative_url(m.group('src')), | ||||
|                 'ext': mimetype2ext(m.group('type')), | ||||
|                 'acodec': 'none', | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         gif_json = self._search_regex( | ||||
|             r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', | ||||
|             webpage, 'GIF code', fatal=False) | ||||
|         if gif_json: | ||||
|             gifd = self._parse_json( | ||||
|                 gif_json, video_id, transform_source=js_to_json) | ||||
|             formats.append({ | ||||
|                 'format_id': 'gif', | ||||
|                 'preference': -10, | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'ext': 'gif', | ||||
|                 'acodec': 'none', | ||||
|                 'vcodec': 'gif', | ||||
|                 'container': 'gif', | ||||
|                 'url': self._proto_relative_url(gifd['gifUrl']), | ||||
|                 'filesize': gifd.get('size'), | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
| class IviIE(InfoExtractor): | ||||
|     IE_DESC = 'ivi.ru' | ||||
|     IE_NAME = 'ivi' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # Single movie | ||||
| @@ -63,29 +63,34 @@ class IviIE(InfoExtractor): | ||||
|         return int(m.group('commentcount')) if m is not None else 0 | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         api_url = 'http://api.digitalaccess.ru/api/json/' | ||||
|  | ||||
|         data = {'method': 'da.content.get', | ||||
|                 'params': [video_id, {'site': 's183', | ||||
|                                       'referrer': 'http://www.ivi.ru/watch/%s' % video_id, | ||||
|                                       'contentid': video_id | ||||
|                                       } | ||||
|                            ] | ||||
|         data = { | ||||
|             'method': 'da.content.get', | ||||
|             'params': [ | ||||
|                 video_id, { | ||||
|                     'site': 's183', | ||||
|                     'referrer': 'http://www.ivi.ru/watch/%s' % video_id, | ||||
|                     'contentid': video_id | ||||
|                 } | ||||
|             ] | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request(api_url, json.dumps(data)) | ||||
|  | ||||
|         video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON') | ||||
|         video_json_page = self._download_webpage( | ||||
|             request, video_id, 'Downloading video JSON') | ||||
|         video_json = json.loads(video_json_page) | ||||
|  | ||||
|         if 'error' in video_json: | ||||
|             error = video_json['error'] | ||||
|             if error['origin'] == 'NoRedisValidData': | ||||
|                 raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|             raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True) | ||||
|             raise ExtractorError( | ||||
|                 'Unable to download video %s: %s' % (video_id, error['message']), | ||||
|                 expected=True) | ||||
|  | ||||
|         result = video_json['result'] | ||||
|  | ||||
|   | ||||
| @@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             r'comment_count\s*=\s*\'([^\']+)\';', | ||||
|             webpage, 'comment_count', fatal=False) | ||||
|  | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, 'age limit', fatal=False) | ||||
|  | ||||
|         content_url = self._html_search_meta( | ||||
|             'contentURL', webpage, 'content URL', fatal=False) | ||||
|         ext = determine_ext(content_url, 'mp4') | ||||
| @@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor): | ||||
|             'duration': duration, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'comment_count': int_or_none(comment_count), | ||||
|             'age_limit': 18 if family_friendly == 'False' else 0, | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -13,17 +13,17 @@ class KankanIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', | ||||
|         'file': '48863.flv', | ||||
|         'md5': '29aca1e47ae68fc28804aca89f29507e', | ||||
|         'info_dict': { | ||||
|             'id': '48863', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Ready To Go', | ||||
|         }, | ||||
|         'skip': 'Only available from China', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') | ||||
|   | ||||
| @@ -7,10 +7,6 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_decrypt_text | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', | ||||
|         'file': '1214711.mp4', | ||||
|         'md5': '6e297b7e789329923fcf83abb67c9289', | ||||
|         'info_dict': { | ||||
|             'id': '1214711', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Petite Asian Lady Mai Playing In Bathtub', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
| @@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor): | ||||
|             embedded_url = mobj.group(1) | ||||
|             return self.url_result(embedded_url) | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title') | ||||
|         video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url')) | ||||
|         if 'encrypted=true' in webpage: | ||||
|             password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password') | ||||
|             video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h1 [^>]*>([^<]+)', webpage, 'title') | ||||
|         video_url = self._html_search_regex( | ||||
|             r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL') | ||||
|         path = compat_urllib_parse_urlparse(video_url).path | ||||
|         extension = os.path.splitext(path)[1][1:] | ||||
|         format = path.split('/')[4].split('_')[:2] | ||||
|   | ||||
| @@ -2,18 +2,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class KrasViewIE(InfoExtractor): | ||||
|     IE_DESC = 'Красвью' | ||||
|     _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://krasview.ru/video/512228', | ||||
| @@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         flashvars = json.loads(self._search_regex( | ||||
|             r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars')) | ||||
|         flashvars = json.loads(js_to_json(self._search_regex( | ||||
|             r'video_Init\(({.+?})', webpage, 'flashvars'))) | ||||
|  | ||||
|         video_url = flashvars['url'] | ||||
|         title = unescapeHTML(flashvars['title']) | ||||
|         description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None)) | ||||
|         thumbnail = flashvars['image'] | ||||
|         duration = int(flashvars['duration']) | ||||
|         filesize = int(flashvars['size']) | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage, default=None) | ||||
|         thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage) | ||||
|         duration = int_or_none(flashvars.get('duration')) | ||||
|         width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) | ||||
|         height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) | ||||
|  | ||||
| @@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor): | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'filesize': filesize, | ||||
|             'width': width, | ||||
|             'height': height, | ||||
|         } | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
| @@ -20,9 +18,10 @@ class LA7IE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.la7.tv/richplayer/?assetid=50355319', | ||||
|         'file': '50355319.mp4', | ||||
|         'md5': 'ec7d1f0224d20ba293ab56cf2259651f', | ||||
|         'info_dict': { | ||||
|             'id': '50355319', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'IL DIVO', | ||||
|             'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci', | ||||
|             'duration': 6254, | ||||
| @@ -31,9 +30,7 @@ class LA7IE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id | ||||
|         doc = self._download_xml(xml_url, video_id) | ||||
|  | ||||
|   | ||||
| @@ -8,20 +8,20 @@ from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class LiveLeakIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.liveleak.com/view?i=757_1364311680', | ||||
|         'md5': '0813c2430bea7a46bf13acf3406992f4', | ||||
|         'md5': '50f79e05ba149149c1b4ea961223d5b3', | ||||
|         'info_dict': { | ||||
|             'id': '757_1364311680', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'flv', | ||||
|             'description': 'extremely bad day for this guy..!', | ||||
|             'uploader': 'ljfriel2', | ||||
|             'title': 'Most unlucky car accident' | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.liveleak.com/view?i=f93_1390833151', | ||||
|         'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', | ||||
|         'md5': 'b13a29626183c9d33944e6a04f41aafc', | ||||
|         'info_dict': { | ||||
|             'id': 'f93_1390833151', | ||||
|             'ext': 'mp4', | ||||
| @@ -43,8 +43,7 @@ class LiveLeakIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() | ||||
| @@ -81,9 +80,19 @@ class LiveLeakIE(InfoExtractor): | ||||
|         sources = json.loads(sources_json) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s' % i, | ||||
|             'format_note': s.get('label'), | ||||
|             'url': s['file'], | ||||
|         } for s in sources] | ||||
|         } for i, s in enumerate(sources)] | ||||
|         for i, s in enumerate(sources): | ||||
|             orig_url = s['file'].replace('.h264_base.mp4', '') | ||||
|             if s['file'] != orig_url: | ||||
|                 formats.append({ | ||||
|                     'format_id': 'original-%s' % i, | ||||
|                     'format_note': s.get('label'), | ||||
|                     'url': orig_url, | ||||
|                     'preference': 1, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor): | ||||
|         'url': 'http://new.livestream.com/tedx/cityenglish', | ||||
|         'info_dict': { | ||||
|             'title': 'TEDCity2.0 (English)', | ||||
|             'id': '2245590', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     }, { | ||||
| @@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor): | ||||
|                   if is_relevant(video_data, video_id)] | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|             return self.playlist_result( | ||||
|                 videos, '%s' % info['id'], info['full_name']) | ||||
|         else: | ||||
|             if not videos: | ||||
|                 raise ExtractorError('Cannot find video %s' % video_id) | ||||
|   | ||||
| @@ -6,13 +6,12 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LnkGoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', | ||||
|         'info_dict': { | ||||
| @@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, display_id, 'Downloading player webpage') | ||||
| @@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor): | ||||
|             r'data-ep="([^"]+)"', webpage, 'video ID') | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False)) | ||||
|  | ||||
|         thumbnail_w = int_or_none( | ||||
|             self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False)) | ||||
| @@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor): | ||||
|                 'height': thumbnail_h, | ||||
|             }) | ||||
|  | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False)) | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False)) | ||||
|         config = self._parse_json(self._search_regex( | ||||
|             r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id) | ||||
|  | ||||
|         pg_rating = self._search_regex( | ||||
|             r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='') | ||||
|         age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0) | ||||
|         if config.get('pGeo'): | ||||
|             self.report_warning( | ||||
|                 'This content might not be available in your country due to copyright reasons') | ||||
|  | ||||
|         sources_js = self._search_regex( | ||||
|             r'(?s)sources:\s(\[.*?\]),', webpage, 'sources') | ||||
|         sources = self._parse_json( | ||||
|             sources_js, video_id, transform_source=js_to_json) | ||||
|         formats = [{ | ||||
|             'format_id': 'hls', | ||||
|             'ext': 'mp4', | ||||
|             'url': config['EpisodeVideoLink_HLS'], | ||||
|         }] | ||||
|  | ||||
|         formats = [] | ||||
|         for source in sources: | ||||
|             if source.get('provider') == 'rtmp': | ||||
|                 m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file']) | ||||
|                 if not m: | ||||
|                     continue | ||||
|                 formats.append({ | ||||
|                     'format_id': 'rtmp', | ||||
|                     'ext': 'flv', | ||||
|                     'url': m.group('url'), | ||||
|                     'play_path': m.group('play_path'), | ||||
|                     'page_url': url, | ||||
|                 }) | ||||
|             elif source.get('file').endswith('.m3u8'): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'hls', | ||||
|                     'ext': source.get('type', 'mp4'), | ||||
|                     'url': source['file'], | ||||
|                 }) | ||||
|         m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink']) | ||||
|         if m: | ||||
|             formats.append({ | ||||
|                 'format_id': 'rtmp', | ||||
|                 'ext': 'flv', | ||||
|                 'url': m.group('url'), | ||||
|                 'play_path': m.group('play_path'), | ||||
|                 'page_url': url, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnails': [thumbnail], | ||||
|             'duration': duration, | ||||
|             'duration': int_or_none(config.get('VideoTime')), | ||||
|             'description': description, | ||||
|             'age_limit': age_limit, | ||||
|             'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0), | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|   | ||||
| @@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|                 } for format_id, video_url in prioritized_streams['0'].items() | ||||
|             ]) | ||||
|  | ||||
|         self._check_formats(formats, video_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
| @@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', | ||||
|         'file': '2450.m4v', | ||||
|         'md5': '8649b8ea684b6666b4c5be736ecddc61', | ||||
|         'info_dict': { | ||||
|             'id': '2450', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Crow', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage( | ||||
|             url, video_id, 'Downloading trailer page') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading trailer page') | ||||
|  | ||||
|         if re.search(r'>Missing Media<', webpage) is not None: | ||||
|             raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) | ||||
|         if '>Missing Media<' in webpage: | ||||
|             raise ExtractorError( | ||||
|                 'Trailer %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title') | ||||
|   | ||||
| @@ -9,7 +9,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     HEADRequest, | ||||
|     int_or_none, | ||||
|     str_to_int, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
| @@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' | ||||
|     IE_NAME = 'mixcloud' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', | ||||
|         'info_dict': { | ||||
|             'id': 'dholbach-cryptkeeper', | ||||
| @@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor): | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', | ||||
|         'info_dict': { | ||||
|             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', | ||||
|             'ext': 'm4a', | ||||
|             'title': 'Electric Relaxation vol. 3', | ||||
|             'description': 'md5:2b8aec6adce69f9d41724647c65875e8', | ||||
|             'uploader': 'Daniel Drumz', | ||||
|             'uploader_id': 'gillespeterson', | ||||
|             'thumbnail': 're:https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _get_url(self, track_id, template_url): | ||||
|         server_count = 30 | ||||
| @@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, track_id) | ||||
|  | ||||
|         preview_url = self._search_regex( | ||||
|             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') | ||||
|             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') | ||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) | ||||
|         final_song_url = self._get_url(track_id, template_url) | ||||
| @@ -85,15 +98,17 @@ class MixcloudIE(InfoExtractor): | ||||
|         uploader_id = self._search_regex( | ||||
|             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) | ||||
|         description = self._og_search_description(webpage) | ||||
|         like_count = int_or_none(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', | ||||
|         like_count = str_to_int(self._search_regex( | ||||
|             [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', | ||||
|              r'/favorites/?">([0-9]+)<'], | ||||
|             webpage, 'like count', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', | ||||
|              r'/listeners/?">([0-9,.]+)</a>'], | ||||
|             webpage, 'play count', fatal=False)) | ||||
|         timestamp = parse_iso8601(self._search_regex( | ||||
|             r'<time itemprop="dateCreated" datetime="([^"]+)">', | ||||
|             webpage, 'upload date')) | ||||
|             webpage, 'upload date', default=None)) | ||||
|  | ||||
|         return { | ||||
|             'id': track_id, | ||||
|   | ||||
| @@ -1,21 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class MporaIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' | ||||
|     _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' | ||||
|     IE_NAME = 'MPORA' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', | ||||
|         'file': 'AAdo8okx4wiz.mp4', | ||||
|         'md5': 'a7a228473eedd3be741397cf452932eb', | ||||
|         'info_dict': { | ||||
|             'id': 'AAdo8okx4wiz', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Katy Curd -  Winter in the Forest', | ||||
|             'duration': 416, | ||||
|             'uploader': 'Peter Newman Media', | ||||
| @@ -23,14 +21,12 @@ class MporaIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data_json = self._search_regex( | ||||
|             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|         data = self._parse_json(data_json, video_id) | ||||
|  | ||||
|         uploader = data['info_overlay'].get('username') | ||||
|         duration = data['video']['duration'] // 1000 | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user