Compare commits
	
		
			400 Commits
		
	
	
		
			2015.08.23
			...
			2015.10.06
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 86be82610c | ||
|   | 4810c48d6d | ||
|   | c4af7684d8 | ||
|   | fcc2546269 | ||
|   | 40fbb05e1c | ||
|   | dc5756fd77 | ||
|   | 41db733308 | ||
|   | 0bf219889e | ||
|   | f2a7ed77ef | ||
|   | 4853eb63fe | ||
|   | 5820c4a29e | ||
|   | 7fd4ed9939 | ||
|   | 88c86d211b | ||
|   | 5d84b79a30 | ||
|   | 140ac73965 | ||
|   | 2a27e66234 | ||
|   | e759a00119 | ||
|   | 9d5332518c | ||
|   | 90ab741e90 | ||
|   | 96229998c2 | ||
|   | 0659dfccfe | ||
|   | 9c544e2537 | ||
|   | d7fc56318b | ||
|   | 4bba371644 | ||
|   | ef5acfe32d | ||
|   | 85557f635a | ||
|   | 60d23e5e59 | ||
|   | 97d5bfcba6 | ||
|   | bad84757eb | ||
|   | 13118a50b8 | ||
|   | 5495937f46 | ||
|   | b203095d4c | ||
|   | f3b098fb90 | ||
|   | af17794c65 | ||
|   | 3bb3f04108 | ||
|   | 59a9efe85b | ||
|   | 0facd2af3e | ||
|   | 7d0ada5ff9 | ||
|   | 44451f22d5 | ||
|   | 06c6efa970 | ||
|   | e5851b963a | ||
|   | 4de6131090 | ||
|   | 3a1341a7bc | ||
|   | c78e48177c | ||
|   | 6edaa0e25b | ||
|   | fb97809e64 | ||
|   | 0c996b9f48 | ||
|   | acfb717a18 | ||
|   | 647eab4541 | ||
|   | 1e5bcdec02 | ||
|   | e7d8e98a9f | ||
|   | 2b3f951a2e | ||
|   | 6751a1284d | ||
|   | b83831df1f | ||
|   | f540b93706 | ||
|   | 8466336104 | ||
|   | f88f1b40ce | ||
|   | 386a7b52d5 | ||
|   | 2e885de796 | ||
|   | 687c04cbb8 | ||
|   | 40c931de4b | ||
|   | 93bc7ef165 | ||
|   | ee2d190253 | ||
|   | aedb930cfc | ||
|   | c596ce91cd | ||
|   | 8a64969404 | ||
|   | c254f75bbb | ||
|   | 86692c019c | ||
|   | 1ab1c4ef57 | ||
|   | 926fb62eec | ||
|   | 817690ff73 | ||
|   | 98e1c935a1 | ||
|   | f30e9976d6 | ||
|   | 80e98aed69 | ||
|   | 6a24cb3d22 | ||
|   | e13b9e7885 | ||
|   | dd467d33d0 | ||
|   | c6b8f4d0c9 | ||
|   | 95240b8093 | ||
|   | 2f962d0a91 | ||
|   | 3c63e1bb57 | ||
|   | c471b34575 | ||
|   | d045f0bdb7 | ||
|   | 22becac4bd | ||
|   | 9d632b1b27 | ||
|   | 95c5e10103 | ||
|   | a5d09d684e | ||
|   | 8aab976bbd | ||
|   | 26c6d1922e | ||
|   | cd1bb54990 | ||
|   | d4cd06138c | ||
|   | 961c5cbf17 | ||
|   | b65e5bb72f | ||
|   | 54914380c0 | ||
|   | 26ccc68bed | ||
|   | ee3d5a6d47 | ||
|   | 46fde8a1a2 | ||
|   | fe1d858e35 | ||
|   | fc42bc6ec9 | ||
|   | fe6ad195ae | ||
|   | 7193650641 | ||
|   | 5db34f680f | ||
|   | a82ba8d0ce | ||
|   | 3706fb5dc8 | ||
|   | 08bea4adde | ||
|   | 4c917d0314 | ||
|   | 4866b72eb2 | ||
|   | 2d00be0477 | ||
|   | 3d09aa4c82 | ||
|   | c44c7895b8 | ||
|   | 8de28761c4 | ||
|   | 711762f0b7 | ||
|   | 5773803961 | ||
|   | 140359fc2c | ||
|   | 8ddf48d59f | ||
|   | 2e40a12225 | ||
|   | dade7245af | ||
|   | 1f9fb20fcd | ||
|   | 0940c5b4c6 | ||
|   | 42ca72dff3 | ||
|   | 2949a6cda9 | ||
|   | 882fc9052e | ||
|   | 9b166fc1f8 | ||
|   | d4364f30bd | ||
|   | 857421024d | ||
|   | 80faa7a152 | ||
|   | 545a23f11b | ||
|   | caedb0721e | ||
|   | 47024eb564 | ||
|   | 9c58885c70 | ||
|   | 9fbd4b35a2 | ||
|   | 05b476a270 | ||
|   | 4395ca2e04 | ||
|   | 19f93d906e | ||
|   | 57565375c8 | ||
|   | eb11cbe867 | ||
|   | f102819463 | ||
|   | b942db3dc3 | ||
|   | 78f9fb902b | ||
|   | d8fef8faac | ||
|   | 8ea6bd2802 | ||
|   | c659022b5c | ||
|   | 8ca2e93e1a | ||
|   | 5600e214c3 | ||
|   | 6400f8ec0f | ||
|   | c3a4e2ec40 | ||
|   | e28c794699 | ||
|   | da9f180835 | ||
|   | 6b8ce312e3 | ||
|   | de3fc356e1 | ||
|   | d0fed4ac02 | ||
|   | 7ce50a355c | ||
|   | 9612f23399 | ||
|   | cccedc1aa4 | ||
|   | c430802e32 | ||
|   | cb4e421901 | ||
|   | 8e97596b7b | ||
|   | 92085e7099 | ||
|   | c6aa838b51 | ||
|   | 9f5e8d16b3 | ||
|   | 82c06a40ac | ||
|   | 4423eba49b | ||
|   | 5b4c54631a | ||
|   | 5a1a2e9454 | ||
|   | f005f96ea5 | ||
|   | 5e39123b3b | ||
|   | 393ca8c94d | ||
|   | f817adc468 | ||
|   | 6c91a5a7f5 | ||
|   | 749b09616d | ||
|   | 5de5ab89b4 | ||
|   | 1d67c96640 | ||
|   | d1c694ea4a | ||
|   | 06368a232a | ||
|   | 8a7bbd1606 | ||
|   | 131d05033b | ||
|   | 1806a75415 | ||
|   | 659ffe204c | ||
|   | 4647fd8910 | ||
|   | d492dad8f4 | ||
|   | 3368d70dce | ||
|   | 0e1b2566ff | ||
|   | 369e60162e | ||
|   | d5e7657fe2 | ||
|   | f84ce1ebaf | ||
|   | 12bc242944 | ||
|   | 88060cce10 | ||
|   | 272e4db5c7 | ||
|   | 6e21cc3b67 | ||
|   | 0391bc8176 | ||
|   | 3b9264a049 | ||
|   | 2b3c254678 | ||
|   | 287be8c615 | ||
|   | 953fed280f | ||
|   | e2ff3df314 | ||
|   | 31208a07c2 | ||
|   | ac7a1b0dfb | ||
|   | c246773599 | ||
|   | 25cd56a715 | ||
|   | 82c18e2a53 | ||
|   | d5d38d16ae | ||
|   | e1cbf33573 | ||
|   | 2ffe3bc14b | ||
|   | d5867276a9 | ||
|   | f665ef8fc5 | ||
|   | b264c21302 | ||
|   | 349b3a2ea0 | ||
|   | 87813a8570 | ||
|   | aab135516b | ||
|   | 141ba36996 | ||
|   | d434ca5448 | ||
|   | 94e507aea7 | ||
|   | 3ebc121293 | ||
|   | 41ebd6530b | ||
|   | 2ec7b7b79b | ||
|   | 60ed60353b | ||
|   | 586f1cc532 | ||
|   | 73eb13dfc7 | ||
|   | 1721fef28b | ||
|   | 364ca0582e | ||
|   | 133a2b4ac2 | ||
|   | d85187eb74 | ||
|   | cc1ac11017 | ||
|   | 73f536439e | ||
|   | b17e7d9a9b | ||
|   | 2f29b758e0 | ||
|   | 482aa3fecc | ||
|   | d9c19db340 | ||
|   | 6c4d243de5 | ||
|   | d1561ef777 | ||
|   | 7b4137c351 | ||
|   | 1072336249 | ||
|   | 75bb5c7028 | ||
|   | 376e1ad081 | ||
|   | b58a22b963 | ||
|   | 47004d9579 | ||
|   | 12810c9cd3 | ||
|   | 7a459170fa | ||
|   | 3cf0df568a | ||
|   | b88ebd472e | ||
|   | 64997815c4 | ||
|   | 3ecc527209 | ||
|   | b1b7d1ffba | ||
|   | 4003bd82b0 | ||
|   | 8801255d7d | ||
|   | 3b18f539a7 | ||
|   | c67a055d16 | ||
|   | bc973e06d0 | ||
|   | aeb3c8a0e8 | ||
|   | cf33a47df0 | ||
|   | daeb0f04cd | ||
|   | 97243fe395 | ||
|   | 9dbdb65abe | ||
|   | 9af461de35 | ||
|   | 4d71e200c6 | ||
|   | 8e0bdabed2 | ||
|   | bca553caac | ||
|   | a2f42a3baf | ||
|   | 7465222a9c | ||
|   | e28034c5ac | ||
|   | 12bbd32ad0 | ||
|   | 266e466ee4 | ||
|   | cf83f532ae | ||
|   | cd019668dc | ||
|   | 515fc8776f | ||
|   | c7c0996d8c | ||
|   | b3e64671cc | ||
|   | 4abe214499 | ||
|   | e94cb5ae7e | ||
|   | e213c98df1 | ||
|   | 1639282434 | ||
|   | be0e5dbd83 | ||
|   | ad72917274 | ||
|   | 6a3f4c3f82 | ||
|   | a6420bf50c | ||
|   | eb387896e9 | ||
|   | f43c163158 | ||
|   | 673bf566fc | ||
|   | f95c5e1218 | ||
|   | f33f32f159 | ||
|   | 8df5ae15d1 | ||
|   | 75b399f455 | ||
|   | 12439dd5ec | ||
|   | 3513d41436 | ||
|   | cab792abe5 | ||
|   | 8870358b1b | ||
|   | ee087c79ad | ||
|   | 51f579b635 | ||
|   | c23c3d7d7d | ||
|   | 4abf617b9c | ||
|   | 3026164b16 | ||
|   | 9dd73ef4a4 | ||
|   | 75c72a1e67 | ||
|   | 08354db47b | ||
|   | 027eb5a6b0 | ||
|   | f71264490c | ||
|   | 6270239a6d | ||
|   | 1195a38f46 | ||
|   | 66e289bab4 | ||
|   | 52c6f26cab | ||
|   | dc534b674f | ||
|   | f30c2e8e98 | ||
|   | c482b3c69a | ||
|   | 266b0ad676 | ||
|   | 87f70ab39d | ||
|   | 8e636da499 | ||
|   | 22889ab175 | ||
|   | 5d2354f177 | ||
|   | a41fb80ce1 | ||
|   | 2e2575e213 | ||
|   | 26c61e0809 | ||
|   | e7a8c3032d | ||
|   | 725d1c58aa | ||
|   | bd6742137f | ||
|   | e8dcfa3d69 | ||
|   | 88720ed09b | ||
|   | 1e804244d0 | ||
|   | 198492bbf0 | ||
|   | 8f9d522f62 | ||
|   | cbae233aba | ||
|   | b17ca9c945 | ||
|   | ebf4ca39ba | ||
|   | e5e78797e6 | ||
|   | 080997b808 | ||
|   | 77306e8b97 | ||
|   | 6917d2a2f0 | ||
|   | 36c15522c1 | ||
|   | 804c343a4f | ||
|   | cd5d75427e | ||
|   | 5ddc127da6 | ||
|   | f859695b49 | ||
|   | cb3d2eb9e9 | ||
|   | 33eae08f04 | ||
|   | aa3f98677d | ||
|   | fffccaaf41 | ||
|   | cdc8d0c373 | ||
|   | d14f0c45fc | ||
|   | 39955b0451 | ||
|   | 52dfb7ffe2 | ||
|   | 93462856e1 | ||
|   | 615f155a3a | ||
|   | fcd9e423ec | ||
|   | db8f2bfd99 | ||
|   | 55801fc76e | ||
|   | d3d89c3256 | ||
|   | 8875b3d572 | ||
|   | aabc2be693 | ||
|   | c9afb51cea | ||
|   | c0a656876c | ||
|   | 17a647630b | ||
|   | c88e118b3c | ||
|   | ae6a802106 | ||
|   | b184f94413 | ||
|   | ee3ec091f4 | ||
|   | ef49b59053 | ||
|   | 1f8125805e | ||
|   | efd712c69b | ||
|   | 109a4156e1 | ||
|   | 678d33295b | ||
|   | 5e58956d0a | ||
|   | e276fd2cb3 | ||
|   | 9b22cb10c4 | ||
|   | 8ca31a0e05 | ||
|   | 20149a5da1 | ||
|   | 054d43bb11 | ||
|   | 65488b820c | ||
|   | c3c9f87954 | ||
|   | 56f447be9f | ||
|   | 79fa9db0da | ||
|   | 071c10137b | ||
|   | a4962b80d6 | ||
|   | 5307c33232 | ||
|   | 1b660cce12 | ||
|   | 8df8c278b6 | ||
|   | d7e8264517 | ||
|   | f11c316347 | ||
|   | f62e02c24f | ||
|   | 70113c38c9 | ||
|   | 3d8132f5e2 | ||
|   | 39affb5aa4 | ||
|   | a882c5f474 | ||
|   | 61a7ff1622 | ||
|   | 42e7373bd3 | ||
|   | e269d3ae7d | ||
|   | e7ddaef5bd | ||
|   | 62984e4584 | ||
|   | 3c53455d15 | ||
|   | bbb43a39fd | ||
|   | 43e7d3c945 | ||
|   | 2f72e83bbd | ||
|   | 57179b4ca1 | ||
|   | 4bc8eec4eb | ||
|   | baf510bf8c | ||
|   | 6d53cdd6ce | ||
|   | ebbf078c7d | ||
|   | 95e431e9ec | ||
|   | eba470f2f2 | ||
|   | 061f62da54 | ||
|   | bfed4813b2 | ||
|   | 233c1c0e76 | 
| @@ -5,6 +5,7 @@ python: | ||||
|   - "3.2" | ||||
|   - "3.3" | ||||
|   - "3.4" | ||||
|   - "3.5" | ||||
| sudo: false | ||||
| script: nosetests test --verbose | ||||
| notifications: | ||||
|   | ||||
							
								
								
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -140,3 +140,7 @@ Behrouz Abbasi | ||||
| ngld | ||||
| nyuszika7h | ||||
| Shaun Walbridge | ||||
| Lee Jenkins | ||||
| Anssi Hannula | ||||
| Lukáš Lalinský | ||||
| Qijiang Fan | ||||
|   | ||||
| @@ -16,15 +16,15 @@ So please elaborate on what feature you are requesting, or what bug you want to | ||||
|  | ||||
| If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. | ||||
|  | ||||
| For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. | ||||
| For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. | ||||
|  | ||||
| If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). | ||||
| If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). | ||||
|  | ||||
| **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. | ||||
|  | ||||
| ###  Are you using the latest version? | ||||
|  | ||||
| Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. | ||||
| Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. | ||||
|  | ||||
| ###  Is the issue already documented? | ||||
|  | ||||
| @@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
|     ``` | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. | ||||
| 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
|   | ||||
							
								
								
									
										421
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										421
									
								
								README.md
									
									
									
									
									
								
							| @@ -9,6 +9,7 @@ youtube-dl - download videos from youtube.com or other video platforms | ||||
| - [VIDEO SELECTION](#video-selection) | ||||
| - [FAQ](#faq) | ||||
| - [DEVELOPER INSTRUCTIONS](#developer-instructions) | ||||
| - [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl) | ||||
| - [BUGS](#bugs) | ||||
| - [COPYRIGHT](#copyright) | ||||
|  | ||||
| @@ -34,7 +35,7 @@ You can also use pip: | ||||
|  | ||||
|     sudo pip install youtube-dl | ||||
|  | ||||
| Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html . | ||||
| Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html . | ||||
|  | ||||
| # DESCRIPTION | ||||
| **youtube-dl** is a small command-line program to download videos from | ||||
| @@ -48,110 +49,220 @@ which means you can modify it, redistribute it or use it however you like. | ||||
| # OPTIONS | ||||
|     -h, --help                       Print this help text and exit | ||||
|     --version                        Print program version and exit | ||||
|     -U, --update                     Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) | ||||
|     -i, --ignore-errors              Continue on download errors, for example to skip unavailable videos in a playlist | ||||
|     --abort-on-error                 Abort downloading of further videos (in the playlist or the command line) if an error occurs | ||||
|     -U, --update                     Update this program to latest version. Make | ||||
|                                      sure that you have sufficient permissions | ||||
|                                      (run with sudo if needed) | ||||
|     -i, --ignore-errors              Continue on download errors, for example to | ||||
|                                      skip unavailable videos in a playlist | ||||
|     --abort-on-error                 Abort downloading of further videos (in the | ||||
|                                      playlist or the command line) if an error | ||||
|                                      occurs | ||||
|     --dump-user-agent                Display the current browser identification | ||||
|     --list-extractors                List all supported extractors | ||||
|     --extractor-descriptions         Output descriptions of all supported extractors | ||||
|     --force-generic-extractor        Force extraction to use the generic extractor | ||||
|     --default-search PREFIX          Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". | ||||
|                                      Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The | ||||
|                                      default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. | ||||
|     --ignore-config                  Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration | ||||
|                                      in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, only list them. | ||||
|     --extractor-descriptions         Output descriptions of all supported | ||||
|                                      extractors | ||||
|     --force-generic-extractor        Force extraction to use the generic | ||||
|                                      extractor | ||||
|     --default-search PREFIX          Use this prefix for unqualified URLs. For | ||||
|                                      example "gvsearch2:" downloads two videos | ||||
|                                      from google videos for youtube-dl "large | ||||
|                                      apple". Use the value "auto" to let | ||||
|                                      youtube-dl guess ("auto_warning" to emit a | ||||
|                                      warning when guessing). "error" just throws | ||||
|                                      an error. The default value "fixup_error" | ||||
|                                      repairs broken URLs, but emits an error if | ||||
|                                      this is not possible instead of searching. | ||||
|     --ignore-config                  Do not read configuration files. When given | ||||
|                                      in the global configuration file /etc | ||||
|                                      /youtube-dl.conf: Do not read the user | ||||
|                                      configuration in ~/.config/youtube- | ||||
|                                      dl/config (%APPDATA%/youtube-dl/config.txt | ||||
|                                      on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, | ||||
|                                      only list them. | ||||
|     --no-color                       Do not emit color codes in output | ||||
|  | ||||
| ## Network Options: | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in | ||||
|                                      an empty string (--proxy "") for direct | ||||
|                                      connection | ||||
|     --socket-timeout SECONDS         Time to wait before giving up, in seconds | ||||
|     --source-address IP              Client-side IP address to bind to (experimental) | ||||
|     -4, --force-ipv4                 Make all connections via IPv4 (experimental) | ||||
|     -6, --force-ipv6                 Make all connections via IPv6 (experimental) | ||||
|     --cn-verification-proxy URL      Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is | ||||
|                                      not present) is used for the actual downloading. (experimental) | ||||
|     --source-address IP              Client-side IP address to bind to | ||||
|                                      (experimental) | ||||
|     -4, --force-ipv4                 Make all connections via IPv4 | ||||
|                                      (experimental) | ||||
|     -6, --force-ipv6                 Make all connections via IPv6 | ||||
|                                      (experimental) | ||||
|     --cn-verification-proxy URL      Use this proxy to verify the IP address for | ||||
|                                      some Chinese sites. The default proxy | ||||
|                                      specified by --proxy (or none, if the | ||||
|                                      options is not present) is used for the | ||||
|                                      actual downloading. (experimental) | ||||
|  | ||||
| ## Video Selection: | ||||
|     --playlist-start NUMBER          Playlist video to start at (default is 1) | ||||
|     --playlist-end NUMBER            Playlist video to end at (default is last) | ||||
|     --playlist-items ITEM_SPEC       Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" | ||||
|                                      if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will | ||||
|                                      download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. | ||||
|     --match-title REGEX              Download only matching titles (regex or caseless sub-string) | ||||
|     --reject-title REGEX             Skip download for matching titles (regex or caseless sub-string) | ||||
|     --playlist-items ITEM_SPEC       Playlist video items to download. Specify | ||||
|                                      indices of the videos in the playlist | ||||
|                                      separated by commas like: "--playlist-items | ||||
|                                      1,2,5,8" if you want to download videos | ||||
|                                      indexed 1, 2, 5, 8 in the playlist. You can | ||||
|                                      specify range: "--playlist-items | ||||
|                                      1-3,7,10-13", it will download the videos | ||||
|                                      at index 1, 2, 3, 7, 10, 11, 12 and 13. | ||||
|     --match-title REGEX              Download only matching titles (regex or | ||||
|                                      caseless sub-string) | ||||
|     --reject-title REGEX             Skip download for matching titles (regex or | ||||
|                                      caseless sub-string) | ||||
|     --max-downloads NUMBER           Abort after downloading NUMBER files | ||||
|     --min-filesize SIZE              Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) | ||||
|     --max-filesize SIZE              Do not download any videos larger than SIZE (e.g. 50k or 44.6m) | ||||
|     --min-filesize SIZE              Do not download any videos smaller than | ||||
|                                      SIZE (e.g. 50k or 44.6m) | ||||
|     --max-filesize SIZE              Do not download any videos larger than SIZE | ||||
|                                      (e.g. 50k or 44.6m) | ||||
|     --date DATE                      Download only videos uploaded in this date | ||||
|     --datebefore DATE                Download only videos uploaded on or before this date (i.e. inclusive) | ||||
|     --dateafter DATE                 Download only videos uploaded on or after this date (i.e. inclusive) | ||||
|     --min-views COUNT                Do not download any videos with less than COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than COUNT views | ||||
|     --match-filter FILTER            Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, | ||||
|                                      !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against | ||||
|                                      a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the | ||||
|                                      operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given service), but who also have a description, use  --match-filter "like_count > 100 & | ||||
|     --datebefore DATE                Download only videos uploaded on or before | ||||
|                                      this date (i.e. inclusive) | ||||
|     --dateafter DATE                 Download only videos uploaded on or after | ||||
|                                      this date (i.e. inclusive) | ||||
|     --min-views COUNT                Do not download any videos with less than | ||||
|                                      COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than | ||||
|                                      COUNT views | ||||
|     --match-filter FILTER            Generic video filter (experimental). | ||||
|                                      Specify any key (see help for -o for a list | ||||
|                                      of available keys) to match if the key is | ||||
|                                      present, !key to check if the key is not | ||||
|                                      present,key > NUMBER (like "comment_count > | ||||
|                                      12", also works with >=, <, <=, !=, =) to | ||||
|                                      compare against a number, and & to require | ||||
|                                      multiple matches. Values which are not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator.For | ||||
|                                      example, to only match videos that have | ||||
|                                      been liked more than 100 times and disliked | ||||
|                                      less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given | ||||
|                                      service), but who also have a description, | ||||
|                                      use --match-filter "like_count > 100 & | ||||
|                                      dislike_count <? 50 & description" . | ||||
|     --no-playlist                    Download only the video, if the URL refers to a video and a playlist. | ||||
|     --yes-playlist                   Download the playlist, if the URL refers to a video and a playlist. | ||||
|     --age-limit YEARS                Download only videos suitable for the given age | ||||
|     --download-archive FILE          Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. | ||||
|     --include-ads                    Download advertisements as well (experimental) | ||||
|     --no-playlist                    Download only the video, if the URL refers | ||||
|                                      to a video and a playlist. | ||||
|     --yes-playlist                   Download the playlist, if the URL refers to | ||||
|                                      a video and a playlist. | ||||
|     --age-limit YEARS                Download only videos suitable for the given | ||||
|                                      age | ||||
|     --download-archive FILE          Download only videos not listed in the | ||||
|                                      archive file. Record the IDs of all | ||||
|                                      downloaded videos in it. | ||||
|     --include-ads                    Download advertisements as well | ||||
|                                      (experimental) | ||||
|  | ||||
| ## Download Options: | ||||
|     -r, --rate-limit LIMIT           Maximum download rate in bytes per second (e.g. 50K or 4.2M) | ||||
|     -R, --retries RETRIES            Number of retries (default is 10), or "infinite". | ||||
|     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) (default is 1024) | ||||
|     --no-resize-buffer               Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE. | ||||
|     -r, --rate-limit LIMIT           Maximum download rate in bytes per second | ||||
|                                      (e.g. 50K or 4.2M) | ||||
|     -R, --retries RETRIES            Number of retries (default is 10), or | ||||
|                                      "infinite". | ||||
|     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) | ||||
|                                      (default is 1024) | ||||
|     --no-resize-buffer               Do not automatically adjust the buffer | ||||
|                                      size. By default, the buffer size is | ||||
|                                      automatically resized from an initial value | ||||
|                                      of SIZE. | ||||
|     --playlist-reverse               Download playlist videos in reverse order | ||||
|     --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental) | ||||
|     --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental) | ||||
|     --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget | ||||
|     --external-downloader-args ARGS  Give these arguments to the external downloader | ||||
|     --xattr-set-filesize             Set file xattribute ytdl.filesize with | ||||
|                                      expected filesize (experimental) | ||||
|     --hls-prefer-native              Use the native HLS downloader instead of | ||||
|                                      ffmpeg (experimental) | ||||
|     --external-downloader COMMAND    Use the specified external downloader. | ||||
|                                      Currently supports | ||||
|                                      aria2c,axel,curl,httpie,wget | ||||
|     --external-downloader-args ARGS  Give these arguments to the external | ||||
|                                      downloader | ||||
|  | ||||
| ## Filesystem Options: | ||||
|     -a, --batch-file FILE            File containing URLs to download ('-' for stdin) | ||||
|     -a, --batch-file FILE            File containing URLs to download ('-' for | ||||
|                                      stdin) | ||||
|     --id                             Use only video ID in file name | ||||
|     -o, --output TEMPLATE            Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader | ||||
|                                      nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for | ||||
|                                      the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"), | ||||
|                                      %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, | ||||
|                                      %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, | ||||
|                                      %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. | ||||
|                                      %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. | ||||
|                                      Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER         Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given | ||||
|     --restrict-filenames             Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames | ||||
|     -A, --auto-number                [deprecated; use  -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000 | ||||
|     -t, --title                      [deprecated] Use title in file name (default) | ||||
|     -o, --output TEMPLATE            Output filename template. Use %(title)s to | ||||
|                                      get the title, %(uploader)s for the | ||||
|                                      uploader name, %(uploader_id)s for the | ||||
|                                      uploader nickname if different, | ||||
|                                      %(autonumber)s to get an automatically | ||||
|                                      incremented number, %(ext)s for the | ||||
|                                      filename extension, %(format)s for the | ||||
|                                      format description (like "22 - 1280x720" or | ||||
|                                      "HD"), %(format_id)s for the unique id of | ||||
|                                      the format (like YouTube's itags: "137"), | ||||
|                                      %(upload_date)s for the upload date | ||||
|                                      (YYYYMMDD), %(extractor)s for the provider | ||||
|                                      (youtube, metacafe, etc), %(id)s for the | ||||
|                                      video id, %(playlist_title)s, | ||||
|                                      %(playlist_id)s, or %(playlist)s (=title if | ||||
|                                      present, ID otherwise) for the playlist the | ||||
|                                      video is in, %(playlist_index)s for the | ||||
|                                      position in the playlist. %(height)s and | ||||
|                                      %(width)s for the height and width of the | ||||
|                                      video format. %(resolution)s for a textual | ||||
|                                      description of the resolution of the video | ||||
|                                      format. %% for a literal percent. Use - to | ||||
|                                      output to stdout. Can also be used to | ||||
|                                      download to a different directory, for | ||||
|                                      example with -o '/my/downloads/%(uploader)s | ||||
|                                      /%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER         Specify the number of digits in | ||||
|                                      %(autonumber)s when it is present in output | ||||
|                                      filename template or --auto-number option | ||||
|                                      is given | ||||
|     --restrict-filenames             Restrict filenames to only ASCII | ||||
|                                      characters, and avoid "&" and spaces in | ||||
|                                      filenames | ||||
|     -A, --auto-number                [deprecated; use -o | ||||
|                                      "%(autonumber)s-%(title)s.%(ext)s" ] Number | ||||
|                                      downloaded files starting from 00000 | ||||
|     -t, --title                      [deprecated] Use title in file name | ||||
|                                      (default) | ||||
|     -l, --literal                    [deprecated] Alias of --title | ||||
|     -w, --no-overwrites              Do not overwrite files | ||||
|     -c, --continue                   Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. | ||||
|     --no-continue                    Do not resume partially downloaded files (restart from beginning) | ||||
|     --no-part                        Do not use .part files - write directly into output file | ||||
|     --no-mtime                       Do not use the Last-modified header to set the file modification time | ||||
|     --write-description              Write video description to a .description file | ||||
|     -c, --continue                   Force resume of partially downloaded files. | ||||
|                                      By default, youtube-dl will resume | ||||
|                                      downloads if possible. | ||||
|     --no-continue                    Do not resume partially downloaded files | ||||
|                                      (restart from beginning) | ||||
|     --no-part                        Do not use .part files - write directly | ||||
|                                      into output file | ||||
|     --no-mtime                       Do not use the Last-modified header to set | ||||
|                                      the file modification time | ||||
|     --write-description              Write video description to a .description | ||||
|                                      file | ||||
|     --write-info-json                Write video metadata to a .info.json file | ||||
|     --write-annotations              Write video annotations to a .annotations.xml file | ||||
|     --load-info FILE                 JSON file containing the video information (created with the "--write-info-json" option) | ||||
|     --cookies FILE                   File to read cookies from and dump cookie jar in | ||||
|     --cache-dir DIR                  Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl | ||||
|                                      or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may | ||||
|                                      change. | ||||
|     --write-annotations              Write video annotations to a | ||||
|                                      .annotations.xml file | ||||
|     --load-info FILE                 JSON file containing the video information | ||||
|                                      (created with the "--write-info-json" | ||||
|                                      option) | ||||
|     --cookies FILE                   File to read cookies from and dump cookie | ||||
|                                      jar in | ||||
|     --cache-dir DIR                  Location in the filesystem where youtube-dl | ||||
|                                      can store some downloaded information | ||||
|                                      permanently. By default $XDG_CACHE_HOME | ||||
|                                      /youtube-dl or ~/.cache/youtube-dl . At the | ||||
|                                      moment, only YouTube player files (for | ||||
|                                      videos with obfuscated signatures) are | ||||
|                                      cached, but that may change. | ||||
|     --no-cache-dir                   Disable filesystem caching | ||||
|     --rm-cache-dir                   Delete all filesystem cache files | ||||
|  | ||||
| ## Thumbnail images: | ||||
|     --write-thumbnail                Write thumbnail image to disk | ||||
|     --write-all-thumbnails           Write all thumbnail image formats to disk | ||||
|     --list-thumbnails                Simulate and list all available thumbnail formats | ||||
|     --list-thumbnails                Simulate and list all available thumbnail | ||||
|                                      formats | ||||
|  | ||||
| ## Verbosity / Simulation Options: | ||||
|     -q, --quiet                      Activate quiet mode | ||||
|     --no-warnings                    Ignore warnings | ||||
|     -s, --simulate                   Do not download the video and do not write anything to disk | ||||
|     -s, --simulate                   Do not download the video and do not write | ||||
|                                      anything to disk | ||||
|     --skip-download                  Do not download the video | ||||
|     -g, --get-url                    Simulate, quiet but print URL | ||||
|     -e, --get-title                  Simulate, quiet but print title | ||||
| @@ -161,78 +272,135 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --get-duration                   Simulate, quiet but print video length | ||||
|     --get-filename                   Simulate, quiet but print output filename | ||||
|     --get-format                     Simulate, quiet but print output format | ||||
|     -j, --dump-json                  Simulate, quiet but print JSON information. See --output for a description of available keys. | ||||
|     -J, --dump-single-json           Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist | ||||
|                                      information in a single line. | ||||
|     --print-json                     Be quiet and print the video information as JSON (video is still being downloaded). | ||||
|     -j, --dump-json                  Simulate, quiet but print JSON information. | ||||
|                                      See --output for a description of available | ||||
|                                      keys. | ||||
|     -J, --dump-single-json           Simulate, quiet but print JSON information | ||||
|                                      for each command-line argument. If the URL | ||||
|                                      refers to a playlist, dump the whole | ||||
|                                      playlist information in a single line. | ||||
|     --print-json                     Be quiet and print the video information as | ||||
|                                      JSON (video is still being downloaded). | ||||
|     --newline                        Output progress bar as new lines | ||||
|     --no-progress                    Do not print progress bar | ||||
|     --console-title                  Display progress in console titlebar | ||||
|     -v, --verbose                    Print various debugging information | ||||
|     --dump-pages                     Print downloaded pages encoded using base64 to debug problems (very verbose) | ||||
|     --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems | ||||
|     --dump-pages                     Print downloaded pages encoded using base64 | ||||
|                                      to debug problems (very verbose) | ||||
|     --write-pages                    Write downloaded intermediary pages to | ||||
|                                      files in the current directory to debug | ||||
|                                      problems | ||||
|     --print-traffic                  Display sent and read HTTP traffic | ||||
|     -C, --call-home                  Contact the youtube-dl server for debugging | ||||
|     --no-call-home                   Do NOT contact the youtube-dl server for debugging | ||||
|     --no-call-home                   Do NOT contact the youtube-dl server for | ||||
|                                      debugging | ||||
|  | ||||
| ## Workarounds: | ||||
|     --encoding ENCODING              Force the specified encoding (experimental) | ||||
|     --no-check-certificate           Suppress HTTPS certificate validation | ||||
|     --prefer-insecure                Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube) | ||||
|     --prefer-insecure                Use an unencrypted connection to retrieve | ||||
|                                      information about the video. (Currently | ||||
|                                      supported only for YouTube) | ||||
|     --user-agent UA                  Specify a custom user agent | ||||
|     --referer URL                    Specify a custom referer, use if the video access is restricted to one domain | ||||
|     --add-header FIELD:VALUE         Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times | ||||
|     --bidi-workaround                Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH | ||||
|     --sleep-interval SECONDS         Number of seconds to sleep before each download. | ||||
|     --referer URL                    Specify a custom referer, use if the video | ||||
|                                      access is restricted to one domain | ||||
|     --add-header FIELD:VALUE         Specify a custom HTTP header and its value, | ||||
|                                      separated by a colon ':'. You can use this | ||||
|                                      option multiple times | ||||
|     --bidi-workaround                Work around terminals that lack | ||||
|                                      bidirectional text support. Requires bidiv | ||||
|                                      or fribidi executable in PATH | ||||
|     --sleep-interval SECONDS         Number of seconds to sleep before each | ||||
|                                      download. | ||||
|  | ||||
| ## Video Format Options: | ||||
|     -f, --format FORMAT              Video format code, see the "FORMAT SELECTION" for all the info | ||||
|     -f, --format FORMAT              Video format code, see the "FORMAT | ||||
|                                      SELECTION" for all the info | ||||
|     --all-formats                    Download all available video formats | ||||
|     --prefer-free-formats            Prefer free video formats unless a specific one is requested | ||||
|     --prefer-free-formats            Prefer free video formats unless a specific | ||||
|                                      one is requested | ||||
|     -F, --list-formats               List all available formats | ||||
|     --youtube-skip-dash-manifest     Do not download the DASH manifests and related data on YouTube videos | ||||
|     --merge-output-format FORMAT     If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no | ||||
|                                      merge is required | ||||
|     --youtube-skip-dash-manifest     Do not download the DASH manifests and | ||||
|                                      related data on YouTube videos | ||||
|     --merge-output-format FORMAT     If a merge is required (e.g. | ||||
|                                      bestvideo+bestaudio), output to given | ||||
|                                      container format. One of mkv, mp4, ogg, | ||||
|                                      webm, flv. Ignored if no merge is required | ||||
|  | ||||
| ## Subtitle Options: | ||||
|     --write-sub                      Write subtitle file | ||||
|     --write-auto-sub                 Write automatic subtitle file (YouTube only) | ||||
|     --all-subs                       Download all the available subtitles of the video | ||||
|     --write-auto-sub                 Write automatic subtitle file (YouTube | ||||
|                                      only) | ||||
|     --all-subs                       Download all the available subtitles of the | ||||
|                                      video | ||||
|     --list-subs                      List all available subtitles for the video | ||||
|     --sub-format FORMAT              Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best" | ||||
|     --sub-lang LANGS                 Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' | ||||
|     --sub-format FORMAT              Subtitle format, accepts formats | ||||
|                                      preference, for example: "srt" or | ||||
|                                      "ass/srt/best" | ||||
|     --sub-lang LANGS                 Languages of the subtitles to download | ||||
|                                      (optional) separated by commas, use IETF | ||||
|                                      language tags like 'en,pt' | ||||
|  | ||||
| ## Authentication Options: | ||||
|     -u, --username USERNAME          Login with this account ID | ||||
|     -p, --password PASSWORD          Account password. If this option is left out, youtube-dl will ask interactively. | ||||
|     -p, --password PASSWORD          Account password. If this option is left | ||||
|                                      out, youtube-dl will ask interactively. | ||||
|     -2, --twofactor TWOFACTOR        Two-factor auth code | ||||
|     -n, --netrc                      Use .netrc authentication data | ||||
|     --video-password PASSWORD        Video password (vimeo, smotri) | ||||
|     --video-password PASSWORD        Video password (vimeo, smotri, youku) | ||||
|  | ||||
| ## Post-processing Options: | ||||
|     -x, --extract-audio              Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) | ||||
|     --audio-format FORMAT            Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default | ||||
|     --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default | ||||
|                                      5) | ||||
|     --recode-video FORMAT            Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi) | ||||
|     -x, --extract-audio              Convert video files to audio-only files | ||||
|                                      (requires ffmpeg or avconv and ffprobe or | ||||
|                                      avprobe) | ||||
|     --audio-format FORMAT            Specify audio format: "best", "aac", | ||||
|                                      "vorbis", "mp3", "m4a", "opus", or "wav"; | ||||
|                                      "best" by default | ||||
|     --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert | ||||
|                                      a value between 0 (better) and 9 (worse) | ||||
|                                      for VBR or a specific bitrate like 128K | ||||
|                                      (default 5) | ||||
|     --recode-video FORMAT            Encode the video to another format if | ||||
|                                      necessary (currently supported: | ||||
|                                      mp4|flv|ogg|webm|mkv|avi) | ||||
|     --postprocessor-args ARGS        Give these arguments to the postprocessor | ||||
|     -k, --keep-video                 Keep the video file on disk after the post-processing; the video is erased by default | ||||
|     --no-post-overwrites             Do not overwrite post-processed files; the post-processed files are overwritten by default | ||||
|     --embed-subs                     Embed subtitles in the video (only for mkv and mp4 videos) | ||||
|     -k, --keep-video                 Keep the video file on disk after the post- | ||||
|                                      processing; the video is erased by default | ||||
|     --no-post-overwrites             Do not overwrite post-processed files; the | ||||
|                                      post-processed files are overwritten by | ||||
|                                      default | ||||
|     --embed-subs                     Embed subtitles in the video (only for mkv | ||||
|                                      and mp4 videos) | ||||
|     --embed-thumbnail                Embed thumbnail in the audio as cover art | ||||
|     --add-metadata                   Write metadata to the video file | ||||
|     --metadata-from-title FORMAT     Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed | ||||
|                                      parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - | ||||
|                                      %(title)s" matches a title like "Coldplay - Paradise" | ||||
|     --xattrs                         Write metadata to the video file's xattrs (using dublin core and xdg standards) | ||||
|     --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; | ||||
|                                      fix file if we can, warn otherwise) | ||||
|     --prefer-avconv                  Prefer avconv over ffmpeg for running the postprocessors (default) | ||||
|     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the postprocessors | ||||
|     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. | ||||
|     --exec CMD                       Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm | ||||
|                                      {}' | ||||
|     --convert-subtitles FORMAT       Convert the subtitles to other format (currently supported: srt|ass|vtt) | ||||
|     --metadata-from-title FORMAT     Parse additional metadata like song title / | ||||
|                                      artist from the video title. The format | ||||
|                                      syntax is the same as --output, the parsed | ||||
|                                      parameters replace existing values. | ||||
|                                      Additional templates: %(album)s, | ||||
|                                      %(artist)s. Example: --metadata-from-title | ||||
|                                      "%(artist)s - %(title)s" matches a title | ||||
|                                      like "Coldplay - Paradise" | ||||
|     --xattrs                         Write metadata to the video file's xattrs | ||||
|                                      (using dublin core and xdg standards) | ||||
|     --fixup POLICY                   Automatically correct known faults of the | ||||
|                                      file. One of never (do nothing), warn (only | ||||
|                                      emit a warning), detect_or_warn (the | ||||
|                                      default; fix file if we can, warn | ||||
|                                      otherwise) | ||||
|     --prefer-avconv                  Prefer avconv over ffmpeg for running the | ||||
|                                      postprocessors (default) | ||||
|     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the | ||||
|                                      postprocessors | ||||
|     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; | ||||
|                                      either the path to the binary or its | ||||
|                                      containing directory. | ||||
|     --exec CMD                       Execute a command on the file after | ||||
|                                      downloading, similar to find's -exec | ||||
|                                      syntax. Example: --exec 'adb push {} | ||||
|                                      /sdcard/Music/ && rm {}' | ||||
|     --convert-subtitles FORMAT       Convert the subtitles to other format | ||||
|                                      (currently supported: srt|ass|vtt) | ||||
|  | ||||
| # CONFIGURATION | ||||
|  | ||||
| @@ -261,7 +429,7 @@ For example: | ||||
| machine youtube login myaccount@gmail.com password my_youtube_password | ||||
| machine twitch login my_twitch_account_name password my_twitch_password | ||||
| ``` | ||||
| To activate authentication with `.netrc` file you should pass `--netrc` to youtube-dl or to place it in [configuration file](#configuration). | ||||
| To activate authentication with a `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration). | ||||
|  | ||||
| On Windows you may also need to set up the `%HOME%` environment variable manually. | ||||
|  | ||||
| @@ -277,9 +445,10 @@ The `-o` option allows users to indicate a template for the output file names. T | ||||
|  - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). | ||||
|  - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. | ||||
|  - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. | ||||
|  - `playlist`: The name or the id of the playlist that contains the video. | ||||
|  - `playlist_index`: The index of the video in the playlist, a five-digit number. | ||||
|  - `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video. | ||||
|  - `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist. | ||||
|  - `format_id`: The sequence will be replaced by the format code specified by `--format`. | ||||
|  - `duration`: The sequence will be replaced by the length of the video in seconds. | ||||
|  | ||||
| The current default template is `%(title)s-%(id)s.%(ext)s`. | ||||
|  | ||||
| @@ -357,7 +526,7 @@ If you have installed youtube-dl with a package manager, pip, setup.py or a tarb | ||||
|  | ||||
| By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`. | ||||
|  | ||||
| ### Can you please put the -b option back? | ||||
| ### Can you please put the `-b` option back? | ||||
|  | ||||
| Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. | ||||
|  | ||||
| @@ -369,13 +538,13 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much. | ||||
|  | ||||
| Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). | ||||
|  | ||||
| ### I extracted a video URL with -g, but it does not play on another machine / in my webbrowser. | ||||
| ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser. | ||||
|  | ||||
| It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used; use `--dump-user-agent` to see the one in use by youtube-dl. | ||||
|  | ||||
| It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. | ||||
|  | ||||
| Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using -g, your own downloader must support these as well. | ||||
| Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using `-g`, your own downloader must support these as well. | ||||
|  | ||||
| If you want to play the video on a machine that is not running youtube-dl, you can relay the video content from the machine that runs youtube-dl. You can use `-o -` to let youtube-dl stream a video to stdout, or simply allow the player to download the files written by youtube-dl in turn. | ||||
|  | ||||
| @@ -552,7 +721,7 @@ If you want to add support for a new site, you can follow this quick list (assum | ||||
|     ``` | ||||
| 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). | ||||
| 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. | ||||
| 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. | ||||
| 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. | ||||
| 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). | ||||
| 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: | ||||
|  | ||||
| @@ -641,15 +810,15 @@ So please elaborate on what feature you are requesting, or what bug you want to | ||||
|  | ||||
| If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. | ||||
|  | ||||
| For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. | ||||
| For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. | ||||
|  | ||||
| If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). | ||||
| If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). | ||||
|  | ||||
| **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. | ||||
|  | ||||
| ###  Are you using the latest version? | ||||
|  | ||||
| Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. | ||||
| Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. | ||||
|  | ||||
| ###  Is the issue already documented? | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import os | ||||
| from os.path import dirname as dirn | ||||
| import sys | ||||
|  | ||||
| sys.path.append(dirn(dirn((os.path.abspath(__file__))))) | ||||
| sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) | ||||
| import youtube_dl | ||||
|  | ||||
| BASH_COMPLETION_FILE = "youtube-dl.bash-completion" | ||||
|   | ||||
| @@ -6,7 +6,7 @@ import os | ||||
| from os.path import dirname as dirn | ||||
| import sys | ||||
|  | ||||
| sys.path.append(dirn(dirn((os.path.abspath(__file__))))) | ||||
| sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) | ||||
| import youtube_dl | ||||
| from youtube_dl.utils import shell_quote | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,7 @@ import os | ||||
| import textwrap | ||||
|  | ||||
| # We must be able to import youtube_dl | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | ||||
|  | ||||
| import youtube_dl | ||||
|  | ||||
|   | ||||
| @@ -9,7 +9,7 @@ import sys | ||||
|  | ||||
| # Import youtube_dl | ||||
| ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') | ||||
| sys.path.append(ROOT_DIR) | ||||
| sys.path.insert(0, ROOT_DIR) | ||||
| import youtube_dl | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -8,6 +8,35 @@ import re | ||||
| ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||||
| README_FILE = os.path.join(ROOT_DIR, 'README.md') | ||||
|  | ||||
|  | ||||
| def filter_options(readme): | ||||
|     ret = '' | ||||
|     in_options = False | ||||
|     for line in readme.split('\n'): | ||||
|         if line.startswith('# '): | ||||
|             if line[2:].startswith('OPTIONS'): | ||||
|                 in_options = True | ||||
|             else: | ||||
|                 in_options = False | ||||
|  | ||||
|         if in_options: | ||||
|             if line.lstrip().startswith('-'): | ||||
|                 option, description = re.split(r'\s{2,}', line.lstrip()) | ||||
|                 split_option = option.split(' ') | ||||
|  | ||||
|                 if not split_option[-1].startswith('-'):  # metavar | ||||
|                     option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) | ||||
|  | ||||
|                 # Pandoc's definition_lists. See http://pandoc.org/README.html | ||||
|                 # for more information. | ||||
|                 ret += '\n%s\n:   %s\n' % (option, description) | ||||
|             else: | ||||
|                 ret += line.lstrip() + '\n' | ||||
|         else: | ||||
|             ret += line + '\n' | ||||
|  | ||||
|     return ret | ||||
|  | ||||
| with io.open(README_FILE, encoding='utf-8') as f: | ||||
|     readme = f.read() | ||||
|  | ||||
| @@ -26,6 +55,8 @@ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) | ||||
| readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) | ||||
| readme = PREFIX + readme | ||||
|  | ||||
| readme = filter_options(readme) | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
|     print(readme.encode('utf-8')) | ||||
| else: | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import os | ||||
| from os.path import dirname as dirn | ||||
| import sys | ||||
|  | ||||
| sys.path.append(dirn(dirn((os.path.abspath(__file__))))) | ||||
| sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) | ||||
| import youtube_dl | ||||
|  | ||||
| ZSH_COMPLETION_FILE = "youtube-dl.zsh" | ||||
|   | ||||
| @@ -101,7 +101,7 @@ | ||||
|  - **ComCarCoff** | ||||
|  - **ComedyCentral** | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
|  - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED | ||||
|  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED | ||||
|  - **Cracked** | ||||
|  - **Criterion** | ||||
|  - **CrooksAndLiars** | ||||
| @@ -122,7 +122,6 @@ | ||||
|  - **defense.gouv.fr** | ||||
|  - **DHM**: Filmarchiv - Deutsches Historisches Museum | ||||
|  - **Discovery** | ||||
|  - **divxstage**: DivxStage | ||||
|  - **Dotsub** | ||||
|  - **DouyuTV**: 斗鱼 | ||||
|  - **dramafever** | ||||
| @@ -151,6 +150,7 @@ | ||||
|  - **Escapist** | ||||
|  - **ESPN** (Currently broken) | ||||
|  - **EsriVideo** | ||||
|  - **Europa** | ||||
|  - **EveryonesMixtape** | ||||
|  - **exfm**: ex.fm | ||||
|  - **ExpoTV** | ||||
| @@ -159,14 +159,13 @@ | ||||
|  - **faz.net** | ||||
|  - **fc2** | ||||
|  - **fernsehkritik.tv** | ||||
|  - **fernsehkritik.tv:postecke** | ||||
|  - **Firstpost** | ||||
|  - **FiveTV** | ||||
|  - **Flickr** | ||||
|  - **Folketinget**: Folketinget (ft.dk; Danish parliament) | ||||
|  - **FootyRoom** | ||||
|  - **Foxgay** | ||||
|  - **FoxNews** | ||||
|  - **FoxNews**: Fox News and Fox Business Video | ||||
|  - **FoxSports** | ||||
|  - **france2.fr:generation-quoi** | ||||
|  - **FranceCulture** | ||||
| @@ -195,7 +194,7 @@ | ||||
|  - **GodTube** | ||||
|  - **GoldenMoustache** | ||||
|  - **Golem** | ||||
|  - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net | ||||
|  - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com | ||||
|  - **Goshgay** | ||||
|  - **Groupon** | ||||
|  - **Hark** | ||||
| @@ -209,7 +208,6 @@ | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
|  - **HostingBulk** | ||||
|  - **HotNewHipHop** | ||||
|  - **Howcast** | ||||
|  - **HowStuffWorks** | ||||
| @@ -220,6 +218,7 @@ | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
|  - **ImgurAlbum** | ||||
|  - **Ina** | ||||
|  - **Indavideo** | ||||
|  - **IndavideoEmbed** | ||||
| @@ -265,6 +264,9 @@ | ||||
|  - **Libsyn** | ||||
|  - **life:embed** | ||||
|  - **lifenews**: LIFE | NEWS | ||||
|  - **limelight** | ||||
|  - **limelight:channel** | ||||
|  - **limelight:channel_list** | ||||
|  - **LiveLeak** | ||||
|  - **livestream** | ||||
|  - **livestream:original** | ||||
| @@ -285,7 +287,7 @@ | ||||
|  - **Minhateca** | ||||
|  - **MinistryGrid** | ||||
|  - **miomio.tv** | ||||
|  - **mitele.es** | ||||
|  - **MiTele**: mitele.es | ||||
|  - **mixcloud** | ||||
|  - **MLB** | ||||
|  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net | ||||
| @@ -303,11 +305,11 @@ | ||||
|  - **MPORA** | ||||
|  - **MSNBC** | ||||
|  - **MTV** | ||||
|  - **mtv.de** | ||||
|  - **mtviggy.com** | ||||
|  - **mtvservices:embedded** | ||||
|  - **MuenchenTV**: münchen.tv | ||||
|  - **MusicPlayOn** | ||||
|  - **MusicVault** | ||||
|  - **muzu.tv** | ||||
|  - **Mwave** | ||||
|  - **MySpace** | ||||
| @@ -316,7 +318,6 @@ | ||||
|  - **Myvi** | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **N-JOY** | ||||
|  - **n-tv.de** | ||||
|  - **NationalGeographic** | ||||
|  - **Naver** | ||||
| @@ -325,7 +326,9 @@ | ||||
|  - **NBCNews** | ||||
|  - **NBCSports** | ||||
|  - **NBCSportsVPlayer** | ||||
|  - **ndr**: NDR.de - Mediathek | ||||
|  - **ndr**: NDR.de - Norddeutscher Rundfunk | ||||
|  - **ndr:embed** | ||||
|  - **ndr:embed:base** | ||||
|  - **NDTV** | ||||
|  - **NerdCubedFeed** | ||||
|  - **Nerdist** | ||||
| @@ -348,12 +351,16 @@ | ||||
|  - **nhl.com:videocenter**: NHL videocenter category | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
|  - **Noco** | ||||
|  - **Normalboots** | ||||
|  - **NosVideo** | ||||
|  - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz | ||||
|  - **novamov**: NovaMov | ||||
|  - **Nowness** | ||||
|  - **nowness** | ||||
|  - **nowness:playlist** | ||||
|  - **nowness:series** | ||||
|  - **NowTV** | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **npo**: npo.nl and ntr.nl | ||||
| @@ -374,7 +381,6 @@ | ||||
|  - **OnionStudios** | ||||
|  - **Ooyala** | ||||
|  - **OoyalaExternal** | ||||
|  - **OpenFilm** | ||||
|  - **orf:fm4**: radio FM4 | ||||
|  - **orf:iptv**: iptv.ORF.at | ||||
|  - **orf:oe1**: Radio Österreich 1 | ||||
| @@ -465,7 +471,7 @@ | ||||
|  - **Sexu** | ||||
|  - **SexyKarma**: Sexy Karma and Watch Indian Porn | ||||
|  - **Shahid** | ||||
|  - **Shared** | ||||
|  - **Shared**: shared.sx and vivo.sx | ||||
|  - **ShareSix** | ||||
|  - **Sina** | ||||
|  - **Slideshare** | ||||
| @@ -529,7 +535,7 @@ | ||||
|  - **techtv.mit.edu** | ||||
|  - **ted** | ||||
|  - **TeleBruxelles** | ||||
|  - **telecinco.es** | ||||
|  - **Telecinco**: telecinco.es, cuatro.com and mediaset.es | ||||
|  - **Telegraaf** | ||||
|  - **TeleMB** | ||||
|  - **TeleTask** | ||||
| @@ -631,6 +637,7 @@ | ||||
|  - **vine:user** | ||||
|  - **vk**: VK | ||||
|  - **vk:uservideos**: VK - User's Videos | ||||
|  - **vlive** | ||||
|  - **Vodlocker** | ||||
|  - **VoiceRepublic** | ||||
|  - **Vporn** | ||||
|   | ||||
							
								
								
									
										129
									
								
								test/helper.py
									
									
									
									
									
								
							
							
						
						
									
										129
									
								
								test/helper.py
									
									
									
									
									
								
							| @@ -89,66 +89,81 @@ def gettestcases(include_onlymatching=False): | ||||
| md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() | ||||
|  | ||||
|  | ||||
| def expect_info_dict(self, got_dict, expected_dict): | ||||
| def expect_value(self, got, expected, field): | ||||
|     if isinstance(expected, compat_str) and expected.startswith('re:'): | ||||
|         match_str = expected[len('re:'):] | ||||
|         match_rex = re.compile(match_str) | ||||
|  | ||||
|         self.assertTrue( | ||||
|             isinstance(got, compat_str), | ||||
|             'Expected a %s object, but got %s for field %s' % ( | ||||
|                 compat_str.__name__, type(got).__name__, field)) | ||||
|         self.assertTrue( | ||||
|             match_rex.match(got), | ||||
|             'field %s (value: %r) should match %r' % (field, got, match_str)) | ||||
|     elif isinstance(expected, compat_str) and expected.startswith('startswith:'): | ||||
|         start_str = expected[len('startswith:'):] | ||||
|         self.assertTrue( | ||||
|             isinstance(got, compat_str), | ||||
|             'Expected a %s object, but got %s for field %s' % ( | ||||
|                 compat_str.__name__, type(got).__name__, field)) | ||||
|         self.assertTrue( | ||||
|             got.startswith(start_str), | ||||
|             'field %s (value: %r) should start with %r' % (field, got, start_str)) | ||||
|     elif isinstance(expected, compat_str) and expected.startswith('contains:'): | ||||
|         contains_str = expected[len('contains:'):] | ||||
|         self.assertTrue( | ||||
|             isinstance(got, compat_str), | ||||
|             'Expected a %s object, but got %s for field %s' % ( | ||||
|                 compat_str.__name__, type(got).__name__, field)) | ||||
|         self.assertTrue( | ||||
|             contains_str in got, | ||||
|             'field %s (value: %r) should contain %r' % (field, got, contains_str)) | ||||
|     elif isinstance(expected, type): | ||||
|         self.assertTrue( | ||||
|             isinstance(got, expected), | ||||
|             'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got))) | ||||
|     elif isinstance(expected, dict) and isinstance(got, dict): | ||||
|         expect_dict(self, got, expected) | ||||
|     elif isinstance(expected, list) and isinstance(got, list): | ||||
|         self.assertEqual( | ||||
|             len(expected), len(got), | ||||
|             'Expect a list of length %d, but got a list of length %d for field %s' % ( | ||||
|                 len(expected), len(got), field)) | ||||
|         for index, (item_got, item_expected) in enumerate(zip(got, expected)): | ||||
|             type_got = type(item_got) | ||||
|             type_expected = type(item_expected) | ||||
|             self.assertEqual( | ||||
|                 type_expected, type_got, | ||||
|                 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( | ||||
|                     index, field, type_expected, type_got)) | ||||
|             expect_value(self, item_got, item_expected, field) | ||||
|     else: | ||||
|         if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|             got = 'md5:' + md5(got) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('mincount:'): | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, (list, dict)), | ||||
|                 'Expected field %s to be a list or a dict, but it is of type %s' % ( | ||||
|                     field, type(got).__name__)) | ||||
|             expected_num = int(expected.partition(':')[2]) | ||||
|             assertGreaterEqual( | ||||
|                 self, len(got), expected_num, | ||||
|                 'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got))) | ||||
|             return | ||||
|         self.assertEqual( | ||||
|             expected, got, | ||||
|             'Invalid value for field %s, expected %r, got %r' % (field, expected, got)) | ||||
|  | ||||
|  | ||||
| def expect_dict(self, got_dict, expected_dict): | ||||
|     for info_field, expected in expected_dict.items(): | ||||
|         if isinstance(expected, compat_str) and expected.startswith('re:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             match_str = expected[len('re:'):] | ||||
|             match_rex = re.compile(match_str) | ||||
|         got = got_dict.get(info_field) | ||||
|         expect_value(self, got, expected, info_field) | ||||
|  | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 match_rex.match(got), | ||||
|                 'field %s (value: %r) should match %r' % (info_field, got, match_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('startswith:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             start_str = expected[len('startswith:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 got.startswith(start_str), | ||||
|                 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('contains:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             contains_str = expected[len('contains:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 contains_str in got, | ||||
|                 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
|                             'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) | ||||
|         else: | ||||
|             if isinstance(expected, compat_str) and expected.startswith('md5:'): | ||||
|                 got = 'md5:' + md5(got_dict.get(info_field)) | ||||
|             elif isinstance(expected, compat_str) and expected.startswith('mincount:'): | ||||
|                 got = got_dict.get(info_field) | ||||
|                 self.assertTrue( | ||||
|                     isinstance(got, (list, dict)), | ||||
|                     'Expected field %s to be a list or a dict, but it is of type %s' % ( | ||||
|                         info_field, type(got).__name__)) | ||||
|                 expected_num = int(expected.partition(':')[2]) | ||||
|                 assertGreaterEqual( | ||||
|                     self, len(got), expected_num, | ||||
|                     'Expected %d items in field %s, but only got %d' % ( | ||||
|                         expected_num, info_field, len(got) | ||||
|                     ) | ||||
|                 ) | ||||
|                 continue | ||||
|             else: | ||||
|                 got = got_dict.get(info_field) | ||||
|             self.assertEqual(expected, got, | ||||
|                              'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) | ||||
|  | ||||
| def expect_info_dict(self, got_dict, expected_dict): | ||||
|     expect_dict(self, got_dict, expected_dict) | ||||
|     # Check for the presence of mandatory fields | ||||
|     if got_dict.get('_type') not in ('playlist', 'multi_video'): | ||||
|         for key in ('id', 'url', 'title', 'ext'): | ||||
|   | ||||
| @@ -14,6 +14,7 @@ from youtube_dl.utils import get_filesystem_encoding | ||||
| from youtube_dl.compat import ( | ||||
|     compat_getenv, | ||||
|     compat_expanduser, | ||||
|     compat_shlex_split, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urllib_parse_unquote_plus, | ||||
| ) | ||||
| @@ -67,5 +68,8 @@ class TestCompat(unittest.TestCase): | ||||
|         self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def') | ||||
|         self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def') | ||||
|  | ||||
|     def test_compat_shlex_split(self): | ||||
|         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -57,11 +57,16 @@ from youtube_dl.utils import ( | ||||
|     urlencode_postdata, | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
|     xpath_attr, | ||||
|     render_table, | ||||
|     match_str, | ||||
|     parse_dfxp_time_expr, | ||||
|     dfxp2srt, | ||||
|     cli_option, | ||||
|     cli_valueless_option, | ||||
|     cli_bool_option, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -264,6 +269,16 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(find('media:song/media:author').text, 'The Author') | ||||
|         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') | ||||
|  | ||||
|     def test_xpath_element(self): | ||||
|         doc = xml.etree.ElementTree.Element('root') | ||||
|         div = xml.etree.ElementTree.SubElement(doc, 'div') | ||||
|         p = xml.etree.ElementTree.SubElement(div, 'p') | ||||
|         p.text = 'Foo' | ||||
|         self.assertEqual(xpath_element(doc, 'div/p'), p) | ||||
|         self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default') | ||||
|         self.assertTrue(xpath_element(doc, 'div/bar') is None) | ||||
|         self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True) | ||||
|  | ||||
|     def test_xpath_text(self): | ||||
|         testxml = '''<root> | ||||
|             <div> | ||||
| @@ -272,9 +287,25 @@ class TestUtil(unittest.TestCase): | ||||
|         </root>''' | ||||
|         doc = xml.etree.ElementTree.fromstring(testxml) | ||||
|         self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') | ||||
|         self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') | ||||
|         self.assertTrue(xpath_text(doc, 'div/bar') is None) | ||||
|         self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) | ||||
|  | ||||
|     def test_xpath_attr(self): | ||||
|         testxml = '''<root> | ||||
|             <div> | ||||
|                 <p x="a">Foo</p> | ||||
|             </div> | ||||
|         </root>''' | ||||
|         doc = xml.etree.ElementTree.fromstring(testxml) | ||||
|         self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') | ||||
|         self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) | ||||
|         self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) | ||||
|         self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default') | ||||
|         self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default') | ||||
|         self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True) | ||||
|         self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) | ||||
|  | ||||
|     def test_smuggle_url(self): | ||||
|         data = {"ö": "ö", "abc": [3]} | ||||
|         url = 'https://foo.bar/baz?x=y#a' | ||||
| @@ -646,6 +677,51 @@ The first line | ||||
| ''' | ||||
|         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) | ||||
|  | ||||
|     def test_cli_option(self): | ||||
|         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) | ||||
|         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) | ||||
|         self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) | ||||
|  | ||||
|     def test_cli_valueless_option(self): | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader']) | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), []) | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate']) | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), []) | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), []) | ||||
|         self.assertEqual(cli_valueless_option( | ||||
|             {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate']) | ||||
|  | ||||
|     def test_cli_bool_option(self): | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), | ||||
|             ['--no-check-certificate', 'true']) | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='), | ||||
|             ['--no-check-certificate=true']) | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), | ||||
|             ['--check-certificate', 'false']) | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), | ||||
|             ['--check-certificate=false']) | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), | ||||
|             ['--check-certificate', 'true']) | ||||
|         self.assertEqual( | ||||
|             cli_bool_option( | ||||
|                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), | ||||
|             ['--check-certificate=true']) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -33,7 +33,7 @@ params = get_params({ | ||||
|  | ||||
|  | ||||
| TEST_ID = 'gr51aVj-mLg' | ||||
| ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' | ||||
| ANNOTATIONS_FILE = TEST_ID + '.annotations.xml' | ||||
| EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										2
									
								
								tox.ini
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								tox.ini
									
									
									
									
									
								
							| @@ -1,5 +1,5 @@ | ||||
| [tox] | ||||
| envlist = py26,py27,py33,py34 | ||||
| envlist = py26,py27,py33,py34,py35 | ||||
| [testenv] | ||||
| deps = | ||||
|    nose | ||||
|   | ||||
| @@ -69,6 +69,7 @@ from .utils import ( | ||||
|     version_tuple, | ||||
|     write_json_file, | ||||
|     write_string, | ||||
|     YoutubeDLCookieProcessor, | ||||
|     YoutubeDLHandler, | ||||
|     prepend_extension, | ||||
|     replace_extension, | ||||
| @@ -284,7 +285,11 @@ class YoutubeDL(object): | ||||
|         self._num_downloads = 0 | ||||
|         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] | ||||
|         self._err_file = sys.stderr | ||||
|         self.params = params | ||||
|         self.params = { | ||||
|             # Default parameters | ||||
|             'nocheckcertificate': False, | ||||
|         } | ||||
|         self.params.update(params) | ||||
|         self.cache = Cache(self) | ||||
|  | ||||
|         if params.get('bidi_workaround', False): | ||||
| @@ -1227,13 +1232,20 @@ class YoutubeDL(object): | ||||
|             except (ValueError, OverflowError, OSError): | ||||
|                 pass | ||||
|  | ||||
|         subtitles = info_dict.get('subtitles') | ||||
|         if subtitles: | ||||
|             for _, subtitle in subtitles.items(): | ||||
|                 for subtitle_format in subtitle: | ||||
|                     if 'ext' not in subtitle_format: | ||||
|                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() | ||||
|  | ||||
|         if self.params.get('listsubtitles', False): | ||||
|             if 'automatic_captions' in info_dict: | ||||
|                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') | ||||
|             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') | ||||
|             self.list_subtitles(info_dict['id'], subtitles, 'subtitles') | ||||
|             return | ||||
|         info_dict['requested_subtitles'] = self.process_subtitles( | ||||
|             info_dict['id'], info_dict.get('subtitles'), | ||||
|             info_dict['id'], subtitles, | ||||
|             info_dict.get('automatic_captions')) | ||||
|  | ||||
|         # We now pick which formats have to be downloaded | ||||
| @@ -1939,8 +1951,7 @@ class YoutubeDL(object): | ||||
|             if os.access(opts_cookiefile, os.R_OK): | ||||
|                 self.cookiejar.load() | ||||
|  | ||||
|         cookie_processor = compat_urllib_request.HTTPCookieProcessor( | ||||
|             self.cookiejar) | ||||
|         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) | ||||
|         if opts_proxy is not None: | ||||
|             if opts_proxy == '': | ||||
|                 proxies = {} | ||||
| @@ -2009,7 +2020,7 @@ class YoutubeDL(object): | ||||
|                                (info_dict['extractor'], info_dict['id'], thumb_display_id)) | ||||
|                 try: | ||||
|                     uf = self.urlopen(t['url']) | ||||
|                     with open(thumb_filename, 'wb') as thumbf: | ||||
|                     with open(encodeFilename(thumb_filename), 'wb') as thumbf: | ||||
|                         shutil.copyfileobj(uf, thumbf) | ||||
|                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % | ||||
|                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) | ||||
|   | ||||
| @@ -9,7 +9,6 @@ import codecs | ||||
| import io | ||||
| import os | ||||
| import random | ||||
| import shlex | ||||
| import sys | ||||
|  | ||||
|  | ||||
| @@ -20,6 +19,7 @@ from .compat import ( | ||||
|     compat_expanduser, | ||||
|     compat_getpass, | ||||
|     compat_print, | ||||
|     compat_shlex_split, | ||||
|     workaround_optparse_bug9161, | ||||
| ) | ||||
| from .utils import ( | ||||
| @@ -262,10 +262,10 @@ def _real_main(argv=None): | ||||
|             parser.error('setting filesize xattr requested but python-xattr is not available') | ||||
|     external_downloader_args = None | ||||
|     if opts.external_downloader_args: | ||||
|         external_downloader_args = shlex.split(opts.external_downloader_args) | ||||
|         external_downloader_args = compat_shlex_split(opts.external_downloader_args) | ||||
|     postprocessor_args = None | ||||
|     if opts.postprocessor_args: | ||||
|         postprocessor_args = shlex.split(opts.postprocessor_args) | ||||
|         postprocessor_args = compat_shlex_split(opts.postprocessor_args) | ||||
|     match_filter = ( | ||||
|         None if opts.match_filter is None | ||||
|         else match_filter_func(opts.match_filter)) | ||||
|   | ||||
| @@ -11,7 +11,7 @@ if __package__ is None and not hasattr(sys, "frozen"): | ||||
|     # direct call of __main__.py | ||||
|     import os.path | ||||
|     path = os.path.realpath(os.path.abspath(__file__)) | ||||
|     sys.path.append(os.path.dirname(os.path.dirname(path))) | ||||
|     sys.path.insert(0, os.path.dirname(os.path.dirname(path))) | ||||
|  | ||||
| import youtube_dl | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import getpass | ||||
| import optparse | ||||
| import os | ||||
| import re | ||||
| import shlex | ||||
| import shutil | ||||
| import socket | ||||
| import subprocess | ||||
| @@ -79,6 +80,11 @@ try: | ||||
| except ImportError: | ||||
|     import BaseHTTPServer as compat_http_server | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode  # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes | ||||
|     from urllib.parse import unquote as compat_urllib_parse_unquote | ||||
| @@ -99,7 +105,7 @@ except ImportError:  # Python 2 | ||||
|             # Is it a string-like object? | ||||
|             string.split | ||||
|             return b'' | ||||
|         if isinstance(string, unicode): | ||||
|         if isinstance(string, compat_str): | ||||
|             string = string.encode('utf-8') | ||||
|         bits = string.split(b'%') | ||||
|         if len(bits) == 1: | ||||
| @@ -149,11 +155,6 @@ except ImportError:  # Python 2 | ||||
|         string = string.replace('+', ' ') | ||||
|         return compat_urllib_parse_unquote(string, encoding, errors) | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode  # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     compat_basestring = basestring  # Python 2 | ||||
| except NameError: | ||||
| @@ -227,6 +228,17 @@ except ImportError:  # Python < 3.3 | ||||
|             return "'" + s.replace("'", "'\"'\"'") + "'" | ||||
|  | ||||
|  | ||||
| if sys.version_info >= (2, 7, 3): | ||||
|     compat_shlex_split = shlex.split | ||||
| else: | ||||
|     # Working around shlex issue with unicode strings on some python 2 | ||||
|     # versions (see http://bugs.python.org/issue1548891) | ||||
|     def compat_shlex_split(s, comments=False, posix=True): | ||||
|         if isinstance(s, compat_str): | ||||
|             s = s.encode('utf-8') | ||||
|         return shlex.split(s, comments, posix) | ||||
|  | ||||
|  | ||||
| def compat_ord(c): | ||||
|     if type(c) is int: | ||||
|         return c | ||||
| @@ -404,26 +416,32 @@ if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3 | ||||
| else: | ||||
|     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) | ||||
|  | ||||
|     def compat_get_terminal_size(): | ||||
|         columns = compat_getenv('COLUMNS', None) | ||||
|     def compat_get_terminal_size(fallback=(80, 24)): | ||||
|         columns = compat_getenv('COLUMNS') | ||||
|         if columns: | ||||
|             columns = int(columns) | ||||
|         else: | ||||
|             columns = None | ||||
|         lines = compat_getenv('LINES', None) | ||||
|         lines = compat_getenv('LINES') | ||||
|         if lines: | ||||
|             lines = int(lines) | ||||
|         else: | ||||
|             lines = None | ||||
|  | ||||
|         try: | ||||
|             sp = subprocess.Popen( | ||||
|                 ['stty', 'size'], | ||||
|                 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|             out, err = sp.communicate() | ||||
|             lines, columns = map(int, out.split()) | ||||
|         except Exception: | ||||
|             pass | ||||
|         if columns is None or lines is None or columns <= 0 or lines <= 0: | ||||
|             try: | ||||
|                 sp = subprocess.Popen( | ||||
|                     ['stty', 'size'], | ||||
|                     stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|                 out, err = sp.communicate() | ||||
|                 _columns, _lines = map(int, out.split()) | ||||
|             except Exception: | ||||
|                 _columns, _lines = _terminal_size(*fallback) | ||||
|  | ||||
|             if columns is None or columns <= 0: | ||||
|                 columns = _columns | ||||
|             if lines is None or lines <= 0: | ||||
|                 lines = _lines | ||||
|         return _terminal_size(columns, lines) | ||||
|  | ||||
| try: | ||||
| @@ -459,6 +477,7 @@ __all__ = [ | ||||
|     'compat_ord', | ||||
|     'compat_parse_qs', | ||||
|     'compat_print', | ||||
|     'compat_shlex_split', | ||||
|     'compat_socket_create_connection', | ||||
|     'compat_str', | ||||
|     'compat_subprocess_get_DEVNULL', | ||||
|   | ||||
| @@ -5,6 +5,10 @@ import subprocess | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     cli_option, | ||||
|     cli_valueless_option, | ||||
|     cli_bool_option, | ||||
|     cli_configuration_args, | ||||
|     encodeFilename, | ||||
|     encodeArgument, | ||||
| ) | ||||
| @@ -46,19 +50,16 @@ class ExternalFD(FileDownloader): | ||||
|         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') | ||||
|  | ||||
|     def _option(self, command_option, param): | ||||
|         param = self.params.get(param) | ||||
|         if param is None: | ||||
|             return [] | ||||
|         if isinstance(param, bool): | ||||
|             return [command_option] | ||||
|         return [command_option, param] | ||||
|         return cli_option(self.params, command_option, param) | ||||
|  | ||||
|     def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): | ||||
|         return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) | ||||
|  | ||||
|     def _valueless_option(self, command_option, param, expected_value=True): | ||||
|         return cli_valueless_option(self.params, command_option, param, expected_value) | ||||
|  | ||||
|     def _configuration_args(self, default=[]): | ||||
|         ex_args = self.params.get('external_downloader_args') | ||||
|         if ex_args is None: | ||||
|             return default | ||||
|         assert isinstance(ex_args, list) | ||||
|         return ex_args | ||||
|         return cli_configuration_args(self.params, 'external_downloader_args', default) | ||||
|  | ||||
|     def _call_downloader(self, tmpfilename, info_dict): | ||||
|         """ Either overwrite this or implement _make_cmd """ | ||||
| @@ -80,6 +81,8 @@ class CurlFD(ExternalFD): | ||||
|         for key, val in info_dict['http_headers'].items(): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._option('--interface', 'source_address') | ||||
|         cmd += self._option('--proxy', 'proxy') | ||||
|         cmd += self._valueless_option('--insecure', 'nocheckcertificate') | ||||
|         cmd += self._configuration_args() | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
| @@ -102,7 +105,7 @@ class WgetFD(ExternalFD): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._option('--bind-address', 'source_address') | ||||
|         cmd += self._option('--proxy', 'proxy') | ||||
|         cmd += self._option('--no-check-certificate', 'nocheckcertificate') | ||||
|         cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') | ||||
|         cmd += self._configuration_args() | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
| @@ -121,6 +124,7 @@ class Aria2cFD(ExternalFD): | ||||
|             cmd += ['--header', '%s: %s' % (key, val)] | ||||
|         cmd += self._option('--interface', 'source_address') | ||||
|         cmd += self._option('--all-proxy', 'proxy') | ||||
|         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') | ||||
|         cmd += ['--', info_dict['url']] | ||||
|         return cmd | ||||
|  | ||||
|   | ||||
| @@ -13,6 +13,8 @@ from ..compat import ( | ||||
|     compat_urllib_error, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     xpath_text, | ||||
| @@ -343,18 +345,19 @@ class F4mFD(FragmentFD): | ||||
|                 success = ctx['dl'].download(frag_filename, {'url': url}) | ||||
|                 if not success: | ||||
|                     return False | ||||
|                 with open(frag_filename, 'rb') as down: | ||||
|                     down_data = down.read() | ||||
|                     reader = FlvReader(down_data) | ||||
|                     while True: | ||||
|                         _, box_type, box_data = reader.read_box_info() | ||||
|                         if box_type == b'mdat': | ||||
|                             dest_stream.write(box_data) | ||||
|                             break | ||||
|                 (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') | ||||
|                 down_data = down.read() | ||||
|                 down.close() | ||||
|                 reader = FlvReader(down_data) | ||||
|                 while True: | ||||
|                     _, box_type, box_data = reader.read_box_info() | ||||
|                     if box_type == b'mdat': | ||||
|                         dest_stream.write(box_data) | ||||
|                         break | ||||
|                 if live: | ||||
|                     os.remove(frag_filename) | ||||
|                     os.remove(encodeFilename(frag_sanitized)) | ||||
|                 else: | ||||
|                     frags_filenames.append(frag_filename) | ||||
|                     frags_filenames.append(frag_sanitized) | ||||
|             except (compat_urllib_error.HTTPError, ) as err: | ||||
|                 if live and (err.code == 404 or err.code == 410): | ||||
|                     # We didn't keep up with the live window. Continue | ||||
| @@ -375,6 +378,6 @@ class F4mFD(FragmentFD): | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
|             os.remove(encodeFilename(frag_file)) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -12,6 +12,7 @@ from ..postprocessor.ffmpeg import FFmpegPostProcessor | ||||
| from ..utils import ( | ||||
|     encodeArgument, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,10 +28,19 @@ class HlsFD(FileDownloader): | ||||
|             return False | ||||
|         ffpp.check_version() | ||||
|  | ||||
|         args = [ | ||||
|             encodeArgument(opt) | ||||
|             for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|         args.append(encodeFilename(tmpfilename, True)) | ||||
|         args = [ffpp.executable, '-y'] | ||||
|  | ||||
|         if info_dict['http_headers']: | ||||
|             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: | ||||
|             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. | ||||
|             args += [ | ||||
|                 '-headers', | ||||
|                 ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())] | ||||
|  | ||||
|         args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc'] | ||||
|  | ||||
|         args = [encodeArgument(opt) for opt in args] | ||||
|         args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) | ||||
|  | ||||
|         self._debug_cmd(args) | ||||
|  | ||||
| @@ -89,13 +99,14 @@ class NativeHlsFD(FragmentFD): | ||||
|             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|             if not success: | ||||
|                 return False | ||||
|             with open(frag_filename, 'rb') as down: | ||||
|                 ctx['dest_stream'].write(down.read()) | ||||
|             frags_filenames.append(frag_filename) | ||||
|             down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|             ctx['dest_stream'].write(down.read()) | ||||
|             down.close() | ||||
|             frags_filenames.append(frag_sanitized) | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
|             os.remove(encodeFilename(frag_file)) | ||||
|  | ||||
|         return True | ||||
|   | ||||
| @@ -138,7 +138,6 @@ from .dump import DumpIE | ||||
| from .dumpert import DumpertIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .discovery import DiscoveryIE | ||||
| from .divxstage import DivxStageIE | ||||
| from .dropbox import DropboxIE | ||||
| from .eagleplatform import EaglePlatformIE | ||||
| from .ebaumsworld import EbaumsWorldIE | ||||
| @@ -159,6 +158,7 @@ from .eroprofile import EroProfileIE | ||||
| from .escapist import EscapistIE | ||||
| from .espn import ESPNIE | ||||
| from .esri import EsriVideoIE | ||||
| from .europa import EuropaIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .exfm import ExfmIE | ||||
| from .expotv import ExpoTVIE | ||||
| @@ -170,10 +170,7 @@ from .firstpost import FirstpostIE | ||||
| from .firsttv import FirstTVIE | ||||
| from .fivemin import FiveMinIE | ||||
| from .fivetv import FiveTVIE | ||||
| from .fktv import ( | ||||
|     FKTVIE, | ||||
|     FKTVPosteckeIE, | ||||
| ) | ||||
| from .fktv import FKTVIE | ||||
| from .flickr import FlickrIE | ||||
| from .folketinget import FolketingetIE | ||||
| from .footyroom import FootyRoomIE | ||||
| @@ -229,7 +226,6 @@ from .historicfilms import HistoricFilmsIE | ||||
| from .history import HistoryIE | ||||
| from .hitbox import HitboxIE, HitboxLiveIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| from .hostingbulk import HostingBulkIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .howstuffworks import HowStuffWorksIE | ||||
| @@ -241,7 +237,10 @@ from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| ) | ||||
| from .imgur import ImgurIE | ||||
| from .imgur import ( | ||||
|     ImgurIE, | ||||
|     ImgurAlbumIE, | ||||
| ) | ||||
| from .ina import InaIE | ||||
| from .indavideo import ( | ||||
|     IndavideoIE, | ||||
| @@ -296,6 +295,11 @@ from .lifenews import ( | ||||
|     LifeNewsIE, | ||||
|     LifeEmbedIE, | ||||
| ) | ||||
| from .limelight import ( | ||||
|     LimelightMediaIE, | ||||
|     LimelightChannelIE, | ||||
|     LimelightChannelListIE, | ||||
| ) | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import ( | ||||
|     LivestreamIE, | ||||
| @@ -340,10 +344,10 @@ from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVServicesEmbeddedIE, | ||||
|     MTVIggyIE, | ||||
|     MTVDEIE, | ||||
| ) | ||||
| from .muenchentv import MuenchenTVIE | ||||
| from .musicplayon import MusicPlayOnIE | ||||
| from .musicvault import MusicVaultIE | ||||
| from .muzu import MuzuTVIE | ||||
| from .mwave import MwaveIE | ||||
| from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| @@ -364,6 +368,9 @@ from .nbc import ( | ||||
| from .ndr import ( | ||||
|     NDRIE, | ||||
|     NJoyIE, | ||||
|     NDREmbedBaseIE, | ||||
|     NDREmbedIE, | ||||
|     NJoyEmbedIE, | ||||
| ) | ||||
| from .ndtv import NDTVIE | ||||
| from .netzkino import NetzkinoIE | ||||
| @@ -399,7 +406,11 @@ from .normalboots import NormalbootsIE | ||||
| from .nosvideo import NosVideoIE | ||||
| from .nova import NovaIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowness import ( | ||||
|     NownessIE, | ||||
|     NownessPlaylistIE, | ||||
|     NownessSeriesIE, | ||||
| ) | ||||
| from .nowtv import NowTVIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .npo import ( | ||||
| @@ -429,7 +440,6 @@ from .ooyala import ( | ||||
|     OoyalaIE, | ||||
|     OoyalaExternalIE, | ||||
| ) | ||||
| from .openfilm import OpenFilmIE | ||||
| from .orf import ( | ||||
|     ORFTVthekIE, | ||||
|     ORFOE1IE, | ||||
| @@ -739,6 +749,7 @@ from .vk import ( | ||||
|     VKIE, | ||||
|     VKUserVideosIE, | ||||
| ) | ||||
| from .vlive import VLiveIE | ||||
| from .vodlocker import VodlockerIE | ||||
| from .voicerepublic import VoiceRepublicIE | ||||
| from .vporn import VpornIE | ||||
|   | ||||
| @@ -1,16 +1,20 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ABCIE(InfoExtractor): | ||||
|     IE_NAME = 'abc.net.au' | ||||
|     _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', | ||||
|         'md5': 'cb3dd03b18455a661071ee1e28344d9f', | ||||
|         'info_dict': { | ||||
| @@ -19,22 +23,47 @@ class ABCIE(InfoExtractor): | ||||
|             'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', | ||||
|             'description': 'md5:809ad29c67a05f54eb41f2a105693a67', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', | ||||
|         'md5': 'db2a5369238b51f9811ad815b69dc086', | ||||
|         'info_dict': { | ||||
|             'id': 'NvqvPeNZsHU', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20150816', | ||||
|             'uploader': 'ABC News (Australia)', | ||||
|             'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef', | ||||
|             'uploader_id': 'NewsOnABC', | ||||
|             'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         urls_info_json = self._search_regex( | ||||
|             r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls', | ||||
|             flags=re.DOTALL) | ||||
|         urls_info = json.loads(urls_info_json.replace('\'', '"')) | ||||
|         mobj = re.search( | ||||
|             r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);', | ||||
|             webpage) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError('Unable to extract video urls') | ||||
|  | ||||
|         urls_info = self._parse_json( | ||||
|             mobj.group('json_data'), video_id, transform_source=js_to_json) | ||||
|  | ||||
|         if not isinstance(urls_info, list): | ||||
|             urls_info = [urls_info] | ||||
|  | ||||
|         if mobj.group('type') == 'YouTube': | ||||
|             return self.playlist_result([ | ||||
|                 self.url_result(url_info['url']) for url_info in urls_info]) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': url_info['url'], | ||||
|             'width': int(url_info['width']), | ||||
|             'height': int(url_info['height']), | ||||
|             'tbr': int(url_info['bitrate']), | ||||
|             'filesize': int(url_info['filesize']), | ||||
|             'width': int_or_none(url_info.get('width')), | ||||
|             'height': int_or_none(url_info.get('height')), | ||||
|             'tbr': int_or_none(url_info.get('bitrate')), | ||||
|             'filesize': int_or_none(url_info.get('filesize')), | ||||
|         } for url_info in urls_info] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ class AcademicEarthCourseIE(InfoExtractor): | ||||
|             'title': 'Laws of Nature', | ||||
|             'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.', | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|         'playlist_count': 3, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     xpath_text, | ||||
| @@ -123,7 +124,6 @@ class AdultSwimIE(InfoExtractor): | ||||
|         else: | ||||
|             collections = bootstrapped_data['show']['collections'] | ||||
|             collection, video_info = self.find_collection_containing_video(collections, episode_path) | ||||
|  | ||||
|             # Video wasn't found in the collections, let's try `slugged_video`. | ||||
|             if video_info is None: | ||||
|                 if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: | ||||
| @@ -133,7 +133,9 @@ class AdultSwimIE(InfoExtractor): | ||||
|  | ||||
|             show = bootstrapped_data['show'] | ||||
|             show_title = show['title'] | ||||
|             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] | ||||
|             stream = video_info.get('stream') | ||||
|             clips = [stream] if stream else video_info['clips'] | ||||
|             segment_ids = [clip['videoPlaybackID'] for clip in clips] | ||||
|  | ||||
|         episode_id = video_info['id'] | ||||
|         episode_title = video_info['title'] | ||||
| @@ -142,7 +144,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|  | ||||
|         entries = [] | ||||
|         for part_num, segment_id in enumerate(segment_ids): | ||||
|             segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id | ||||
|             segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id | ||||
|  | ||||
|             segment_title = '%s - %s' % (show_title, episode_title) | ||||
|             if len(segment_ids) > 1: | ||||
| @@ -156,19 +158,32 @@ class AdultSwimIE(InfoExtractor): | ||||
|                 xpath_text(idoc, './/trt', 'segment duration').strip()) | ||||
|  | ||||
|             formats = [] | ||||
|             file_els = idoc.findall('.//files/file') | ||||
|             file_els = idoc.findall('.//files/file') or idoc.findall('./files/file') | ||||
|  | ||||
|             unique_urls = [] | ||||
|             unique_file_els = [] | ||||
|             for file_el in file_els: | ||||
|                 media_url = file_el.text | ||||
|                 if not media_url or determine_ext(media_url) == 'f4m': | ||||
|                     continue | ||||
|                 if file_el.text not in unique_urls: | ||||
|                     unique_urls.append(file_el.text) | ||||
|                     unique_file_els.append(file_el) | ||||
|  | ||||
|             for file_el in unique_file_els: | ||||
|                 bitrate = file_el.attrib.get('bitrate') | ||||
|                 ftype = file_el.attrib.get('type') | ||||
|  | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s_%s' % (bitrate, ftype), | ||||
|                     'url': file_el.text.strip(), | ||||
|                     # The bitrate may not be a number (for example: 'iphone') | ||||
|                     'tbr': int(bitrate) if bitrate.isdigit() else None, | ||||
|                     'quality': 1 if ftype == 'hd' else -1 | ||||
|                 }) | ||||
|                 media_url = file_el.text | ||||
|                 if determine_ext(media_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         media_url, segment_title, 'mp4', 'm3u8_native', preference=0, m3u8_id='hls')) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'format_id': '%s_%s' % (bitrate, ftype), | ||||
|                         'url': file_el.text.strip(), | ||||
|                         # The bitrate may not be a number (for example: 'iphone') | ||||
|                         'tbr': int(bitrate) if bitrate.isdigit() else None, | ||||
|                     }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -20,14 +20,14 @@ class AirMozillaIE(InfoExtractor): | ||||
|             'id': '6x4q2w', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', | ||||
|             'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+', | ||||
|             'thumbnail': 're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster', | ||||
|             'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', | ||||
|             'timestamp': 1422487800, | ||||
|             'upload_date': '20150128', | ||||
|             'location': 'SFO Commons', | ||||
|             'duration': 3780, | ||||
|             'view_count': int, | ||||
|             'categories': ['Main'], | ||||
|             'categories': ['Main', 'Privacy'], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -16,6 +16,7 @@ class AlJazeeraIE(InfoExtractor): | ||||
|             'uploader': 'Al Jazeera English', | ||||
|         }, | ||||
|         'add_ie': ['Brightcove'], | ||||
|         'skip': 'Not accessible from Travis CI server', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -13,53 +13,53 @@ from ..utils import ( | ||||
| class AppleTrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", | ||||
|         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|         }, | ||||
|         "playlist": [ | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", | ||||
|                 "info_dict": { | ||||
|                     "id": "manofsteel-trailer4", | ||||
|                     "ext": "mov", | ||||
|                     "duration": 111, | ||||
|                     "title": "Trailer 4", | ||||
|                     "upload_date": "20130523", | ||||
|                     "uploader_id": "wb", | ||||
|                 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', | ||||
|                 'info_dict': { | ||||
|                     'id': 'manofsteel-trailer4', | ||||
|                     'ext': 'mov', | ||||
|                     'duration': 111, | ||||
|                     'title': 'Trailer 4', | ||||
|                     'upload_date': '20130523', | ||||
|                     'uploader_id': 'wb', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "md5": "b8017b7131b721fb4e8d6f49e1df908c", | ||||
|                 "info_dict": { | ||||
|                     "id": "manofsteel-trailer3", | ||||
|                     "ext": "mov", | ||||
|                     "duration": 182, | ||||
|                     "title": "Trailer 3", | ||||
|                     "upload_date": "20130417", | ||||
|                     "uploader_id": "wb", | ||||
|                 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', | ||||
|                 'info_dict': { | ||||
|                     'id': 'manofsteel-trailer3', | ||||
|                     'ext': 'mov', | ||||
|                     'duration': 182, | ||||
|                     'title': 'Trailer 3', | ||||
|                     'upload_date': '20130417', | ||||
|                     'uploader_id': 'wb', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "md5": "d0f1e1150989b9924679b441f3404d48", | ||||
|                 "info_dict": { | ||||
|                     "id": "manofsteel-trailer", | ||||
|                     "ext": "mov", | ||||
|                     "duration": 148, | ||||
|                     "title": "Trailer", | ||||
|                     "upload_date": "20121212", | ||||
|                     "uploader_id": "wb", | ||||
|                 'md5': 'd0f1e1150989b9924679b441f3404d48', | ||||
|                 'info_dict': { | ||||
|                     'id': 'manofsteel-trailer', | ||||
|                     'ext': 'mov', | ||||
|                     'duration': 148, | ||||
|                     'title': 'Trailer', | ||||
|                     'upload_date': '20121212', | ||||
|                     'uploader_id': 'wb', | ||||
|                 }, | ||||
|             }, | ||||
|             { | ||||
|                 "md5": "5fe08795b943eb2e757fa95cb6def1cb", | ||||
|                 "info_dict": { | ||||
|                     "id": "manofsteel-teaser", | ||||
|                     "ext": "mov", | ||||
|                     "duration": 93, | ||||
|                     "title": "Teaser", | ||||
|                     "upload_date": "20120721", | ||||
|                     "uploader_id": "wb", | ||||
|                 'md5': '5fe08795b943eb2e757fa95cb6def1cb', | ||||
|                 'info_dict': { | ||||
|                     'id': 'manofsteel-teaser', | ||||
|                     'ext': 'mov', | ||||
|                     'duration': 93, | ||||
|                     'title': 'Teaser', | ||||
|                     'upload_date': '20120721', | ||||
|                     'uploader_id': 'wb', | ||||
|                 }, | ||||
|             }, | ||||
|         ] | ||||
|   | ||||
| @@ -4,6 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     find_xpath_attr, | ||||
|     unified_strdate, | ||||
| @@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|     def _extract_from_webpage(self, webpage, video_id, lang): | ||||
|         json_url = self._html_search_regex( | ||||
|             [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'], | ||||
|             webpage, 'json vp url') | ||||
|             webpage, 'json vp url', default=None) | ||||
|         if not json_url: | ||||
|             iframe_url = self._html_search_regex( | ||||
|                 r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', | ||||
|                 webpage, 'iframe url', group='url') | ||||
|             json_url = compat_parse_qs( | ||||
|                 compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] | ||||
|         return self._extract_from_json_url(json_url, video_id, lang) | ||||
|  | ||||
|     def _extract_from_json_url(self, json_url, video_id, lang): | ||||
|   | ||||
| @@ -21,6 +21,10 @@ class BBCCoUkIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' | ||||
|  | ||||
|     _MEDIASELECTOR_URLS = [ | ||||
|         # Provides HQ HLS streams with even better quality that pc mediaset but fails | ||||
|         # with geolocation in some cases when it's even not geo restricted at all (e.g. | ||||
|         # http://www.bbc.co.uk/programmes/b06bp7lf) | ||||
|         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', | ||||
|         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s', | ||||
|     ] | ||||
|  | ||||
| @@ -152,6 +156,21 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'geolocation', | ||||
|         }, { | ||||
|             # iptv-all mediaset fails with geolocation however there is no geo restriction | ||||
|             # for this programme at all | ||||
|             'url': 'http://www.bbc.co.uk/programmes/b06bp7lf', | ||||
|             'info_dict': { | ||||
|                 'id': 'b06bp7kf', | ||||
|                 'ext': 'flv', | ||||
|                 'title': "Annie Mac's Friday Night, B.Traits sits in for Annie", | ||||
|                 'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.', | ||||
|                 'duration': 10800, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, { | ||||
|             'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', | ||||
|             'only_matching': True, | ||||
| @@ -189,6 +208,12 @@ class BBCCoUkIE(InfoExtractor): | ||||
|             # Skip DASH until supported | ||||
|             elif transfer_format == 'dash': | ||||
|                 pass | ||||
|             elif transfer_format == 'hls': | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     href, programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=supplier, fatal=False) | ||||
|                 if m3u8_formats: | ||||
|                     formats.extend(m3u8_formats) | ||||
|             # Direct link | ||||
|             else: | ||||
|                 formats.append({ | ||||
| @@ -287,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                 return self._download_media_selector_url( | ||||
|                     mediaselector_url % programme_id, programme_id) | ||||
|             except BBCCoUkIE.MediaSelectionError as e: | ||||
|                 if e.id == 'notukerror': | ||||
|                 if e.id in ('notukerror', 'geolocation'): | ||||
|                     last_exception = e | ||||
|                     continue | ||||
|                 self._raise_extractor_error(e) | ||||
|   | ||||
| @@ -17,55 +17,81 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', | ||||
|             'info_dict': { | ||||
|                 'id': '214411058091220', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Hyde Park Civilizace', | ||||
|                 'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'duration': 3350, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', | ||||
|         'info_dict': { | ||||
|             'id': '61924494876951776', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hyde Park Civilizace', | ||||
|             'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'duration': 3350, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | ||||
|             'info_dict': { | ||||
|                 'id': '14716', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'První republika: Zpěvačka z Dupárny Bobina', | ||||
|                 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'duration': 88.4, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     ] | ||||
|     }, { | ||||
|         'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | ||||
|         'info_dict': { | ||||
|             'id': '61924494876844374', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'První republika: Zpěvačka z Dupárny Bobina', | ||||
|             'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'duration': 88.4, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # video with 18+ caution trailer | ||||
|         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', | ||||
|         'info_dict': { | ||||
|             'id': '215562210900007-bogotart', | ||||
|             'title': 'Queer: Bogotart', | ||||
|             'description': 'Alternativní průvodce současným queer světem', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': '61924494876844842', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Queer: Bogotart (Varování 18+)', | ||||
|                 'duration': 10.2, | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': '61924494877068022', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Queer: Bogotart (Queer)', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'duration': 1558.3, | ||||
|             }, | ||||
|         }], | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         playlist_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | ||||
|         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | ||||
|             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | ||||
|  | ||||
|         typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | ||||
|         episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | ||||
|         typ = self._html_search_regex( | ||||
|             r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | ||||
|         episode_id = self._html_search_regex( | ||||
|             r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | ||||
|  | ||||
|         data = { | ||||
|             'playlist[0][type]': typ, | ||||
| @@ -83,7 +109,7 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|         req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlistpage = self._download_json(req, video_id) | ||||
|         playlistpage = self._download_json(req, playlist_id) | ||||
|  | ||||
|         playlist_url = playlistpage['url'] | ||||
|         if playlist_url == 'error_region': | ||||
| @@ -92,33 +118,43 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|         req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url)) | ||||
|         req.add_header('Referer', url) | ||||
|  | ||||
|         playlist = self._download_json(req, video_id) | ||||
|         playlist_title = self._og_search_title(webpage) | ||||
|         playlist_description = self._og_search_description(webpage) | ||||
|  | ||||
|         item = playlist['playlist'][0] | ||||
|         formats = [] | ||||
|         for format_id, stream_url in item['streamUrls'].items(): | ||||
|             formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4')) | ||||
|         self._sort_formats(formats) | ||||
|         playlist = self._download_json(req, playlist_id)['playlist'] | ||||
|         playlist_len = len(playlist) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|         duration = float_or_none(item.get('duration')) | ||||
|         thumbnail = item.get('previewImageUrl') | ||||
|         entries = [] | ||||
|         for item in playlist: | ||||
|             formats = [] | ||||
|             for format_id, stream_url in item['streamUrls'].items(): | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native')) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         subs = item.get('subtitles') | ||||
|         if subs: | ||||
|             subtitles = self.extract_subtitles(episode_id, subs) | ||||
|             item_id = item.get('id') or item['assetId'] | ||||
|             title = item['title'] | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|             duration = float_or_none(item.get('duration')) | ||||
|             thumbnail = item.get('previewImageUrl') | ||||
|  | ||||
|             subtitles = {} | ||||
|             if item.get('type') == 'VOD': | ||||
|                 subs = item.get('subtitles') | ||||
|                 if subs: | ||||
|                     subtitles = self.extract_subtitles(episode_id, subs) | ||||
|  | ||||
|             entries.append({ | ||||
|                 'id': item_id, | ||||
|                 'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title), | ||||
|                 'description': playlist_description if playlist_len == 1 else None, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'duration': duration, | ||||
|                 'formats': formats, | ||||
|                 'subtitles': subtitles, | ||||
|             }) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||
|  | ||||
|     def _get_subtitles(self, episode_id, subs): | ||||
|         original_subtitles = self._download_webpage( | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from .bliptv import BlipTVIE | ||||
| from .screenwavemedia import ScreenwaveMediaIE | ||||
|  | ||||
|  | ||||
| class CinemassacreIE(InfoExtractor): | ||||
| @@ -83,10 +84,10 @@ class CinemassacreIE(InfoExtractor): | ||||
|  | ||||
|         playerdata_url = self._search_regex( | ||||
|             [ | ||||
|                 r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', | ||||
|                 r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', | ||||
|                 ScreenwaveMediaIE.EMBED_PATTERN, | ||||
|                 r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', | ||||
|             ], | ||||
|             webpage, 'player data URL', default=None) | ||||
|             webpage, 'player data URL', default=None, group='url') | ||||
|         if not playerdata_url: | ||||
|             playerdata_url = BlipTVIE._extract_url(webpage) | ||||
|         if not playerdata_url: | ||||
|   | ||||
| @@ -12,9 +12,9 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ClubicIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html' | ||||
|     _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', | ||||
|         'md5': '1592b694ba586036efac1776b0b43cd3', | ||||
|         'info_dict': { | ||||
| @@ -24,7 +24,10 @@ class ClubicIE(InfoExtractor): | ||||
|             'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*', | ||||
|             'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|   | ||||
| @@ -151,12 +151,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         if mobj.group('shortname'): | ||||
|             if mobj.group('shortname') in ('tds', 'thedailyshow'): | ||||
|                 url = 'http://thedailyshow.cc.com/full-episodes/' | ||||
|             else: | ||||
|                 url = 'http://thecolbertreport.cc.com/full-episodes/' | ||||
|             mobj = re.match(self._VALID_URL, url, re.VERBOSE) | ||||
|             assert mobj is not None | ||||
|             return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes') | ||||
|  | ||||
|         if mobj.group('clip'): | ||||
|             if mobj.group('videotitle'): | ||||
|   | ||||
| @@ -39,6 +39,7 @@ from ..utils import ( | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     url_basename, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| @@ -152,6 +153,7 @@ class InfoExtractor(object): | ||||
|     description:    Full video description. | ||||
|     uploader:       Full name of the video uploader. | ||||
|     creator:        The main artist who created the video. | ||||
|     release_date:   The date (YYYYMMDD) when the video was released. | ||||
|     timestamp:      UNIX timestamp of the moment the video became available. | ||||
|     upload_date:    Video upload date (YYYYMMDD). | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
| @@ -163,6 +165,7 @@ class InfoExtractor(object): | ||||
|                     with the "ext" entry and one of: | ||||
|                         * "data": The subtitles file contents | ||||
|                         * "url": A URL pointing to the subtitles file | ||||
|                     "ext" will be calculated from URL if missing | ||||
|     automatic_captions: Like 'subtitles', used by the YoutubeIE for | ||||
|                     automatically generated captions | ||||
|     duration:       Length of the video in seconds, as an integer. | ||||
| @@ -510,6 +513,18 @@ class InfoExtractor(object): | ||||
|         """Report attempt to log in.""" | ||||
|         self.to_screen('Logging in') | ||||
|  | ||||
|     @staticmethod | ||||
|     def raise_login_required(msg='This video is only available for registered users'): | ||||
|         raise ExtractorError( | ||||
|             '%s. Use --username and --password or --netrc to provide account credentials.' % msg, | ||||
|             expected=True) | ||||
|  | ||||
|     @staticmethod | ||||
|     def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): | ||||
|         raise ExtractorError( | ||||
|             '%s. You might want to use --proxy to workaround.' % msg, | ||||
|             expected=True) | ||||
|  | ||||
|     # Methods for following #608 | ||||
|     @staticmethod | ||||
|     def url_result(url, ie=None, video_id=None, video_title=None): | ||||
| @@ -725,9 +740,10 @@ class InfoExtractor(object): | ||||
|  | ||||
|     @staticmethod | ||||
|     def _hidden_inputs(html): | ||||
|         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) | ||||
|         hidden_inputs = {} | ||||
|         for input in re.findall(r'<input([^>]+)>', html): | ||||
|             if not re.search(r'type=(["\'])hidden\1', input): | ||||
|         for input in re.findall(r'(?i)<input([^>]+)>', html): | ||||
|             if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): | ||||
|                 continue | ||||
|             name = re.search(r'name=(["\'])(?P<value>.+?)\1', input) | ||||
|             if not name: | ||||
| @@ -740,7 +756,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _form_hidden_inputs(self, form_id, html): | ||||
|         form = self._search_regex( | ||||
|             r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id, | ||||
|             r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id, | ||||
|             html, '%s form' % form_id, group='form') | ||||
|         return self._hidden_inputs(form) | ||||
|  | ||||
| @@ -855,13 +871,18 @@ class InfoExtractor(object): | ||||
|         time.sleep(timeout) | ||||
|  | ||||
|     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, | ||||
|                              transform_source=lambda s: fix_xml_ampersands(s).strip()): | ||||
|                              transform_source=lambda s: fix_xml_ampersands(s).strip(), | ||||
|                              fatal=True): | ||||
|         manifest = self._download_xml( | ||||
|             manifest_url, video_id, 'Downloading f4m manifest', | ||||
|             'Unable to download f4m manifest', | ||||
|             # Some manifests may be malformed, e.g. prosiebensat1 generated manifests | ||||
|             # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) | ||||
|             transform_source=transform_source) | ||||
|             transform_source=transform_source, | ||||
|             fatal=fatal) | ||||
|  | ||||
|         if manifest is False: | ||||
|             return manifest | ||||
|  | ||||
|         formats = [] | ||||
|         manifest_version = '1.0' | ||||
| @@ -882,7 +903,10 @@ class InfoExtractor(object): | ||||
|                 # may differ leading to inability to resolve the format by requested | ||||
|                 # bitrate in f4m downloader | ||||
|                 if determine_ext(manifest_url) == 'f4m': | ||||
|                     formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id)) | ||||
|                     f4m_formats = self._extract_f4m_formats( | ||||
|                         manifest_url, video_id, preference, f4m_id, fatal=fatal) | ||||
|                     if f4m_formats: | ||||
|                         formats.extend(f4m_formats) | ||||
|                     continue | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             formats.append({ | ||||
| @@ -1030,6 +1054,7 @@ class InfoExtractor(object): | ||||
|         video_id = os.path.splitext(url_basename(smil_url))[0] | ||||
|         title = None | ||||
|         description = None | ||||
|         upload_date = None | ||||
|         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): | ||||
|             name = meta.attrib.get('name') | ||||
|             content = meta.attrib.get('content') | ||||
| @@ -1039,11 +1064,22 @@ class InfoExtractor(object): | ||||
|                 title = content | ||||
|             elif not description and name in ('description', 'abstract'): | ||||
|                 description = content | ||||
|             elif not upload_date and name == 'date': | ||||
|                 upload_date = unified_strdate(content) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'id': image.get('type'), | ||||
|             'url': image.get('src'), | ||||
|             'width': int_or_none(image.get('width')), | ||||
|             'height': int_or_none(image.get('height')), | ||||
|         } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title or video_id, | ||||
|             'description': description, | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| @@ -1070,7 +1106,7 @@ class InfoExtractor(object): | ||||
|             if not src: | ||||
|                 continue | ||||
|  | ||||
|             bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) | ||||
|             bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) | ||||
|             filesize = int_or_none(video.get('size') or video.get('fileSize')) | ||||
|             width = int_or_none(video.get('width')) | ||||
|             height = int_or_none(video.get('height')) | ||||
| @@ -1102,8 +1138,10 @@ class InfoExtractor(object): | ||||
|             src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) | ||||
|  | ||||
|             if proto == 'm3u8' or src_ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src_url, video_id, ext or 'mp4', m3u8_id='hls')) | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) | ||||
|                 if m3u8_formats: | ||||
|                     formats.extend(m3u8_formats) | ||||
|                 continue | ||||
|  | ||||
|             if src_ext == 'f4m': | ||||
| @@ -1115,10 +1153,12 @@ class InfoExtractor(object): | ||||
|                     } | ||||
|                 f4m_url += '&' if '?' in f4m_url else '?' | ||||
|                 f4m_url += compat_urllib_parse.urlencode(f4m_params) | ||||
|                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) | ||||
|                 f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) | ||||
|                 if f4m_formats: | ||||
|                     formats.extend(f4m_formats) | ||||
|                 continue | ||||
|  | ||||
|             if src_url.startswith('http'): | ||||
|             if src_url.startswith('http') and self._is_valid_url(src, video_id): | ||||
|                 http_count += 1 | ||||
|                 formats.append({ | ||||
|                     'url': src_url, | ||||
|   | ||||
| @@ -2,7 +2,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -12,6 +11,7 @@ from ..compat import ( | ||||
| ) | ||||
| from ..utils import ( | ||||
|     orderedSet, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -24,21 +24,33 @@ class CondeNastIE(InfoExtractor): | ||||
|     # The keys are the supported sites and the values are the name to be shown | ||||
|     # to the user and in the extractor description. | ||||
|     _SITES = { | ||||
|         'wired': 'WIRED', | ||||
|         'gq': 'GQ', | ||||
|         'vogue': 'Vogue', | ||||
|         'glamour': 'Glamour', | ||||
|         'wmagazine': 'W Magazine', | ||||
|         'vanityfair': 'Vanity Fair', | ||||
|         'allure': 'Allure', | ||||
|         'architecturaldigest': 'Architectural Digest', | ||||
|         'arstechnica': 'Ars Technica', | ||||
|         'bonappetit': 'Bon Appétit', | ||||
|         'brides': 'Brides', | ||||
|         'cnevids': 'Condé Nast', | ||||
|         'cntraveler': 'Condé Nast Traveler', | ||||
|         'details': 'Details', | ||||
|         'epicurious': 'Epicurious', | ||||
|         'glamour': 'Glamour', | ||||
|         'golfdigest': 'Golf Digest', | ||||
|         'gq': 'GQ', | ||||
|         'newyorker': 'The New Yorker', | ||||
|         'self': 'SELF', | ||||
|         'teenvogue': 'Teen Vogue', | ||||
|         'vanityfair': 'Vanity Fair', | ||||
|         'vogue': 'Vogue', | ||||
|         'wired': 'WIRED', | ||||
|         'wmagazine': 'W Magazine', | ||||
|     } | ||||
|  | ||||
|     _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) | ||||
|     _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) | ||||
|     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) | ||||
|  | ||||
|     EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys()) | ||||
|     EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys()) | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', | ||||
|         'md5': '1921f713ed48aabd715691f774c451f7', | ||||
|         'info_dict': { | ||||
| @@ -47,7 +59,16 @@ class CondeNastIE(InfoExtractor): | ||||
|             'title': '3D Printed Speakers Lit With LED', | ||||
|             'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         # JS embed | ||||
|         'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', | ||||
|         'md5': 'f1a6f9cafb7083bab74a710f65d08999', | ||||
|         'info_dict': { | ||||
|             'id': '55f9cf8b61646d1acf00000c', | ||||
|             'ext': 'mp4', | ||||
|             'title': '3D printed TSA Travel Sentry keys really do open TSA locks', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _extract_series(self, url, webpage): | ||||
|         title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', | ||||
| @@ -86,8 +107,8 @@ class CondeNastIE(InfoExtractor): | ||||
|         info_url = base_info_url + data | ||||
|         info_page = self._download_webpage(info_url, video_id, | ||||
|                                            'Downloading video info') | ||||
|         video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') | ||||
|         video_info = json.loads(video_info) | ||||
|         video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') | ||||
|         video_info = self._parse_json(video_info, video_id) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), | ||||
| @@ -111,6 +132,13 @@ class CondeNastIE(InfoExtractor): | ||||
|         url_type = mobj.group('type') | ||||
|         item_id = mobj.group('id') | ||||
|  | ||||
|         # Convert JS embed to regular embed | ||||
|         if url_type == 'embedjs': | ||||
|             parsed_url = compat_urlparse.urlparse(url) | ||||
|             url = compat_urlparse.urlunparse(parsed_url._replace( | ||||
|                 path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/'))) | ||||
|             url_type = 'embed' | ||||
|  | ||||
|         self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) | ||||
|         webpage = self._download_webpage(url, item_id) | ||||
|  | ||||
|   | ||||
| @@ -20,16 +20,34 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     bytes_to_intlist, | ||||
|     intlist_to_bytes, | ||||
|     int_or_none, | ||||
|     remove_end, | ||||
|     unified_strdate, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
| ) | ||||
| from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
| class CrunchyrollBaseIE(InfoExtractor): | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): | ||||
|         request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) | ||||
|                    else compat_urllib_request.Request(url_or_request)) | ||||
|         # Accept-Language must be set explicitly to accept any language to avoid issues | ||||
|         # similar to https://github.com/rg3/youtube-dl/issues/6797. | ||||
|         # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction | ||||
|         # should be imposed or not (from what I can see it just takes the first language | ||||
|         # ignoring the priority and requires it to correspond the IP). By the way this causes | ||||
|         # Crunchyroll to not work in georestriction cases in some browsers that don't place | ||||
|         # the locale lang first in header. However allowing any language seems to workaround the issue. | ||||
|         request.add_header('Accept-Language', '*') | ||||
|         return super(CrunchyrollBaseIE, self)._download_webpage( | ||||
|             request, video_id, note, errnote, fatal, tries, timeout, encoding) | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _NETRC_MACHINE = 'crunchyroll' | ||||
|     _TESTS = [{ | ||||
| @@ -237,7 +255,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             webpage_url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') | ||||
|         note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='') | ||||
|         note_m = self._html_search_regex( | ||||
|             r'<div class="showmedia-trailer-notice">(.+?)</div>', | ||||
|             webpage, 'trailer-notice', default='') | ||||
|         if note_m: | ||||
|             raise ExtractorError(note_m) | ||||
|  | ||||
| @@ -247,15 +267,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             if msg.get('type') == 'error': | ||||
|                 raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) | ||||
|  | ||||
|         if 'To view this, please log in to verify you are 18 or older.' in webpage: | ||||
|             self.raise_login_required() | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) | ||||
|         video_title = re.sub(r' {2,}', ' ', video_title) | ||||
|         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') | ||||
|         if not video_description: | ||||
|             video_description = None | ||||
|         video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) | ||||
|         video_upload_date = self._html_search_regex( | ||||
|             [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'], | ||||
|             webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) | ||||
|         if video_upload_date: | ||||
|             video_upload_date = unified_strdate(video_upload_date) | ||||
|         video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, | ||||
|             'video_uploader', fatal=False) | ||||
|  | ||||
|         playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) | ||||
|         playerdata_req = compat_urllib_request.Request(playerdata_url) | ||||
| @@ -281,6 +308,13 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             stream_info = streamdata.find('./{default}preload/stream_info') | ||||
|             video_url = stream_info.find('./host').text | ||||
|             video_play_path = stream_info.find('./file').text | ||||
|             metadata = stream_info.find('./metadata') | ||||
|             format_info = { | ||||
|                 'format': video_format, | ||||
|                 'format_id': video_format, | ||||
|                 'height': int_or_none(xpath_text(metadata, './height')), | ||||
|                 'width': int_or_none(xpath_text(metadata, './width')), | ||||
|             } | ||||
|  | ||||
|             if '.fplive.net/' in video_url: | ||||
|                 video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) | ||||
| @@ -289,19 +323,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                     netloc='v.lvlt.crcdn.net', | ||||
|                     path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1]))) | ||||
|                 if self._is_valid_url(direct_video_url, video_id, video_format): | ||||
|                     formats.append({ | ||||
|                     format_info.update({ | ||||
|                         'url': direct_video_url, | ||||
|                         'format_id': video_format, | ||||
|                     }) | ||||
|                     formats.append(format_info) | ||||
|                     continue | ||||
|  | ||||
|             formats.append({ | ||||
|             format_info.update({ | ||||
|                 'url': video_url, | ||||
|                 'play_path': video_play_path, | ||||
|                 'ext': 'flv', | ||||
|                 'format': video_format, | ||||
|                 'format_id': video_format, | ||||
|             }) | ||||
|             formats.append(format_info) | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, webpage) | ||||
|  | ||||
| @@ -317,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CrunchyrollShowPlaylistIE(InfoExtractor): | ||||
| class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): | ||||
|     IE_NAME = "crunchyroll:playlist" | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$' | ||||
|  | ||||
|   | ||||
| @@ -44,8 +44,8 @@ class DCNIE(InfoExtractor): | ||||
|         title = video.get('title_en') or video['title_ar'] | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' | ||||
|             + compat_urllib_parse.urlencode({ | ||||
|             'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + | ||||
|             compat_urllib_parse.urlencode({ | ||||
|                 'id': video['id'], | ||||
|                 'user_id': video['user_id'], | ||||
|                 'signature': video['signature'], | ||||
|   | ||||
| @@ -1,27 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .novamov import NovaMovIE | ||||
|  | ||||
|  | ||||
| class DivxStageIE(NovaMovIE): | ||||
|     IE_NAME = 'divxstage' | ||||
|     IE_DESC = 'DivxStage' | ||||
|  | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'} | ||||
|  | ||||
|     _HOST = 'www.divxstage.eu' | ||||
|  | ||||
|     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' | ||||
|     _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>' | ||||
|     _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.divxstage.eu/video/57f238e2e5e01', | ||||
|         'md5': '63969f6eb26533a1968c4d325be63e72', | ||||
|         'info_dict': { | ||||
|             'id': '57f238e2e5e01', | ||||
|             'ext': 'flv', | ||||
|             'title': 'youtubedl test video', | ||||
|             'description': 'This is a test video for youtubedl.', | ||||
|         } | ||||
|     } | ||||
| @@ -9,8 +9,8 @@ from ..utils import qualities | ||||
|  | ||||
|  | ||||
| class DumpertIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', | ||||
|         'md5': '1b9318d7d5054e7dcb9dc7654f21d643', | ||||
|         'info_dict': { | ||||
| @@ -20,11 +20,15 @@ class DumpertIE(InfoExtractor): | ||||
|             'description': 'Niet schrikken hoor', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         url = 'https://www.dumpert.nl/mediabase/' + video_id | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'nsfw=1; cpc=10') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|   | ||||
| @@ -21,7 +21,7 @@ class EaglePlatformIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         # http://lenta.ru/news/2015/03/06/navalny/ | ||||
|         'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', | ||||
|         'md5': '0b7994faa2bd5c0f69a3db6db28d078d', | ||||
|         'md5': '70f5187fb620f2c1d503b3b22fd4efe3', | ||||
|         'info_dict': { | ||||
|             'id': '227304', | ||||
|             'ext': 'mp4', | ||||
| @@ -36,7 +36,7 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         # http://muz-tv.ru/play/7129/ | ||||
|         # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true | ||||
|         'url': 'eagleplatform:media.clipyou.ru:12820', | ||||
|         'md5': '6c2ebeab03b739597ce8d86339d5a905', | ||||
|         'md5': '90b26344ba442c8e44aa4cf8f301164a', | ||||
|         'info_dict': { | ||||
|             'id': '12820', | ||||
|             'ext': 'mp4', | ||||
| @@ -48,7 +48,8 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         'skip': 'Georestricted', | ||||
|     }] | ||||
|  | ||||
|     def _handle_error(self, response): | ||||
|     @staticmethod | ||||
|     def _handle_error(response): | ||||
|         status = int_or_none(response.get('status', 200)) | ||||
|         if status != 200: | ||||
|             raise ExtractorError(' '.join(response['errors']), expected=True) | ||||
| @@ -58,6 +59,9 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         self._handle_error(response) | ||||
|         return response | ||||
|  | ||||
|     def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): | ||||
|         return self._download_json(url_or_request, video_id, note)['data'][0] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') | ||||
| @@ -69,7 +73,7 @@ class EaglePlatformIE(InfoExtractor): | ||||
|  | ||||
|         title = media['title'] | ||||
|         description = media.get('description') | ||||
|         thumbnail = media.get('snapshot') | ||||
|         thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') | ||||
|         duration = int_or_none(media.get('duration')) | ||||
|         view_count = int_or_none(media.get('views')) | ||||
|  | ||||
| @@ -78,13 +82,20 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         if age_restriction: | ||||
|             age_limit = 0 if age_restriction == 'allow_all' else 18 | ||||
|  | ||||
|         m3u8_data = self._download_json( | ||||
|             media['sources']['secure_m3u8']['auto'], | ||||
|             video_id, 'Downloading m3u8 JSON') | ||||
|         secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') | ||||
|  | ||||
|         m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             m3u8_data['data'][0], video_id, | ||||
|             m3u8_url, video_id, | ||||
|             'mp4', entry_protocol='m3u8_native') | ||||
|  | ||||
|         mp4_url = self._get_video_url( | ||||
|             # Secure mp4 URL is constructed according to Player.prototype.mp4 from | ||||
|             # http://lentaru.media.eagleplatform.com/player/player.js | ||||
|             re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8), | ||||
|             video_id, 'Downloading mp4 JSON') | ||||
|         formats.append({'url': mp4_url, 'format_id': 'mp4'}) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import ( | ||||
|  | ||||
| class EngadgetIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://www.engadget.com/ | ||||
|         (?:video/5min/(?P<id>\d+)| | ||||
|         (?:video(?:/5min)?/(?P<id>\d+)| | ||||
|             [\d/]+/.*?) | ||||
|         ''' | ||||
|  | ||||
|   | ||||
| @@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor): | ||||
|  | ||||
|         m = re.search(r'You must be logged in to view this video\.', webpage) | ||||
|         if m: | ||||
|             raise ExtractorError( | ||||
|                 'This video requires login. Please specify a username and password and try again.', expected=True) | ||||
|             self.raise_login_required('This video requires login') | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], | ||||
|   | ||||
							
								
								
									
										93
									
								
								youtube_dl/extractor/europa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								youtube_dl/extractor/europa.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
|     xpath_text | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EuropaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', | ||||
|         'md5': '574f080699ddd1e19a675b0ddf010371', | ||||
|         'info_dict': { | ||||
|             'id': 'I107758', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'TRADE - Wikileaks on TTIP', | ||||
|             'description': 'NEW  LIVE EC Midday press briefing of 11/08/2015', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'upload_date': '20150811', | ||||
|             'duration': 34, | ||||
|             'view_count': int, | ||||
|             'formats': 'mincount:3', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         playlist = self._download_xml( | ||||
|             'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id) | ||||
|  | ||||
|         def get_item(type_, preference): | ||||
|             items = {} | ||||
|             for item in playlist.findall('./info/%s/item' % type_): | ||||
|                 lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None) | ||||
|                 if lang and label: | ||||
|                     items[lang] = label.strip() | ||||
|             for p in preference: | ||||
|                 if items.get(p): | ||||
|                     return items[p] | ||||
|  | ||||
|         query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|         preferred_lang = query.get('sitelang', ('en', ))[0] | ||||
|  | ||||
|         preferred_langs = orderedSet((preferred_lang, 'en', 'int')) | ||||
|  | ||||
|         title = get_item('title', preferred_langs) or video_id | ||||
|         description = get_item('description', preferred_langs) | ||||
|         thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') | ||||
|         upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) | ||||
|         duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) | ||||
|         view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) | ||||
|  | ||||
|         language_preference = qualities(preferred_langs[::-1]) | ||||
|  | ||||
|         formats = [] | ||||
|         for file_ in playlist.findall('./files/file'): | ||||
|             video_url = xpath_text(file_, './url') | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             lang = xpath_text(file_, './lg') | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': lang, | ||||
|                 'format_note': xpath_text(file_, './lglabel'), | ||||
|                 'language_preference': language_preference(lang) | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnmail, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats | ||||
|         } | ||||
| @@ -10,12 +10,13 @@ from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encode_dict, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FC2IE(InfoExtractor): | ||||
|     _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' | ||||
|     IE_NAME = 'fc2' | ||||
|     _NETRC_MACHINE = 'fc2' | ||||
|     _TESTS = [{ | ||||
| @@ -37,6 +38,9 @@ class FC2IE(InfoExtractor): | ||||
|             'password': '(snip)', | ||||
|             'skip': 'requires actual password' | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -52,10 +56,7 @@ class FC2IE(InfoExtractor): | ||||
|             'Submit': ' Login ', | ||||
|         } | ||||
|  | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') | ||||
|         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') | ||||
|         request = compat_urllib_request.Request( | ||||
|             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) | ||||
|  | ||||
| @@ -80,7 +81,7 @@ class FC2IE(InfoExtractor): | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         refer = url.replace('/content/', '/a/content/') | ||||
|         refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url | ||||
|  | ||||
|         mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() | ||||
|  | ||||
|   | ||||
| @@ -1,13 +1,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import random | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -17,66 +16,40 @@ class FKTVIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://fernsehkritik.tv/folge-1', | ||||
|         'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79', | ||||
|         'info_dict': { | ||||
|             'id': '00011', | ||||
|             'ext': 'flv', | ||||
|             'id': '1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Folge 1 vom 10. April 2007', | ||||
|             'description': 'md5:fb4818139c7cfe6907d4b83412a6864f', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode = int(self._match_id(url)) | ||||
|         episode = self._match_id(url) | ||||
|  | ||||
|         video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode | ||||
|         start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode, | ||||
|                                                episode) | ||||
|         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, | ||||
|                                       'playlist', flags=re.DOTALL) | ||||
|         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://fernsehkritik.tv/folge-%s/play' % episode, episode) | ||||
|         title = clean_html(self._html_search_regex( | ||||
|             '<h3>([^<]+)</h3>', webpage, 'title')) | ||||
|         matches = re.search( | ||||
|             r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>', | ||||
|             webpage) | ||||
|         if matches is None: | ||||
|             raise ExtractorError('Unable to extract the video') | ||||
|  | ||||
|         videos = [] | ||||
|         for i, _ in enumerate(files, 1): | ||||
|             video_id = '%04d%d' % (episode, i) | ||||
|             video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i) | ||||
|             videos.append({ | ||||
|                 'ext': 'flv', | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': clean_html(get_element_by_id('eptitle', start_webpage)), | ||||
|                 'description': clean_html(get_element_by_id('contentlist', start_webpage)), | ||||
|                 'thumbnail': video_thumbnail | ||||
|             }) | ||||
|         poster, sources = matches.groups() | ||||
|         if poster is None: | ||||
|             self.report_warning('unable to extract thumbnail') | ||||
|  | ||||
|         urls = re.findall(r'<source[^>]+src="([^"]+)"', sources) | ||||
|         formats = [{ | ||||
|             'url': furl, | ||||
|             'format_id': determine_ext(furl), | ||||
|         } for furl in urls] | ||||
|         return { | ||||
|             '_type': 'multi_video', | ||||
|             'entries': videos, | ||||
|             'id': 'folge-%s' % episode, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class FKTVPosteckeIE(InfoExtractor): | ||||
|     IE_NAME = 'fernsehkritik.tv:postecke' | ||||
|     _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' | ||||
|     _TEST = { | ||||
|         'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', | ||||
|         'md5': '262f0adbac80317412f7e57b4808e5c4', | ||||
|         'info_dict': { | ||||
|             'id': '0120', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Postecke 120', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         episode = int(mobj.group('ep')) | ||||
|  | ||||
|         server = random.randint(2, 4) | ||||
|         video_id = '%04d' % episode | ||||
|         video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) | ||||
|         video_title = 'Postecke %d' % episode | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': video_title, | ||||
|             'id': episode, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': poster, | ||||
|         } | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
| @@ -8,7 +10,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class FoxNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     IE_DESC = 'Fox News and Fox Business Video' | ||||
|     _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', | ||||
| @@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor): | ||||
|             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://video.foxbusiness.com/v/4442309889001', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         host = mobj.group('host') | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id) | ||||
|             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) | ||||
|  | ||||
|         item = video['channel']['item'] | ||||
|         title = item['title'] | ||||
|   | ||||
| @@ -78,9 +78,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor): | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = info['titre'] | ||||
|         subtitle = info.get('sous_titre') | ||||
|         if subtitle: | ||||
|             title += ' - %s' % subtitle | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['titre'], | ||||
|             'title': title, | ||||
|             'description': clean_html(info['synopsis']), | ||||
|             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), | ||||
|             'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']), | ||||
| @@ -214,15 +219,15 @@ class FranceTVIE(FranceTVBaseInfoExtractor): | ||||
|         }, | ||||
|         # france5 | ||||
|         { | ||||
|             'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968', | ||||
|             'md5': '78f0f4064f9074438e660785bbf2c5d9', | ||||
|             'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1', | ||||
|             'md5': 'f6c577df3806e26471b3d21631241fd0', | ||||
|             'info_dict': { | ||||
|                 'id': '108961659', | ||||
|                 'id': '123327454', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'C à dire ?!', | ||||
|                 'description': 'md5:1a4aeab476eb657bf57c4ff122129f81', | ||||
|                 'upload_date': '20140915', | ||||
|                 'timestamp': 1410795000, | ||||
|                 'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?', | ||||
|                 'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4', | ||||
|                 'upload_date': '20150831', | ||||
|                 'timestamp': 1441035120, | ||||
|             }, | ||||
|         }, | ||||
|         # franceo | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| @@ -48,6 +49,8 @@ from .vimeo import VimeoIE | ||||
| from .dailymotion import DailymotionCloudIE | ||||
| from .onionstudios import OnionStudiosIE | ||||
| from .snagfilms import SnagFilmsEmbedIE | ||||
| from .screenwavemedia import ScreenwaveMediaIE | ||||
| from .mtv import MTVServicesEmbeddedIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -229,6 +232,22 @@ class GenericIE(InfoExtractor): | ||||
|                 'skip_download': False, | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # redirect in Refresh HTTP header | ||||
|             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1', | ||||
|             'info_dict': { | ||||
|                 'id': 'pO8h3EaFRdo', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set', | ||||
|                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5', | ||||
|                 'upload_date': '20150917', | ||||
|                 'uploader_id': 'brtvofficial', | ||||
|                 'uploader': 'Boiler Room', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': False, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', | ||||
|             'md5': '85b90ccc9d73b4acd9138d3af4c27f89', | ||||
| @@ -1001,6 +1020,16 @@ class GenericIE(InfoExtractor): | ||||
|                 'description': 'New experience with Acrobat DC', | ||||
|                 'duration': 248.667, | ||||
|             }, | ||||
|         }, | ||||
|         # ScreenwaveMedia embed | ||||
|         { | ||||
|             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', | ||||
|             'md5': '24ace5baba0d35d55c6810b51f34e9e0', | ||||
|             'info_dict': { | ||||
|                 'id': 'cinemasnob-55d26273809dd', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'cinemasnob', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -1583,12 +1612,9 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(url, ie='Vulture') | ||||
|  | ||||
|         # Look for embedded mtvservices player | ||||
|         mobj = re.search( | ||||
|             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             url = unescapeHTML(mobj.group('url')) | ||||
|             return self.url_result(url, ie='MTVServicesEmbedded') | ||||
|         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) | ||||
|         if mtvservices_url: | ||||
|             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded') | ||||
|  | ||||
|         # Look for embedded yahoo player | ||||
|         mobj = re.search( | ||||
| @@ -1627,7 +1653,7 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group('url'), 'MLB') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, | ||||
|             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') | ||||
| @@ -1718,6 +1744,11 @@ class GenericIE(InfoExtractor): | ||||
|         if snagfilms_url: | ||||
|             return self.url_result(snagfilms_url) | ||||
|  | ||||
|         # Look for ScreenwaveMedia embeds | ||||
|         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') | ||||
|  | ||||
|         # Look for AdobeTVVideo embeds | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', | ||||
| @@ -1781,7 +1812,7 @@ class GenericIE(InfoExtractor): | ||||
|                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) | ||||
|         if not found: | ||||
|             # HTML5 video | ||||
|             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) | ||||
|             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) | ||||
|         if not found: | ||||
|             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' | ||||
|             found = re.search( | ||||
| @@ -1792,6 +1823,9 @@ class GenericIE(InfoExtractor): | ||||
|                 # Look also in Refresh HTTP header | ||||
|                 refresh_header = head_response.headers.get('Refresh') | ||||
|                 if refresh_header: | ||||
|                     # In python 2 response HTTP headers are bytestrings | ||||
|                     if sys.version_info < (3, 0) and isinstance(refresh_header, str): | ||||
|                         refresh_header = refresh_header.decode('iso-8859-1') | ||||
|                     found = re.search(REDIRECT_REGEX, refresh_header) | ||||
|             if found: | ||||
|                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) | ||||
|   | ||||
| @@ -13,6 +13,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -359,13 +360,8 @@ class GloboIE(InfoExtractor): | ||||
|             self._API_URL_TEMPLATE % video_id, video_id)['videos'][0] | ||||
|  | ||||
|         title = video['title'] | ||||
|         duration = float_or_none(video['duration'], 1000) | ||||
|         like_count = video['likes'] | ||||
|         uploader = video['channel'] | ||||
|         uploader_id = video['channel_id'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for resource in video['resources']: | ||||
|             resource_id = resource.get('_id') | ||||
|             if not resource_id: | ||||
| @@ -407,6 +403,11 @@ class GloboIE(InfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = float_or_none(video.get('duration'), 1000) | ||||
|         like_count = int_or_none(video.get('likes')) | ||||
|         uploader = video.get('channel') | ||||
|         uploader_id = video.get('channel_id') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|   | ||||
| @@ -10,15 +10,16 @@ from ..compat import ( | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     encode_dict, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GorillaVidIE(InfoExtractor): | ||||
|     IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net' | ||||
|     IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?P<host>(?:www\.)? | ||||
|             (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/ | ||||
|             (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/ | ||||
|         (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? | ||||
|     ''' | ||||
|  | ||||
| @@ -67,13 +68,22 @@ class GorillaVidIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://movpod.in/0wguyyxi1yca', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://filehoot.com/3ivfabn7573c.html', | ||||
|         'info_dict': { | ||||
|             'id': '3ivfabn7573c', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id) | ||||
|         url = 'http://%s/%s' % (mobj.group('host'), video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
| @@ -87,7 +97,7 @@ class GorillaVidIE(InfoExtractor): | ||||
|             if countdown: | ||||
|                 self._sleep(countdown, video_id) | ||||
|  | ||||
|             post = compat_urllib_parse.urlencode(fields) | ||||
|             post = compat_urllib_parse.urlencode(encode_dict(fields)) | ||||
|  | ||||
|             req = compat_urllib_request.Request(url, post) | ||||
|             req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
| @@ -95,7 +105,7 @@ class GorillaVidIE(InfoExtractor): | ||||
|             webpage = self._download_webpage(req, video_id, 'Downloading video page') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             [r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '], | ||||
|             [r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '], | ||||
|             webpage, 'title', default=None) or self._og_search_title(webpage) | ||||
|         video_url = self._search_regex( | ||||
|             r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url') | ||||
|   | ||||
| @@ -1,80 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HostingBulkIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)?hostingbulk\.com/ | ||||
|         (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html''' | ||||
|     _FILE_DELETED_REGEX = r'<b>File Not Found</b>' | ||||
|     _TEST = { | ||||
|         'url': 'http://hostingbulk.com/n0ulw1hv20fm.html', | ||||
|         'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f', | ||||
|         'info_dict': { | ||||
|             'id': 'n0ulw1hv20fm', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'md5:5afeba33f48ec87219c269e054afd622', | ||||
|             'filesize': 6816081, | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         url = 'http://hostingbulk.com/{0:}.html'.format(video_id) | ||||
|  | ||||
|         # Custom request with cookie to set language to English, so our file | ||||
|         # deleted regex would work. | ||||
|         request = compat_urllib_request.Request( | ||||
|             url, headers={'Cookie': 'lang=english'}) | ||||
|         webpage = self._download_webpage(request, video_id) | ||||
|  | ||||
|         if re.search(self._FILE_DELETED_REGEX, webpage) is not None: | ||||
|             raise ExtractorError('Video %s does not exist' % video_id, | ||||
|                                  expected=True) | ||||
|  | ||||
|         title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title') | ||||
|         filesize = int_or_none( | ||||
|             self._search_regex( | ||||
|                 r'<small>\((\d+)\sbytes?\)</small>', | ||||
|                 webpage, | ||||
|                 'filesize', | ||||
|                 fatal=False | ||||
|             ) | ||||
|         ) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<img src="([^"]+)".+?class="pic"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         fields = self._hidden_inputs(webpage) | ||||
|  | ||||
|         request = compat_urllib_request.Request(url, urlencode_postdata(fields)) | ||||
|         request.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|         response = self._request_webpage(request, video_id, | ||||
|                                          'Submiting download request') | ||||
|         video_url = response.geturl() | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|             'filesize': filesize, | ||||
|             'url': video_url, | ||||
|         }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,7 +1,11 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     get_element_by_id, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class IconosquareIE(InfoExtractor): | ||||
| @@ -12,7 +16,7 @@ class IconosquareIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': '522207370455279102_24101272', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)', | ||||
|             'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|             'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', | ||||
|             'timestamp': 1376471991, | ||||
|             'upload_date': '20130814', | ||||
| @@ -29,8 +33,7 @@ class IconosquareIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         media = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.media\s*=\s*({.+?});\n', webpage, 'media'), | ||||
|             get_element_by_id('mediaJson', webpage), | ||||
|             video_id) | ||||
|  | ||||
|         formats = [{ | ||||
| @@ -41,9 +44,7 @@ class IconosquareIE(InfoExtractor): | ||||
|         } for format_id, f in media['videos'].items()] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)</title>', | ||||
|             webpage, 'title') | ||||
|         title = remove_end(self._og_search_title(webpage), ' - via Iconosquare') | ||||
|  | ||||
|         timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) | ||||
|         description = media.get('caption', {}).get('text') | ||||
| @@ -61,6 +62,14 @@ class IconosquareIE(InfoExtractor): | ||||
|             'height': int_or_none(t.get('height')) | ||||
|         } for thumbnail_id, t in media.get('images', {}).items()] | ||||
|  | ||||
|         comments = [{ | ||||
|             'id': comment.get('id'), | ||||
|             'text': comment['text'], | ||||
|             'timestamp': int_or_none(comment.get('created_time')), | ||||
|             'author': comment.get('from', {}).get('full_name'), | ||||
|             'author_id': comment.get('from', {}).get('username'), | ||||
|         } for comment in media.get('comments', {}).get('data', []) if 'text' in comment] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -72,4 +81,5 @@ class IconosquareIE(InfoExtractor): | ||||
|             'comment_count': comment_count, | ||||
|             'like_count': like_count, | ||||
|             'formats': formats, | ||||
|             'comments': comments, | ||||
|         } | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ImgurIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://i.imgur.com/A61SaA1.gifv', | ||||
| @@ -97,3 +97,28 @@ class ImgurIE(InfoExtractor): | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ImgurAlbumIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://imgur.com/gallery/Q95ko', | ||||
|         'info_dict': { | ||||
|             'id': 'Q95ko', | ||||
|         }, | ||||
|         'playlist_count': 25, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         album_id = self._match_id(url) | ||||
|  | ||||
|         album_images = self._download_json( | ||||
|             'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, | ||||
|             album_id)['data']['images'] | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://imgur.com/%s' % image['hash']) | ||||
|             for image in album_images if image.get('hash')] | ||||
|  | ||||
|         return self.playlist_result(entries, album_id) | ||||
|   | ||||
| @@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor): | ||||
|         ('10', 'h1'), | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def md5_text(text): | ||||
|         return hashlib.md5(text.encode('utf-8')).hexdigest() | ||||
|  | ||||
|     def construct_video_urls(self, data, video_id, _uuid): | ||||
|         def do_xor(x, y): | ||||
|             a = y % 3 | ||||
| @@ -121,7 +125,7 @@ class IqiyiIE(InfoExtractor): | ||||
|                 note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) | ||||
|             )['t'] | ||||
|             t = str(int(math.floor(int(tm) / (600.0)))) | ||||
|             return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() | ||||
|             return self.md5_text(t + mg + x) | ||||
|  | ||||
|         video_urls_dict = {} | ||||
|         for format_item in data['vp']['tkl'][0]['vs']: | ||||
| @@ -179,20 +183,19 @@ class IqiyiIE(InfoExtractor): | ||||
|  | ||||
|     def get_raw_data(self, tvid, video_id, enc_key, _uuid): | ||||
|         tm = str(int(time.time())) | ||||
|         tail = tm + tvid | ||||
|         param = { | ||||
|             'key': 'fvip', | ||||
|             'src': hashlib.md5(b'youtube-dl').hexdigest(), | ||||
|             'src': self.md5_text('youtube-dl'), | ||||
|             'tvId': tvid, | ||||
|             'vid': video_id, | ||||
|             'vinfo': 1, | ||||
|             'tm': tm, | ||||
|             'enc': hashlib.md5( | ||||
|                 (enc_key + tm + tvid).encode('utf8')).hexdigest(), | ||||
|             'enc': self.md5_text((enc_key + tail)[1:64:2] + tail), | ||||
|             'qyid': _uuid, | ||||
|             'tn': random.random(), | ||||
|             'um': 0, | ||||
|             'authkey': hashlib.md5( | ||||
|                 (tm + tvid).encode('utf8')).hexdigest() | ||||
|             'authkey': self.md5_text(self.md5_text('') + tail), | ||||
|         } | ||||
|  | ||||
|         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ | ||||
| @@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor): | ||||
|         return raw_data | ||||
|  | ||||
|     def get_enc_key(self, swf_url, video_id): | ||||
|         enc_key = '3601ba290e4f4662848c710e2122007e'  # last update at 2015-08-10 for Zombie | ||||
|         # TODO: automatic key extraction | ||||
|         enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc'  # last update at 2015-09-23-23 for Zombie::bite | ||||
|         return enc_key | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -13,12 +13,24 @@ from ..utils import ( | ||||
|  | ||||
| class KalturaIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|     (?:kaltura:| | ||||
|        https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_ | ||||
|     )(?P<partner_id>\d+) | ||||
|     (?::| | ||||
|        /(?:[^/]+/)*?entry_id/ | ||||
|     )(?P<id>[0-9a-z_]+)''' | ||||
|                 (?: | ||||
|                     kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)| | ||||
|                     https?:// | ||||
|                         (:?(?:www|cdnapisec)\.)?kaltura\.com/ | ||||
|                         (?: | ||||
|                             (?: | ||||
|                                 # flash player | ||||
|                                 index\.php/kwidget/ | ||||
|                                 (?:[^/]+/)*?wid/_(?P<partner_id>\d+)/ | ||||
|                                 (?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)| | ||||
|                                 # html5 player | ||||
|                                 html5/html5lib/ | ||||
|                                 (?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+) | ||||
|                                 .*\?.*\bwid=_(?P<partner_id_html5>\d+) | ||||
|                             ) | ||||
|                         ) | ||||
|                 ) | ||||
|                 ''' | ||||
|     _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -43,6 +55,10 @@ class KalturaIE(InfoExtractor): | ||||
|             'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _kaltura_api_call(self, video_id, actions, *args, **kwargs): | ||||
| @@ -105,9 +121,9 @@ class KalturaIE(InfoExtractor): | ||||
|             video_id, actions, note='Downloading video info JSON') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         partner_id, entry_id = mobj.group('partner_id'), mobj.group('id') | ||||
|         partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5') | ||||
|         entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5') | ||||
|  | ||||
|         info, source_data = self._get_video_info(entry_id, partner_id) | ||||
|  | ||||
| @@ -126,7 +142,7 @@ class KalturaIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'id': entry_id, | ||||
|             'title': info['name'], | ||||
|             'formats': formats, | ||||
|             'description': info.get('description'), | ||||
|   | ||||
| @@ -1,46 +1,39 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class KeekIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)' | ||||
|     IE_NAME = 'keek' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.keek.com/ytdl/keeks/NODfbab', | ||||
|         'md5': '09c5c109067536c1cec8bac8c21fea05', | ||||
|         'url': 'https://www.keek.com/keek/NODfbab', | ||||
|         'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', | ||||
|         'info_dict': { | ||||
|             'id': 'NODfbab', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': 'youtube-dl project', | ||||
|             'uploader_id': 'ytdl', | ||||
|             'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', | ||||
|             'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', | ||||
|             'uploader': 'ytdl', | ||||
|             'uploader_id': 'eGT5bab', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         video_url = 'http://cdn.keek.com/keek/video/%s' % video_id | ||||
|         thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         raw_desc = self._html_search_meta('description', webpage) | ||||
|         if raw_desc: | ||||
|             uploader = self._html_search_regex( | ||||
|                 r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False) | ||||
|             uploader_id = self._html_search_regex( | ||||
|                 r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False) | ||||
|         else: | ||||
|             uploader = None | ||||
|             uploader_id = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'url': self._og_search_video_url(webpage), | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'title': self._og_search_description(webpage).strip(), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader': self._search_regex( | ||||
|                 r'data-username=(["\'])(?P<uploader>.+?)\1', webpage, | ||||
|                 'uploader', fatal=False, group='uploader'), | ||||
|             'uploader_id': self._search_regex( | ||||
|                 r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage, | ||||
|                 'uploader id', fatal=False, group='uploader_id'), | ||||
|         } | ||||
|   | ||||
| @@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor): | ||||
|             'duration': 27, | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'Not accessible from Travis CI server', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE): | ||||
|             'upload_date': '20080122', | ||||
|             'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' | ||||
|         }, | ||||
|         'skip': 'this song has been offline because of copyright issues', | ||||
|     }, { | ||||
|         'url': 'http://www.kuwo.cn/yinyue/6446136/', | ||||
|         'info_dict': { | ||||
| @@ -76,9 +77,11 @@ class KuwoIE(KuwoBaseIE): | ||||
|         webpage = self._download_webpage( | ||||
|             url, song_id, note='Download song detail info', | ||||
|             errnote='Unable to get song detail info') | ||||
|         if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: | ||||
|             raise ExtractorError('this song has been offline because of copyright issues', expected=True) | ||||
|  | ||||
|         song_name = self._html_search_regex( | ||||
|             r'<h1[^>]+title="([^"]+)">', webpage, 'song name') | ||||
|             r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name') | ||||
|         singer_name = self._html_search_regex( | ||||
|             r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"', | ||||
|             webpage, 'singer name', fatal=False) | ||||
| @@ -202,6 +205,7 @@ class KuwoSingerIE(InfoExtractor): | ||||
|             'title': 'Ali', | ||||
|         }, | ||||
|         'playlist_mincount': 95, | ||||
|         'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540 | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										229
									
								
								youtube_dl/extractor/limelight.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										229
									
								
								youtube_dl/extractor/limelight.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,229 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LimelightBaseIE(InfoExtractor): | ||||
|     _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' | ||||
|     _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' | ||||
|  | ||||
|     def _call_playlist_service(self, item_id, method, fatal=True): | ||||
|         return self._download_json( | ||||
|             self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), | ||||
|             item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) | ||||
|  | ||||
|     def _call_api(self, organization_id, item_id, method): | ||||
|         return self._download_json( | ||||
|             self._API_URL % (organization_id, self._API_PATH, item_id, method), | ||||
|             item_id, 'Downloading API %s JSON' % method) | ||||
|  | ||||
|     def _extract(self, item_id, pc_method, mobile_method, meta_method): | ||||
|         pc = self._call_playlist_service(item_id, pc_method) | ||||
|         metadata = self._call_api(pc['orgId'], item_id, meta_method) | ||||
|         mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) | ||||
|         return pc, mobile, metadata | ||||
|  | ||||
|     def _extract_info(self, streams, mobile_urls, properties): | ||||
|         video_id = properties['media_id'] | ||||
|         formats = [] | ||||
|  | ||||
|         for stream in streams: | ||||
|             stream_url = stream.get('url') | ||||
|             if not stream_url: | ||||
|                 continue | ||||
|             if '.f4m' in stream_url: | ||||
|                 formats.extend(self._extract_f4m_formats(stream_url, video_id)) | ||||
|             else: | ||||
|                 fmt = { | ||||
|                     'url': stream_url, | ||||
|                     'abr': float_or_none(stream.get('audioBitRate')), | ||||
|                     'vbr': float_or_none(stream.get('videoBitRate')), | ||||
|                     'fps': float_or_none(stream.get('videoFrameRate')), | ||||
|                     'width': int_or_none(stream.get('videoWidthInPixels')), | ||||
|                     'height': int_or_none(stream.get('videoHeightInPixels')), | ||||
|                     'ext': determine_ext(stream_url) | ||||
|                 } | ||||
|                 rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url) | ||||
|                 if rtmp: | ||||
|                     format_id = 'rtmp' | ||||
|                     if stream.get('videoBitRate'): | ||||
|                         format_id += '-%d' % int_or_none(stream['videoBitRate']) | ||||
|                     fmt.update({ | ||||
|                         'url': rtmp.group('url'), | ||||
|                         'play_path': rtmp.group('playpath'), | ||||
|                         'app': rtmp.group('app'), | ||||
|                         'ext': 'flv', | ||||
|                         'format_id': format_id, | ||||
|                     }) | ||||
|                 formats.append(fmt) | ||||
|  | ||||
|         for mobile_url in mobile_urls: | ||||
|             media_url = mobile_url.get('mobileUrl') | ||||
|             if not media_url: | ||||
|                 continue | ||||
|             format_id = mobile_url.get('targetMediaPlatform') | ||||
|             if determine_ext(media_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     media_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     preference=-1, m3u8_id=format_id)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': media_url, | ||||
|                     'format_id': format_id, | ||||
|                     'preference': -1, | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = properties['title'] | ||||
|         description = properties.get('description') | ||||
|         timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date')) | ||||
|         duration = float_or_none(properties.get('duration_in_milliseconds'), 1000) | ||||
|         filesize = int_or_none(properties.get('total_storage_in_bytes')) | ||||
|         categories = [properties.get('category')] | ||||
|         tags = properties.get('tags', []) | ||||
|         thumbnails = [{ | ||||
|             'url': thumbnail['url'], | ||||
|             'width': int_or_none(thumbnail.get('width')), | ||||
|             'height': int_or_none(thumbnail.get('height')), | ||||
|         } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] | ||||
|  | ||||
|         subtitles = {} | ||||
|         for caption in properties.get('captions', {}): | ||||
|             lang = caption.get('language_code') | ||||
|             subtitles_url = caption.get('url') | ||||
|             if lang and subtitles_url: | ||||
|                 subtitles[lang] = [{ | ||||
|                     'url': subtitles_url, | ||||
|                 }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'formats': formats, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'filesize': filesize, | ||||
|             'categories': categories, | ||||
|             'tags': tags, | ||||
|             'thumbnails': thumbnails, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class LimelightMediaIE(LimelightBaseIE): | ||||
|     IE_NAME = 'limelight' | ||||
|     _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', | ||||
|         'info_dict': { | ||||
|             'id': '3ffd040b522b4485b6d84effc750cd86', | ||||
|             'ext': 'flv', | ||||
|             'title': 'HaP and the HB Prince Trailer', | ||||
|             'description': 'md5:8005b944181778e313d95c1237ddb640', | ||||
|             'thumbnail': 're:^https?://.*\.jpeg$', | ||||
|             'duration': 144.23, | ||||
|             'timestamp': 1244136834, | ||||
|             'upload_date': '20090604', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # video with subtitles | ||||
|         'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', | ||||
|         'info_dict': { | ||||
|             'id': 'a3e00274d4564ec4a9b29b9466432335', | ||||
|             'ext': 'flv', | ||||
|             'title': '3Play Media Overview Video', | ||||
|             'description': '', | ||||
|             'thumbnail': 're:^https?://.*\.jpeg$', | ||||
|             'duration': 78.101, | ||||
|             'timestamp': 1338929955, | ||||
|             'upload_date': '20120605', | ||||
|             'subtitles': 'mincount:9', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|     _PLAYLIST_SERVICE_PATH = 'media' | ||||
|     _API_PATH = 'media' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         pc, mobile, metadata = self._extract( | ||||
|             video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') | ||||
|  | ||||
|         return self._extract_info( | ||||
|             pc['playlistItems'][0].get('streams', []), | ||||
|             mobile['mediaList'][0].get('mobileUrls', []) if mobile else [], | ||||
|             metadata) | ||||
|  | ||||
|  | ||||
| class LimelightChannelIE(LimelightBaseIE): | ||||
|     IE_NAME = 'limelight:channel' | ||||
|     _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' | ||||
|     _TEST = { | ||||
|         'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', | ||||
|         'info_dict': { | ||||
|             'id': 'ab6a524c379342f9b23642917020c082', | ||||
|             'title': 'Javascript Sample Code', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     } | ||||
|     _PLAYLIST_SERVICE_PATH = 'channel' | ||||
|     _API_PATH = 'channels' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
|  | ||||
|         pc, mobile, medias = self._extract( | ||||
|             channel_id, 'getPlaylistByChannelId', | ||||
|             'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') | ||||
|  | ||||
|         entries = [ | ||||
|             self._extract_info( | ||||
|                 pc['playlistItems'][i].get('streams', []), | ||||
|                 mobile['mediaList'][i].get('mobileUrls', []) if mobile else [], | ||||
|                 medias['media_list'][i]) | ||||
|             for i in range(len(medias['media_list']))] | ||||
|  | ||||
|         return self.playlist_result(entries, channel_id, pc['title']) | ||||
|  | ||||
|  | ||||
| class LimelightChannelListIE(LimelightBaseIE): | ||||
|     IE_NAME = 'limelight:channel_list' | ||||
|     _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' | ||||
|     _TEST = { | ||||
|         'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', | ||||
|         'info_dict': { | ||||
|             'id': '301b117890c4465c8179ede21fd92e2b', | ||||
|             'title': 'Website - Hero Player', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|     } | ||||
|     _PLAYLIST_SERVICE_PATH = 'channel_list' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_list_id = self._match_id(url) | ||||
|  | ||||
|         channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById') | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') | ||||
|             for channel in channel_list['channelList']] | ||||
|  | ||||
|         return self.playlist_result(entries, channel_list_id, channel_list['title']) | ||||
| @@ -118,9 +118,7 @@ class LyndaIE(LyndaBaseIE): | ||||
|                 'lynda returned error: %s' % video_json['Message'], expected=True) | ||||
|  | ||||
|         if video_json['HasAccess'] is False: | ||||
|             raise ExtractorError( | ||||
|                 'Video %s is only available for members. ' | ||||
|                 % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True) | ||||
|             self.raise_login_required('Video %s is only available for members' % video_id) | ||||
|  | ||||
|         video_id = compat_str(video_json['ID']) | ||||
|         duration = video_json['DurationInSeconds'] | ||||
|   | ||||
| @@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor): | ||||
|                 'uploader_id': 'sonypicturesrus@mail.ru', | ||||
|                 'duration': 184, | ||||
|             }, | ||||
|             'skip': 'Not accessible from Travis CI server', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', | ||||
| @@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor): | ||||
|                 'uploader_id': 'hitech@corp.mail.ru', | ||||
|                 'duration': 245, | ||||
|             }, | ||||
|             'skip': 'Not accessible from Travis CI server', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|   | ||||
| @@ -18,12 +18,12 @@ class TechTVMITIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', | ||||
|         'md5': '1f8cb3e170d41fd74add04d3c9330e5f', | ||||
|         'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7', | ||||
|         'info_dict': { | ||||
|             'id': '25418', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'MIT DNA Learning Center Set', | ||||
|             'description': 'md5:82313335e8a8a3f243351ba55bc1b474', | ||||
|             'title': 'MIT DNA and Protein Sets', | ||||
|             'description': 'md5:46f5c69ce434f0a97e7c628cc142802d', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -33,8 +33,8 @@ class TechTVMITIE(InfoExtractor): | ||||
|             'http://techtv.mit.edu/videos/%s' % video_id, video_id) | ||||
|         clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) | ||||
|  | ||||
|         base_url = self._search_regex( | ||||
|             r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') | ||||
|         base_url = self._proto_relative_url(self._search_regex( | ||||
|             r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:') | ||||
|         formats_json = self._search_regex( | ||||
|             r'bitrates: (\[.+?\])', raw_page, 'video formats') | ||||
|         formats_mit = json.loads(formats_json) | ||||
|   | ||||
| @@ -1,74 +1,85 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..compat import compat_urllib_parse | ||||
| from ..utils import ( | ||||
|     encode_dict, | ||||
|     get_element_by_attribute, | ||||
|     parse_duration, | ||||
|     strip_jsonp, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MiTeleIE(InfoExtractor): | ||||
|     IE_NAME = 'mitele.es' | ||||
|     IE_DESC = 'mitele.es' | ||||
|     _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a', | ||||
|         'info_dict': { | ||||
|             'id': '0fce117d', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Programa 144 - Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'id': '0NF1jJnxS1Wu3pHrmvFyw2', | ||||
|             'display_id': 'programa-144', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, episode) | ||||
|         embed_data_json = self._search_regex( | ||||
|             r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', | ||||
|         ).replace('\'', '"') | ||||
|         embed_data = json.loads(embed_data_json) | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         domain = embed_data['mediaUrl'] | ||||
|         if not domain.startswith('http'): | ||||
|             # only happens in telecinco.es videos | ||||
|             domain = 'http://' + domain | ||||
|         info_url = compat_urlparse.urljoin( | ||||
|             domain, | ||||
|             compat_urllib_parse_unquote(embed_data['flashvars']['host']) | ||||
|         ) | ||||
|         info_el = self._download_xml(info_url, episode).find('./video/info') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_link = info_el.find('videoUrl/link').text | ||||
|         token_query = compat_urllib_parse.urlencode({'id': video_link}) | ||||
|         token_info = self._download_json( | ||||
|             embed_data['flashvars']['ov_tk'] + '?' + token_query, | ||||
|             episode, | ||||
|             transform_source=strip_jsonp | ||||
|         ) | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             token_info['tokenizedUrl'], episode, ext='mp4') | ||||
|         config_url = self._search_regex( | ||||
|             r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url') | ||||
|  | ||||
|         config = self._download_json( | ||||
|             config_url, display_id, 'Downloading config JSON') | ||||
|  | ||||
|         mmc = self._download_json( | ||||
|             config['services']['mmc'], display_id, 'Downloading mmc JSON') | ||||
|  | ||||
|         formats = [] | ||||
|         for location in mmc['locations']: | ||||
|             gat = self._proto_relative_url(location.get('gat'), 'http:') | ||||
|             bas = location.get('bas') | ||||
|             loc = location.get('loc') | ||||
|             ogn = location.get('ogn') | ||||
|             if None in (gat, bas, loc, ogn): | ||||
|                 continue | ||||
|             token_data = { | ||||
|                 'bas': bas, | ||||
|                 'icd': loc, | ||||
|                 'ogn': ogn, | ||||
|                 'sta': '0', | ||||
|             } | ||||
|             media = self._download_json( | ||||
|                 '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')), | ||||
|                 display_id, 'Downloading %s JSON' % location['loc']) | ||||
|             file_ = media.get('file') | ||||
|             if not file_: | ||||
|                 continue | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', | ||||
|                 display_id, f4m_id=loc)) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-media-id\s*=\s*"([^"]+)"', webpage, | ||||
|             'data media id', default=None) or display_id | ||||
|         thumbnail = config.get('poster', {}).get('imageUrl') | ||||
|         duration = int_or_none(mmc.get('duration')) | ||||
|  | ||||
|         return { | ||||
|             'id': embed_data['videoId'], | ||||
|             'display_id': episode, | ||||
|             'title': info_el.find('title').text, | ||||
|             'formats': formats, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'thumbnail': info_el.find('thumb').text, | ||||
|             'duration': parse_duration(info_el.find('duration').text), | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         return [{'url': url, 'ext': 'mp4'}] | ||||
|  | ||||
|     def _extract_video_formats(self, mdoc, mtvn_id): | ||||
|         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: | ||||
|         if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None: | ||||
|             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: | ||||
|                 self.to_screen('The normal version is not available from your ' | ||||
|                                'country, trying with the mobile version') | ||||
| @@ -114,7 +114,8 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         # Remove the templates, like &device={device} | ||||
|         mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) | ||||
|         if 'acceptMethods' not in mediagen_url: | ||||
|             mediagen_url += '&acceptMethods=fms' | ||||
|             mediagen_url += '&' if '?' in mediagen_url else '?' | ||||
|             mediagen_url += 'acceptMethods=fms' | ||||
|  | ||||
|         mediagen_doc = self._download_xml(mediagen_url, video_id, | ||||
|                                           'Downloading video urls') | ||||
| @@ -141,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//title') | ||||
|             title_el = itemdoc.find('.//title') or itemdoc.find('./title') | ||||
|             if title_el.text is None: | ||||
|                 title_el = None | ||||
|  | ||||
| @@ -174,8 +175,11 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         if self._LANG: | ||||
|             info_url += 'lang=%s&' % self._LANG | ||||
|         info_url += data | ||||
|         return self._get_videos_info_from_url(info_url, video_id) | ||||
|  | ||||
|     def _get_videos_info_from_url(self, url, video_id): | ||||
|         idoc = self._download_xml( | ||||
|             info_url, video_id, | ||||
|             url, video_id, | ||||
|             'Downloading info', transform_source=fix_xml_ampersands) | ||||
|         return self.playlist_result( | ||||
|             [self._get_video_info(item) for item in idoc.findall('.//item')]) | ||||
| @@ -196,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         if mgid is None or ':' not in mgid: | ||||
|             mgid = self._search_regex( | ||||
|                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], | ||||
|                 webpage, 'mgid') | ||||
|                 webpage, 'mgid', default=None) | ||||
|  | ||||
|         if not mgid: | ||||
|             sm4_embed = self._html_search_meta( | ||||
|                 'sm4:video:embed', webpage, 'sm4 embed', default='') | ||||
|             mgid = self._search_regex( | ||||
|                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') | ||||
|  | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|         return videos_info | ||||
| @@ -218,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _get_feed_url(self, uri): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         site_id = uri.replace(video_id, '') | ||||
| @@ -288,3 +305,65 @@ class MTVIggyIE(MTVServicesInfoExtractor): | ||||
|         } | ||||
|     } | ||||
|     _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' | ||||
|  | ||||
|  | ||||
| class MTVDEIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'mtv.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', | ||||
|         'info_dict': { | ||||
|             'id': 'music_video-a50bc5f0b3aa4b3190aa', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'MusicVideo_cro-traum', | ||||
|             'description': 'Cro - Traum', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) | ||||
|         'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', | ||||
|         'info_dict': { | ||||
|             'id': 'local_playlist-f5ae778b9832cc837189', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # single video in pagePlaylist with different id | ||||
|         'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', | ||||
|         'info_dict': { | ||||
|             'id': 'local_playlist-4e760566473c4c8c5344', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1', | ||||
|             'description': 'MTV Movies Supercut', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         playlist = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), | ||||
|             video_id) | ||||
|  | ||||
|         # news pages contain single video in playlist with different id | ||||
|         if len(playlist) == 1: | ||||
|             return self._get_videos_info_from_url(playlist[0]['mrss'], video_id) | ||||
|  | ||||
|         for item in playlist: | ||||
|             item_id = item.get('id') | ||||
|             if item_id and compat_str(item_id) == video_id: | ||||
|                 return self._get_videos_info_from_url(item['mrss'], video_id) | ||||
|   | ||||
| @@ -1,63 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class MusicVaultIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html', | ||||
|         'md5': '3adcbdb3dcc02d647539e53f284ba171', | ||||
|         'info_dict': { | ||||
|             'id': '1010863', | ||||
|             'ext': 'mp4', | ||||
|             'uploader_id': 'the-allman-brothers-band', | ||||
|             'title': 'Straight from the Heart', | ||||
|             'duration': 244, | ||||
|             'uploader': 'The Allman Brothers Band', | ||||
|             'thumbnail': 're:^https?://.*/thumbnail/.*', | ||||
|             'upload_date': '20131219', | ||||
|             'location': 'Capitol Theatre (Passaic, NJ)', | ||||
|             'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981', | ||||
|             'timestamp': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         thumbnail = self._search_regex( | ||||
|             r'<meta itemprop="thumbnail" content="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         data_div = self._search_regex( | ||||
|             r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields') | ||||
|         uploader = self._html_search_regex( | ||||
|             r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False) | ||||
|         title = self._html_search_regex( | ||||
|             r'<h2.*?>(.*?)</h2>', data_div, 'title') | ||||
|         location = self._html_search_regex( | ||||
|             r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False) | ||||
|  | ||||
|         kaltura_id = self._search_regex( | ||||
|             r'<div id="video-detail-player" data-kaltura-id="([^"]+)"', | ||||
|             webpage, 'kaltura ID') | ||||
|         wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid') | ||||
|  | ||||
|         return { | ||||
|             'id': mobj.group('id'), | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'kaltura:%s:%s' % (wid, kaltura_id), | ||||
|             'ie_key': 'Kaltura', | ||||
|             'display_id': display_id, | ||||
|             'uploader_id': mobj.group('uploader_id'), | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': self._html_search_meta('description', webpage), | ||||
|             'location': location, | ||||
|             'title': title, | ||||
|             'uploader': uploader, | ||||
|         } | ||||
| @@ -10,7 +10,6 @@ from ..compat import ( | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -46,11 +45,11 @@ class NaverIE(InfoExtractor): | ||||
|         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', | ||||
|                          webpage) | ||||
|         if m_id is None: | ||||
|             m_error = re.search( | ||||
|                 r'(?s)<div class="(?:nation_error|nation_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', | ||||
|                 webpage) | ||||
|             if m_error: | ||||
|                 raise ExtractorError(clean_html(m_error.group('msg')), expected=True) | ||||
|             error = self._html_search_regex( | ||||
|                 r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', | ||||
|                 webpage, 'error', default=None) | ||||
|             if error: | ||||
|                 raise ExtractorError(error, expected=True) | ||||
|             raise ExtractorError('couldn\'t extract vid and key') | ||||
|         vid = m_id.group(1) | ||||
|         key = m_id.group(2) | ||||
|   | ||||
| @@ -1,130 +1,380 @@ | ||||
| # encoding: utf-8 | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     qualities, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NDRBaseIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         title = self._og_search_title(page).strip() | ||||
|         description = self._og_search_description(page) | ||||
|         if description: | ||||
|             description = description.strip() | ||||
|  | ||||
|         duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None)) | ||||
|         if not duration: | ||||
|             duration = parse_duration(self._html_search_regex( | ||||
|                 r'(<span class="min">\d+</span>:<span class="sec">\d+</span>)', | ||||
|                 page, 'duration', default=None)) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) | ||||
|         if mp3_url: | ||||
|             formats.append({ | ||||
|                 'url': mp3_url.group('audio'), | ||||
|                 'format_id': 'mp3', | ||||
|             }) | ||||
|  | ||||
|         thumbnail = None | ||||
|  | ||||
|         video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page) | ||||
|         if video_url: | ||||
|             thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) | ||||
|             if thumbnails: | ||||
|                 quality_key = qualities(['xs', 's', 'm', 'l', 'xl']) | ||||
|                 largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1])) | ||||
|                 thumbnail = 'http://www.ndr.de' + largest[0] | ||||
|  | ||||
|             for format_id in 'lo', 'hi', 'hq': | ||||
|                 formats.append({ | ||||
|                     'url': '%s.%s.mp4' % (video_url.group('video'), format_id), | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|  | ||||
|         if not formats: | ||||
|             raise ExtractorError('No media links available for %s' % video_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         return self._extract_embed(webpage, display_id) | ||||
|  | ||||
|  | ||||
| class NDRIE(NDRBaseIE): | ||||
|     IE_NAME = 'ndr' | ||||
|     IE_DESC = 'NDR.de - Mediathek' | ||||
|     _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html' | ||||
|     IE_DESC = 'NDR.de - Norddeutscher Rundfunk' | ||||
|     _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html' | ||||
|     _TESTS = [{ | ||||
|         # httpVideo, same content id | ||||
|         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', | ||||
|         'md5': '6515bc255dc5c5f8c85bbc38e035a659', | ||||
|         'info_dict': { | ||||
|             'id': 'hafengeburtstag988', | ||||
|             'display_id': 'Party-Poette-und-Parade', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Party, Pötte und Parade', | ||||
|             'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c', | ||||
|             'uploader': 'ndrtv', | ||||
|             'timestamp': 1431108900, | ||||
|             'upload_date': '20150510', | ||||
|             'duration': 3498, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpVideo, different content id | ||||
|         'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html', | ||||
|         'md5': '1043ff203eab307f0c51702ec49e9a71', | ||||
|         'info_dict': { | ||||
|             'id': 'osna272', | ||||
|             'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights', | ||||
|             'description': 'md5:32e9b800b3d2d4008103752682d5dc01', | ||||
|             'uploader': 'ndrtv', | ||||
|             'timestamp': 1442059200, | ||||
|             'upload_date': '20150912', | ||||
|             'duration': 510, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpAudio, same content id | ||||
|         'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', | ||||
|         'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', | ||||
|         'info_dict': { | ||||
|             'id': 'audio51535', | ||||
|             'display_id': 'La-Valette-entgeht-der-Hinrichtung', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'La Valette entgeht der Hinrichtung', | ||||
|             'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', | ||||
|             'uploader': 'ndrinfo', | ||||
|             'timestamp': 1290626100, | ||||
|             'upload_date': '20140729', | ||||
|             'duration': 884, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html', | ||||
|             'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c', | ||||
|             'note': 'Video file', | ||||
|             'info_dict': { | ||||
|                 'id': '25866', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kartoffeltage in der Lewitz', | ||||
|                 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8', | ||||
|                 'duration': 166, | ||||
|             }, | ||||
|             'skip': '404 Not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', | ||||
|             'md5': 'dadc003c55ae12a5d2f6bd436cd73f59', | ||||
|             'info_dict': { | ||||
|                 'id': '988', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Party, Pötte und Parade', | ||||
|                 'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.', | ||||
|                 'duration': 3498, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ndr.de/info/audio51535.html', | ||||
|             'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', | ||||
|             'note': 'Audio file', | ||||
|             'info_dict': { | ||||
|                 'id': '51535', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'La Valette entgeht der Hinrichtung', | ||||
|                 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', | ||||
|                 'duration': 884, | ||||
|             } | ||||
|     def _extract_embed(self, webpage, display_id): | ||||
|         embed_url = self._html_search_meta( | ||||
|             'embedURL', webpage, 'embed URL', fatal=True) | ||||
|         description = self._search_regex( | ||||
|             r'<p[^>]+itemprop="description">([^<]+)</p>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         timestamp = parse_iso8601( | ||||
|             self._search_regex( | ||||
|                 r'<span itemprop="datePublished" content="([^"]+)">', | ||||
|                 webpage, 'upload date', fatal=False)) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': embed_url, | ||||
|             'display_id': display_id, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|  | ||||
| class NJoyIE(NDRBaseIE): | ||||
|     IE_NAME = 'N-JOY' | ||||
|     _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P<id>\d+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|     IE_NAME = 'njoy' | ||||
|     IE_DESC = 'N-JOY' | ||||
|     _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html' | ||||
|     _TESTS = [{ | ||||
|         # httpVideo, same content id | ||||
|         'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', | ||||
|         'md5': 'cb63be60cd6f9dd75218803146d8dc67', | ||||
|         'info_dict': { | ||||
|             'id': '2480', | ||||
|             'id': 'comedycontest2480', | ||||
|             'display_id': 'Benaissa-beim-NDR-Comedy-Contest', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Benaissa beim NDR Comedy Contest', | ||||
|             'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.', | ||||
|             'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39', | ||||
|             'uploader': 'ndrtv', | ||||
|             'upload_date': '20141129', | ||||
|             'duration': 654, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpVideo, different content id | ||||
|         'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html', | ||||
|         'md5': '417660fffa90e6df2fda19f1b40a64d8', | ||||
|         'info_dict': { | ||||
|             'id': 'dockville882', | ||||
|             'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-', | ||||
|             'ext': 'mp4', | ||||
|             'title': '"Ich hab noch nie" mit Felix Jaehn', | ||||
|             'description': 'md5:85dd312d53be1b99e1f998a16452a2f3', | ||||
|             'uploader': 'njoy', | ||||
|             'upload_date': '20150822', | ||||
|             'duration': 211, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _extract_embed(self, webpage, display_id): | ||||
|         video_id = self._search_regex( | ||||
|             r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') | ||||
|         description = self._search_regex( | ||||
|             r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'NDREmbedBase', | ||||
|             'url': 'ndr:%s' % video_id, | ||||
|             'display_id': display_id, | ||||
|             'description': description, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
| class NDREmbedBaseIE(InfoExtractor): | ||||
|     IE_NAME = 'ndr:embed:base' | ||||
|     _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'ndr:soundcheck3366', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/soundcheck3366-ppjson.json', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') or mobj.group('id_s') | ||||
|  | ||||
|         ppjson = self._download_json( | ||||
|             'http://www.ndr.de/%s-ppjson.json' % video_id, video_id) | ||||
|  | ||||
|         playlist = ppjson['playlist'] | ||||
|  | ||||
|         formats = [] | ||||
|         quality_key = qualities(('xs', 's', 'm', 'l', 'xl')) | ||||
|  | ||||
|         for format_id, f in playlist.items(): | ||||
|             src = f.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = determine_ext(src, None) | ||||
|             if ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds')) | ||||
|             elif ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, m3u8_id='hls', entry_protocol='m3u8_native')) | ||||
|             else: | ||||
|                 quality = f.get('quality') | ||||
|                 ff = { | ||||
|                     'url': src, | ||||
|                     'format_id': quality or format_id, | ||||
|                     'quality': quality_key(quality), | ||||
|                 } | ||||
|                 type_ = f.get('type') | ||||
|                 if type_ and type_.split('/')[0] == 'audio': | ||||
|                     ff['vcodec'] = 'none' | ||||
|                     ff['ext'] = ext or 'mp3' | ||||
|                 formats.append(ff) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         config = playlist['config'] | ||||
|  | ||||
|         live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive'] | ||||
|         title = config['title'] | ||||
|         if live: | ||||
|             title = self._live_title(title) | ||||
|         uploader = ppjson.get('config', {}).get('branding') | ||||
|         upload_date = ppjson.get('config', {}).get('publicationDate') | ||||
|         duration = int_or_none(config.get('duration')) | ||||
|  | ||||
|         thumbnails = [{ | ||||
|             'id': thumbnail.get('quality') or thumbnail_id, | ||||
|             'url': thumbnail['src'], | ||||
|             'preference': quality_key(thumbnail.get('quality')), | ||||
|         } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'is_live': live, | ||||
|             'uploader': uploader if uploader != '-' else None, | ||||
|             'upload_date': upload_date[0:8] if upload_date else None, | ||||
|             'duration': duration, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NDREmbedIE(NDREmbedBaseIE): | ||||
|     IE_NAME = 'ndr:embed' | ||||
|     _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', | ||||
|         'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', | ||||
|         'info_dict': { | ||||
|             'id': 'ndraktuell28488', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Norddeutschland begrüßt Flüchtlinge', | ||||
|             'is_live': False, | ||||
|             'uploader': 'ndrtv', | ||||
|             'upload_date': '20150907', | ||||
|             'duration': 132, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html', | ||||
|         'md5': '002085c44bae38802d94ae5802a36e78', | ||||
|         'info_dict': { | ||||
|             'id': 'soundcheck3366', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen', | ||||
|             'is_live': False, | ||||
|             'uploader': 'ndr2', | ||||
|             'upload_date': '20150912', | ||||
|             'duration': 3554, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/info/audio51535-player.html', | ||||
|         'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', | ||||
|         'info_dict': { | ||||
|             'id': 'audio51535', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'La Valette entgeht der Hinrichtung', | ||||
|             'is_live': False, | ||||
|             'uploader': 'ndrinfo', | ||||
|             'upload_date': '20140729', | ||||
|             'duration': 884, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html', | ||||
|         'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c', | ||||
|         'info_dict': { | ||||
|             'id': 'visite11010', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Visite - die ganze Sendung', | ||||
|             'is_live': False, | ||||
|             'uploader': 'ndrtv', | ||||
|             'upload_date': '20150902', | ||||
|             'duration': 3525, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpVideoLive | ||||
|         'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html', | ||||
|         'info_dict': { | ||||
|             'id': 'livestream217', | ||||
|             'ext': 'flv', | ||||
|             'title': 're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||
|             'is_live': True, | ||||
|             'upload_date': '20150910', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ndr.de/fernsehen/doku952-player.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class NJoyEmbedIE(NDREmbedBaseIE): | ||||
|     IE_NAME = 'njoy:embed' | ||||
|     _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' | ||||
|     _TESTS = [{ | ||||
|         # httpVideo | ||||
|         'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', | ||||
|         'md5': '8483cbfe2320bd4d28a349d62d88bd74', | ||||
|         'info_dict': { | ||||
|             'id': 'doku948', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Zehn Jahre Reeperbahn Festival - die Doku', | ||||
|             'is_live': False, | ||||
|             'upload_date': '20150807', | ||||
|             'duration': 1011, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpAudio | ||||
|         'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html', | ||||
|         'md5': 'd989f80f28ac954430f7b8a48197188a', | ||||
|         'info_dict': { | ||||
|             'id': 'stefanrichter100', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Interview mit einem Augenzeugen', | ||||
|             'is_live': False, | ||||
|             'uploader': 'njoy', | ||||
|             'upload_date': '20150909', | ||||
|             'duration': 140, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # httpAudioLive, no explicit ext | ||||
|         'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html', | ||||
|         'info_dict': { | ||||
|             'id': 'webradioweltweit100', | ||||
|             'ext': 'mp3', | ||||
|             'title': 're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||
|             'is_live': True, | ||||
|             'uploader': 'njoy', | ||||
|             'upload_date': '20150810', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|   | ||||
| @@ -126,7 +126,8 @@ class AppleDailyIE(NextMediaIE): | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd', | ||||
|             'upload_date': '20150128', | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'redirect to http://www.appledaily.com.tw/animation/', | ||||
|     }, { | ||||
|         # No thumbnail | ||||
|         'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/', | ||||
| @@ -140,10 +141,19 @@ class AppleDailyIE(NextMediaIE): | ||||
|         }, | ||||
|         'expected_warnings': [ | ||||
|             'video thumbnail', | ||||
|         ] | ||||
|         ], | ||||
|         'skip': 'redirect to http://www.appledaily.com.tw/animation/', | ||||
|     }, { | ||||
|         'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/', | ||||
|         'only_matching': True, | ||||
|         'md5': 'eaa20e6b9df418c912d7f5dec2ba734d', | ||||
|         'info_dict': { | ||||
|             'id': '35770334', | ||||
|             'ext': 'mp4', | ||||
|             'title': '咖啡占卜測 XU裝熟指數', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', | ||||
|             'upload_date': '20140417', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _URL_PATTERN = r'\{url: \'(.+)\'\}' | ||||
|   | ||||
| @@ -16,53 +16,118 @@ from ..utils import ( | ||||
|  | ||||
| class NFLIE(InfoExtractor): | ||||
|     IE_NAME = 'nfl.com' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/ | ||||
|         (?:.+?/)* | ||||
|         (?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', | ||||
|             'md5': '394ef771ddcd1354f665b471d78ec4c6', | ||||
|             'info_dict': { | ||||
|                 'id': '0ap3000000398478', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Week 3: Redskins vs. Eagles highlights', | ||||
|                 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', | ||||
|                 'upload_date': '20140921', | ||||
|                 'timestamp': 1411337580, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|             'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', | ||||
|             'info_dict': { | ||||
|                 'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'LIVE: Post Game vs. Browns', | ||||
|                 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', | ||||
|                 'upload_date': '20131229', | ||||
|                 'timestamp': 1388354455, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', | ||||
|             'info_dict': { | ||||
|                 'id': '0ap3000000467607', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Frustrations flare on the field', | ||||
|                 'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', | ||||
|                 'timestamp': 1422850320, | ||||
|                 'upload_date': '20150202', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', | ||||
|             'only_matching': True, | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?P<host> | ||||
|                             (?:www\.)? | ||||
|                             (?: | ||||
|                                 (?: | ||||
|                                     nfl| | ||||
|                                     buffalobills| | ||||
|                                     miamidolphins| | ||||
|                                     patriots| | ||||
|                                     newyorkjets| | ||||
|                                     baltimoreravens| | ||||
|                                     bengals| | ||||
|                                     clevelandbrowns| | ||||
|                                     steelers| | ||||
|                                     houstontexans| | ||||
|                                     colts| | ||||
|                                     jaguars| | ||||
|                                     titansonline| | ||||
|                                     denverbroncos| | ||||
|                                     kcchiefs| | ||||
|                                     raiders| | ||||
|                                     chargers| | ||||
|                                     dallascowboys| | ||||
|                                     giants| | ||||
|                                     philadelphiaeagles| | ||||
|                                     redskins| | ||||
|                                     chicagobears| | ||||
|                                     detroitlions| | ||||
|                                     packers| | ||||
|                                     vikings| | ||||
|                                     atlantafalcons| | ||||
|                                     panthers| | ||||
|                                     neworleanssaints| | ||||
|                                     buccaneers| | ||||
|                                     azcardinals| | ||||
|                                     stlouisrams| | ||||
|                                     49ers| | ||||
|                                     seahawks | ||||
|                                 )\.com| | ||||
|                                 .+?\.clubs\.nfl\.com | ||||
|                             ) | ||||
|                         )/ | ||||
|                         (?:.+?/)* | ||||
|                         (?P<id>[^/#?&]+) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', | ||||
|         'md5': '394ef771ddcd1354f665b471d78ec4c6', | ||||
|         'info_dict': { | ||||
|             'id': '0ap3000000398478', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Week 3: Redskins vs. Eagles highlights', | ||||
|             'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', | ||||
|             'upload_date': '20140921', | ||||
|             'timestamp': 1411337580, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     ] | ||||
|     }, { | ||||
|         'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|         'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', | ||||
|         'info_dict': { | ||||
|             'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'LIVE: Post Game vs. Browns', | ||||
|             'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', | ||||
|             'upload_date': '20131229', | ||||
|             'timestamp': 1388354455, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', | ||||
|         'info_dict': { | ||||
|             'id': '0ap3000000467607', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Frustrations flare on the field', | ||||
|             'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', | ||||
|             'timestamp': 1422850320, | ||||
|             'upload_date': '20150202', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette', | ||||
|         'md5': '4c319e2f625ffd0b481b4382c6fc124c', | ||||
|         'info_dict': { | ||||
|             'id': 'n-238346', | ||||
|             'ext': 'mp4', | ||||
|             'title': '10 Days at Gillette', | ||||
|             'description': 'md5:8cd9cd48fac16de596eadc0b24add951', | ||||
|             'timestamp': 1442618809, | ||||
|             'upload_date': '20150918', | ||||
|         }, | ||||
|     }, { | ||||
|         # lowercase data-contentid | ||||
|         'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7', | ||||
|         'info_dict': { | ||||
|             'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tomlin looks ahead to Ravens on a short week', | ||||
|             'description': 'md5:32f3f7b139f43913181d5cbb24ecad75', | ||||
|             'timestamp': 1443459651, | ||||
|             'upload_date': '20150928', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def prepend_host(host, url): | ||||
| @@ -95,13 +160,14 @@ class NFLIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         config_url = NFLIE.prepend_host(host, self._search_regex( | ||||
|             r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL', | ||||
|             default='static/content/static/config/video/config.json')) | ||||
|             r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1', | ||||
|             webpage, 'config URL', default='static/content/static/config/video/config.json', | ||||
|             group='config')) | ||||
|         # For articles, the id in the url is not the video id | ||||
|         video_id = self._search_regex( | ||||
|             r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id) | ||||
|         config = self._download_json(config_url, video_id, | ||||
|                                      note='Downloading player config') | ||||
|             r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1', | ||||
|             webpage, 'video id', default=video_id, group='id') | ||||
|         config = self._download_json(config_url, video_id, 'Downloading player config') | ||||
|         url_template = NFLIE.prepend_host( | ||||
|             host, '{contentURLTemplate:}'.format(**config)) | ||||
|         video_data = self._download_json( | ||||
|   | ||||
| @@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
| class NHLIE(NHLBaseInfoExtractor): | ||||
|     IE_NAME = 'nhl.com' | ||||
|     _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)' | ||||
|     _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', | ||||
| @@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True,  # Requires rtmpdump | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://video.nhl.com/videocenter/embed?playlist=836127', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -146,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor): | ||||
| class NHLNewsIE(NHLBaseInfoExtractor): | ||||
|     IE_NAME = 'nhl.com:news' | ||||
|     IE_DESC = 'NHL news' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nhl.com/ice/news.htm?id=750727', | ||||
|         'md5': '4b3d1262e177687a3009937bd9ec0be8', | ||||
|         'info_dict': { | ||||
| @@ -159,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor): | ||||
|             'duration': 37, | ||||
|             'upload_date': '20150128', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # iframe embed | ||||
|         'url': 'http://sabres.nhl.com/club/news.htm?id=780189', | ||||
|         'md5': '9f663d1c006c90ac9fb82777d4294e12', | ||||
|         'info_dict': { | ||||
|             'id': '836127', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Morning Skate: OTT vs. BUF (9/23/15)', | ||||
|             'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.", | ||||
|             'duration': 93, | ||||
|             'upload_date': '20150923', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         news_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, news_id) | ||||
|         video_id = self._search_regex( | ||||
|             [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"], | ||||
|             [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'", | ||||
|              r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'], | ||||
|             webpage, 'video id') | ||||
|         return self._real_extract_video(video_id) | ||||
|  | ||||
|   | ||||
| @@ -12,6 +12,7 @@ from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encode_dict, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| @@ -100,10 +101,7 @@ class NiconicoIE(InfoExtractor): | ||||
|             'mail': username, | ||||
|             'password': password, | ||||
|         } | ||||
|         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||
|         # chokes on unicode | ||||
|         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) | ||||
|         login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') | ||||
|         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') | ||||
|         request = compat_urllib_request.Request( | ||||
|             'https://secure.nicovideo.jp/secure/login', login_data) | ||||
|         login_results = self._download_webpage( | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import str_to_int | ||||
| @@ -9,61 +8,93 @@ from ..utils import str_to_int | ||||
|  | ||||
| class NineGagIE(InfoExtractor): | ||||
|     IE_NAME = '9gag' | ||||
|     _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/ | ||||
|         (?: | ||||
|             v/(?P<numid>[0-9]+)| | ||||
|             p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+) | ||||
|         ) | ||||
|     ''' | ||||
|     _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         "url": "http://9gag.tv/v/1912", | ||||
|         "info_dict": { | ||||
|             "id": "1912", | ||||
|             "ext": "mp4", | ||||
|             "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", | ||||
|             "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", | ||||
|         'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome', | ||||
|         'info_dict': { | ||||
|             'id': 'Kk2X5', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)', | ||||
|             'title': '\"People Are Awesome 2013\" Is Absolutely Awesome', | ||||
|             'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA', | ||||
|             'uploader': 'CompilationChannel', | ||||
|             'upload_date': '20131110', | ||||
|             "view_count": int, | ||||
|             "thumbnail": "re:^https?://", | ||||
|             'view_count': int, | ||||
|         }, | ||||
|         'add_ie': ['Youtube'] | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', | ||||
|         'url': 'http://9gag.com/tv/p/aKolP3', | ||||
|         'info_dict': { | ||||
|             'id': 'KklwM', | ||||
|             'id': 'aKolP3', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': 'alternate-banned-opening-scene-of-gravity', | ||||
|             "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.", | ||||
|             'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie", | ||||
|             'uploader': 'Krishna Shenoi', | ||||
|             'upload_date': '20140401', | ||||
|             'uploader_id': 'krishnashenoi93', | ||||
|             'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video', | ||||
|             'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!", | ||||
|             'uploader_id': 'rickmereki', | ||||
|             'uploader': 'Rick Mereki', | ||||
|             'upload_date': '20110803', | ||||
|             'view_count': int, | ||||
|         }, | ||||
|         'add_ie': ['Vimeo'], | ||||
|     }, { | ||||
|         'url': 'http://9gag.com/tv/p/KklwM', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://9gag.tv/p/Kk2X5', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://9gag.com/tv/embed/a5Dmvl', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _EXTERNAL_VIDEO_PROVIDER = { | ||||
|         '1': { | ||||
|             'url': '%s', | ||||
|             'ie_key': 'Youtube', | ||||
|         }, | ||||
|         '2': { | ||||
|             'url': 'http://player.vimeo.com/video/%s', | ||||
|             'ie_key': 'Vimeo', | ||||
|         }, | ||||
|         '3': { | ||||
|             'url': 'http://instagram.com/p/%s', | ||||
|             'ie_key': 'Instagram', | ||||
|         }, | ||||
|         '4': { | ||||
|             'url': 'http://vine.co/v/%s', | ||||
|             'ie_key': 'Vine', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('numid') or mobj.group('id') | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         post_view = json.loads(self._html_search_regex( | ||||
|             r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) | ||||
|         post_view = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost', | ||||
|                 webpage, 'post view'), | ||||
|             display_id) | ||||
|  | ||||
|         youtube_id = post_view['videoExternalId'] | ||||
|         ie_key = None | ||||
|         source_url = post_view.get('sourceUrl') | ||||
|         if not source_url: | ||||
|             external_video_id = post_view['videoExternalId'] | ||||
|             external_video_provider = post_view['videoExternalProvider'] | ||||
|             source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id | ||||
|             ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key'] | ||||
|         title = post_view['title'] | ||||
|         description = post_view['description'] | ||||
|         view_count = str_to_int(post_view['externalView']) | ||||
|         description = post_view.get('description') | ||||
|         view_count = str_to_int(post_view.get('externalView')) | ||||
|         thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w') | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': youtube_id, | ||||
|             'ie_key': 'Youtube', | ||||
|             'url': source_url, | ||||
|             'ie_key': ie_key, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|   | ||||
| @@ -1,64 +1,134 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .brightcove import BrightcoveIE | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NownessIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' | ||||
| class NownessBaseIE(InfoExtractor): | ||||
|     def _extract_url_result(self, post): | ||||
|         if post['type'] == 'video': | ||||
|             for media in post['media']: | ||||
|                 if media['type'] == 'video': | ||||
|                     video_id = media['content'] | ||||
|                     source = media['source'] | ||||
|                     if source == 'brightcove': | ||||
|                         player_code = self._download_webpage( | ||||
|                             'http://www.nowness.com/iframe?id=%s' % video_id, video_id, | ||||
|                             note='Downloading player JavaScript', | ||||
|                             errnote='Unable to download player JavaScript') | ||||
|                         bc_url = BrightcoveIE._extract_brightcove_url(player_code) | ||||
|                         if bc_url is None: | ||||
|                             raise ExtractorError('Could not find player definition') | ||||
|                         return self.url_result(bc_url, 'Brightcove') | ||||
|                     elif source == 'vimeo': | ||||
|                         return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') | ||||
|                     elif source == 'youtube': | ||||
|                         return self.url_result(video_id, 'Youtube') | ||||
|                     elif source == 'cinematique': | ||||
|                         # youtube-dl currently doesn't support cinematique | ||||
|                         # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique') | ||||
|                         pass | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', | ||||
|             'md5': '068bc0202558c2e391924cb8cc470676', | ||||
|             'info_dict': { | ||||
|                 'id': '2520295746001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Candor: The Art of Gesticulation', | ||||
|                 'description': 'Candor: The Art of Gesticulation', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'uploader': 'Nowness', | ||||
|             } | ||||
|     def _api_request(self, url, request_path): | ||||
|         display_id = self._match_id(url) | ||||
|         request = compat_urllib_request.Request( | ||||
|             'http://api.nowness.com/api/' + request_path % display_id, | ||||
|             headers={ | ||||
|                 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us', | ||||
|             }) | ||||
|         return display_id, self._download_json(request, display_id) | ||||
|  | ||||
|  | ||||
| class NownessIE(NownessBaseIE): | ||||
|     IE_NAME = 'nowness' | ||||
|     _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation', | ||||
|         'md5': '068bc0202558c2e391924cb8cc470676', | ||||
|         'info_dict': { | ||||
|             'id': '2520295746001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Candor: The Art of Gesticulation', | ||||
|             'description': 'Candor: The Art of Gesticulation', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'uploader': 'Nowness', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr', | ||||
|             'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', | ||||
|             'info_dict': { | ||||
|                 'id': '3716354522001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|                 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg', | ||||
|                 'uploader': 'Nowness', | ||||
|             } | ||||
|     }, { | ||||
|         'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr', | ||||
|         'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', | ||||
|         'info_dict': { | ||||
|             'id': '3716354522001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|             'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'uploader': 'Nowness', | ||||
|         }, | ||||
|     ] | ||||
|     }, { | ||||
|         # vimeo | ||||
|         'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut', | ||||
|         'md5': '9a5a6a8edf806407e411296ab6bc2a49', | ||||
|         'info_dict': { | ||||
|             'id': '130020913', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bleu, Blanc, Rouge - A Godard Supercut', | ||||
|             'description': 'md5:f0ea5f1857dffca02dbd37875d742cec', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'upload_date': '20150607', | ||||
|             'uploader': 'Cinema Sem Lei', | ||||
|             'uploader_id': 'cinemasemlei', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('slug') | ||||
|         _, post = self._api_request(url, 'post/getBySlug/%s') | ||||
|         return self._extract_url_result(post) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_url = self._search_regex( | ||||
|             r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL') | ||||
|         real_id = self._search_regex( | ||||
|             r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID') | ||||
|  | ||||
|         player_code = self._download_webpage( | ||||
|             player_url, video_id, | ||||
|             note='Downloading player JavaScript', | ||||
|             errnote='Player download failed') | ||||
|         player_code = player_code.replace("'+d+'", real_id) | ||||
| class NownessPlaylistIE(NownessBaseIE): | ||||
|     IE_NAME = 'nowness:playlist' | ||||
|     _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues', | ||||
|         'info_dict': { | ||||
|             'id': '3286', | ||||
|         }, | ||||
|         'playlist_mincount': 8, | ||||
|     } | ||||
|  | ||||
|         bc_url = BrightcoveIE._extract_brightcove_url(player_code) | ||||
|         if bc_url is None: | ||||
|             raise ExtractorError('Could not find player definition') | ||||
|         return { | ||||
|             '_type': 'url', | ||||
|             'url': bc_url, | ||||
|             'ie_key': 'Brightcove', | ||||
|         } | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s') | ||||
|         entries = [self._extract_url_result(item) for item in playlist['items']] | ||||
|         return self.playlist_result(entries, playlist_id) | ||||
|  | ||||
|  | ||||
| class NownessSeriesIE(NownessBaseIE): | ||||
|     IE_NAME = 'nowness:series' | ||||
|     _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.nowness.com/series/60-seconds', | ||||
|         'info_dict': { | ||||
|             'id': '60', | ||||
|             'title': '60 Seconds', | ||||
|             'description': 'One-minute wisdom in a new NOWNESS series', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id, series = self._api_request(url, 'series/getBySlug/%s') | ||||
|         entries = [self._extract_url_result(post) for post in series['posts']] | ||||
|         series_title = None | ||||
|         series_description = None | ||||
|         translations = series.get('translations', []) | ||||
|         if translations: | ||||
|             series_title = translations[0].get('title') or translations[0]['seoTitle'] | ||||
|             series_description = translations[0].get('seoDescription') | ||||
|         return self.playlist_result( | ||||
|             entries, compat_str(series['id']), series_title, series_description) | ||||
|   | ||||
| @@ -130,10 +130,16 @@ class NowTVIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         display_id_split = display_id.split('/') | ||||
|         if len(display_id) > 2: | ||||
|             display_id = '/'.join((display_id_split[0], display_id_split[-1])) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id, | ||||
|   | ||||
| @@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE): | ||||
|     IE_NAME = 'nowvideo' | ||||
|     IE_DESC = 'NowVideo' | ||||
|  | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'} | ||||
|     _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'} | ||||
|  | ||||
|     _HOST = 'www.nowvideo.ch' | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
| @@ -49,7 +50,7 @@ class NRKIE(InfoExtractor): | ||||
|  | ||||
|         if data['usageRights']['isGeoBlocked']: | ||||
|             raise ExtractorError( | ||||
|                 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', | ||||
|                 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', | ||||
|                 expected=True) | ||||
|  | ||||
|         video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' | ||||
| @@ -196,20 +197,6 @@ class NRKTVIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _debug_print(self, txt): | ||||
|         if self._downloader.params.get('verbose', False): | ||||
|             self.to_screen('[debug] %s' % txt) | ||||
|  | ||||
|     def _get_subtitles(self, subtitlesurl, video_id, baseurl): | ||||
|         url = "%s%s" % (baseurl, subtitlesurl) | ||||
|         self._debug_print('%s: Subtitle url: %s' % (video_id, url)) | ||||
|         captions = self._download_xml( | ||||
|             url, video_id, 'Downloading subtitles') | ||||
|         lang = captions.get('lang', 'no') | ||||
|         return {lang: [ | ||||
|             {'ext': 'ttml', 'url': url}, | ||||
|         ]} | ||||
|  | ||||
|     def _extract_f4m(self, manifest_url, video_id): | ||||
|         return self._extract_f4m_formats( | ||||
|             manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds') | ||||
| @@ -218,7 +205,7 @@ class NRKTVIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         part_id = mobj.group('part_id') | ||||
|         baseurl = mobj.group('baseurl') | ||||
|         base_url = mobj.group('baseurl') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
| @@ -278,11 +265,14 @@ class NRKTVIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles_url = self._html_search_regex( | ||||
|             r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', | ||||
|             webpage, 'subtitle URL', default=None) | ||||
|         subtitles = None | ||||
|             r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'subtitle URL', default=None, group='url') | ||||
|         subtitles = {} | ||||
|         if subtitles_url: | ||||
|             subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) | ||||
|             subtitles['no'] = [{ | ||||
|                 'ext': 'ttml', | ||||
|                 'url': compat_urlparse.urljoin(base_url, subtitles_url), | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
| @@ -12,7 +13,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class OdnoklassnikiIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)' | ||||
|     _TESTS = [{ | ||||
|         # metadata in JSON | ||||
|         'url': 'http://ok.ru/video/20079905452', | ||||
| @@ -28,6 +29,7 @@ class OdnoklassnikiIE(InfoExtractor): | ||||
|             'like_count': int, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'skip': 'Video has been blocked', | ||||
|     }, { | ||||
|         # metadataUrl | ||||
|         'url': 'http://ok.ru/video/63567059965189-0', | ||||
| @@ -43,9 +45,27 @@ class OdnoklassnikiIE(InfoExtractor): | ||||
|             'like_count': int, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|     }, { | ||||
|         # YouTube embed (metadataUrl, provider == USER_YOUTUBE) | ||||
|         'url': 'http://ok.ru/video/64211978996595-1', | ||||
|         'md5': '5d7475d428845cd2e13bae6f1a992278', | ||||
|         'info_dict': { | ||||
|             'id': '64211978996595-1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Космическая среда от 26 августа 2015', | ||||
|             'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', | ||||
|             'duration': 440, | ||||
|             'upload_date': '20150826', | ||||
|             'uploader_id': '750099571', | ||||
|             'uploader': 'Алина П', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ok.ru/video/20648036891', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -54,9 +74,16 @@ class OdnoklassnikiIE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             'http://ok.ru/video/%s' % video_id, video_id) | ||||
|  | ||||
|         error = self._search_regex( | ||||
|             r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', | ||||
|             webpage, 'error', default=None) | ||||
|         if error: | ||||
|             raise ExtractorError(error, expected=True) | ||||
|  | ||||
|         player = self._parse_json( | ||||
|             unescapeHTML(self._search_regex( | ||||
|                 r'data-attributes="([^"]+)"', webpage, 'player')), | ||||
|                 r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id, | ||||
|                 webpage, 'player', group='player')), | ||||
|             video_id) | ||||
|  | ||||
|         flashvars = player['flashvars'] | ||||
| @@ -89,16 +116,7 @@ class OdnoklassnikiIE(InfoExtractor): | ||||
|  | ||||
|         like_count = int_or_none(metadata.get('likeCount')) | ||||
|  | ||||
|         quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': f['url'], | ||||
|             'ext': 'mp4', | ||||
|             'format_id': f['name'], | ||||
|             'quality': quality(f['name']), | ||||
|         } for f in metadata['videos']] | ||||
|  | ||||
|         return { | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
| @@ -108,5 +126,24 @@ class OdnoklassnikiIE(InfoExtractor): | ||||
|             'uploader_id': uploader_id, | ||||
|             'like_count': like_count, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         if metadata.get('provider') == 'USER_YOUTUBE': | ||||
|             info.update({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': movie['contentId'], | ||||
|             }) | ||||
|             return info | ||||
|  | ||||
|         quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': f['url'], | ||||
|             'ext': 'mp4', | ||||
|             'format_id': f['name'], | ||||
|             'quality': quality(f['name']), | ||||
|         } for f in metadata['videos']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info['formats'] = formats | ||||
|         return info | ||||
|   | ||||
| @@ -1,70 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote_plus | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     parse_age_limit, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class OpenFilmIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.openfilm.com/videos/human-resources-remastered', | ||||
|         'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37', | ||||
|         'info_dict': { | ||||
|             'id': '32736', | ||||
|             'display_id': 'human-resources-remastered', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Human Resources (Remastered)', | ||||
|             'description': 'Social Engineering in the 20th Century.', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 7164, | ||||
|             'timestamp': 1334756988, | ||||
|             'upload_date': '20120418', | ||||
|             'uploader_id': '41117', | ||||
|             'view_count': int, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         player = compat_urllib_parse_unquote_plus( | ||||
|             self._og_search_video_url(webpage)) | ||||
|  | ||||
|         video = json.loads(self._search_regex( | ||||
|             r'\bp=({.+?})(?:&|$)', player, 'video JSON')) | ||||
|  | ||||
|         video_url = '%s1.mp4' % video['location'] | ||||
|         video_id = video.get('video_id') | ||||
|         display_id = video.get('alias') or display_id | ||||
|         title = video.get('title') | ||||
|         description = video.get('description') | ||||
|         thumbnail = video.get('main_thumb') | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|         timestamp = parse_iso8601(video.get('dt_published'), ' ') | ||||
|         uploader_id = video.get('user_id') | ||||
|         view_count = int_or_none(video.get('views_count')) | ||||
|         age_limit = parse_age_limit(video.get('age_limit')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader_id': uploader_id, | ||||
|             'view_count': view_count, | ||||
|             'age_limit': age_limit, | ||||
|         } | ||||
| @@ -134,6 +134,24 @@ class PBSIE(InfoExtractor): | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires ffmpeg | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # Video embedded in iframe containing angle brackets as attribute's value (e.g. | ||||
|             # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see | ||||
|             # https://github.com/rg3/youtube-dl/issues/7059) | ||||
|             'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', | ||||
|             'info_dict': { | ||||
|                 'id': '2365546844', | ||||
|                 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", | ||||
|                 'description': 'md5:61db2ddf27c9912f09c241014b118ed1', | ||||
|                 'duration': 1480, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires ffmpeg | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -167,7 +185,7 @@ class PBSIE(InfoExtractor): | ||||
|                 return media_id, presumptive_id, upload_date | ||||
|  | ||||
|             url = self._search_regex( | ||||
|                 r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']', | ||||
|                 r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', | ||||
|                 webpage, 'player URL') | ||||
|             mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|   | ||||
| @@ -19,7 +19,7 @@ class PlaywireIE(InfoExtractor): | ||||
|             'id': '3353705', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'S04_RM_UCL_Rus', | ||||
|             'thumbnail': 're:^http://.*\.png$', | ||||
|             'thumbnail': 're:^https?://.*\.png$', | ||||
|             'duration': 145.94, | ||||
|         }, | ||||
|     }, { | ||||
|   | ||||
| @@ -41,9 +41,7 @@ class PluralsightIE(InfoExtractor): | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             raise ExtractorError( | ||||
|                 'Pluralsight account is required, use --username and --password options to provide account credentials.', | ||||
|                 expected=True) | ||||
|             self.raise_login_required('Pluralsight account is required') | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             self._LOGIN_URL, None, 'Downloading login page') | ||||
|   | ||||
| @@ -20,7 +20,7 @@ from ..aes import ( | ||||
|  | ||||
|  | ||||
| class PornHubIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)' | ||||
|     _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', | ||||
|         'md5': '882f488fa1f0026f023f33576004a2ed', | ||||
| @@ -34,6 +34,9 @@ class PornHubIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|   | ||||
| @@ -25,7 +25,7 @@ class QQMusicIE(InfoExtractor): | ||||
|             'id': '004295Et37taLD', | ||||
|             'ext': 'mp3', | ||||
|             'title': '可惜没如果', | ||||
|             'upload_date': '20141227', | ||||
|             'release_date': '20141227', | ||||
|             'creator': '林俊杰', | ||||
|             'description': 'md5:d327722d0361576fde558f1ac68a7065', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
| @@ -38,11 +38,26 @@ class QQMusicIE(InfoExtractor): | ||||
|             'id': '004MsGEo3DdNxV', | ||||
|             'ext': 'mp3', | ||||
|             'title': '如果', | ||||
|             'upload_date': '20050626', | ||||
|             'release_date': '20050626', | ||||
|             'creator': '李季美', | ||||
|             'description': 'md5:46857d5ed62bc4ba84607a805dccf437', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'lyrics not in .lrc format', | ||||
|         'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6', | ||||
|         'info_dict': { | ||||
|             'id': '001JyApY11tIp6', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Shadows Over Transylvania', | ||||
|             'release_date': '19970225', | ||||
|             'creator': 'Dark Funeral', | ||||
|             'description': 'md5:ed14d5bd7ecec19609108052c25b2c11', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _FORMATS = { | ||||
| @@ -112,15 +127,27 @@ class QQMusicIE(InfoExtractor): | ||||
|         self._check_formats(formats, mid) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|         actual_lrc_lyrics = ''.join( | ||||
|             line + '\n' for line in re.findall( | ||||
|                 r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content)) | ||||
|  | ||||
|         info_dict = { | ||||
|             'id': mid, | ||||
|             'formats': formats, | ||||
|             'title': song_name, | ||||
|             'upload_date': publish_time, | ||||
|             'release_date': publish_time, | ||||
|             'creator': singer, | ||||
|             'description': lrc_content, | ||||
|             'thumbnail': thumbnail_url, | ||||
|             'thumbnail': thumbnail_url | ||||
|         } | ||||
|         if actual_lrc_lyrics: | ||||
|             info_dict['subtitles'] = { | ||||
|                 'origin': [{ | ||||
|                     'ext': 'lrc', | ||||
|                     'data': actual_lrc_lyrics, | ||||
|                 }] | ||||
|             } | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class QQPlaylistBaseIE(InfoExtractor): | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
| @@ -72,6 +73,18 @@ class RaiIE(InfoExtractor): | ||||
|                 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', | ||||
|                 'uploader': 'RaiTre', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', | ||||
|             'md5': '037104d2c14132887e5e4cf114569214', | ||||
|             'info_dict': { | ||||
|                 'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Il pacco', | ||||
|                 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', | ||||
|                 'uploader': 'RaiTre', | ||||
|                 'upload_date': '20141221', | ||||
|             }, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -90,11 +103,14 @@ class RaiIE(InfoExtractor): | ||||
|         relinker_url = self._extract_relinker_url(webpage) | ||||
|  | ||||
|         if not relinker_url: | ||||
|             iframe_path = self._search_regex( | ||||
|                 r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"', | ||||
|             iframe_url = self._search_regex( | ||||
|                 [r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', | ||||
|                  r'drawMediaRaiTV\(["\'](.+?)["\']'], | ||||
|                 webpage, 'iframe') | ||||
|             if not iframe_url.startswith('http'): | ||||
|                 iframe_url = compat_urlparse.urljoin(url, iframe_url) | ||||
|             webpage = self._download_webpage( | ||||
|                 '%s/%s' % (host, iframe_path), video_id) | ||||
|                 iframe_url, video_id) | ||||
|             relinker_url = self._extract_relinker_url(webpage) | ||||
|  | ||||
|         relinker = self._download_json( | ||||
|   | ||||
| @@ -6,7 +6,7 @@ import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import compat_urllib_request, compat_urlparse | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
| @@ -102,7 +102,9 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|         if info['state'] == 'DESPU': | ||||
|             raise ExtractorError('The video is no longer available', expected=True) | ||||
|         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) | ||||
|         png = self._download_webpage(png_url, video_id, 'Downloading url information') | ||||
|         png_request = compat_urllib_request.Request(png_url) | ||||
|         png_request.add_header('Referer', url) | ||||
|         png = self._download_webpage(png_request, video_id, 'Downloading url information') | ||||
|         video_url = _decrypt_url(png) | ||||
|         if not video_url.endswith('.f4m'): | ||||
|             auth_url = video_url.replace( | ||||
|   | ||||
| @@ -6,19 +6,19 @@ from ..compat import compat_urllib_parse_urlparse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     xpath_attr, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RuutuIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', | ||||
|             'url': 'http://www.ruutu.fi/video/2058907', | ||||
|             'md5': 'ab2093f39be1ca8581963451b3c0234f', | ||||
|             'info_dict': { | ||||
|                 'id': '2058907', | ||||
|                 'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', | ||||
|                 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', | ||||
| @@ -28,14 +28,13 @@ class RuutuIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa', | ||||
|             'url': 'http://www.ruutu.fi/video/2057306', | ||||
|             'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', | ||||
|             'info_dict': { | ||||
|                 'id': '2057306', | ||||
|                 'display_id': 'superpesis-katso-koko-kausi-ruudussa', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Superpesis: katso koko kausi Ruudussa', | ||||
|                 'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77', | ||||
|                 'description': 'md5:da2736052fef3b2bd5e0005e63c25eac', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 40, | ||||
|                 'age_limit': 0, | ||||
| @@ -44,29 +43,10 @@ class RuutuIE(InfoExtractor): | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-media-id="(\d+)"', webpage, 'media id') | ||||
|  | ||||
|         video_xml_url = None | ||||
|  | ||||
|         media_data = self._search_regex( | ||||
|             r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage, | ||||
|             'media data', default=None) | ||||
|         if media_data: | ||||
|             media_json = self._parse_json(media_data, display_id, fatal=False) | ||||
|             if media_json: | ||||
|                 xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl') | ||||
|                 if xml_url: | ||||
|                     video_xml_url = xml_url.replace('{ID}', video_id) | ||||
|  | ||||
|         if not video_xml_url: | ||||
|             video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id | ||||
|  | ||||
|         video_xml = self._download_xml(video_xml_url, video_id) | ||||
|         video_xml = self._download_xml( | ||||
|             'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         processed_urls = [] | ||||
| @@ -94,7 +74,7 @@ class RuutuIE(InfoExtractor): | ||||
|                         preference = -1 if proto == 'rtmp' else 1 | ||||
|                         label = child.get('label') | ||||
|                         tbr = int_or_none(child.get('bitrate')) | ||||
|                         width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')] | ||||
|                         width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] | ||||
|                         formats.append({ | ||||
|                             'format_id': '%s-%s' % (proto, label if label else tbr), | ||||
|                             'url': video_url, | ||||
| @@ -109,10 +89,9 @@ class RuutuIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), | ||||
|             'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), | ||||
|             'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), | ||||
|             'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')), | ||||
|             'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), | ||||
|             'formats': formats, | ||||
|   | ||||
| @@ -20,7 +20,6 @@ from ..utils import ( | ||||
| class SafariBaseIE(InfoExtractor): | ||||
|     _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' | ||||
|     _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>' | ||||
|     _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com' | ||||
|     _NETRC_MACHINE = 'safari' | ||||
|  | ||||
|     _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' | ||||
| @@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor): | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             raise ExtractorError( | ||||
|                 self._ACCOUNT_CREDENTIALS_HINT, | ||||
|                 expected=True) | ||||
|             self.raise_login_required('safaribooksonline.com account is required') | ||||
|  | ||||
|         headers = std_headers | ||||
|         if 'Referer' not in headers: | ||||
|   | ||||
| @@ -12,8 +12,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class ScreenwaveMediaIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)' | ||||
|  | ||||
|     _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' | ||||
|     EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', | ||||
|         'only_matching': True, | ||||
| @@ -33,7 +33,7 @@ class ScreenwaveMediaIE(InfoExtractor): | ||||
|             'http://player.screenwavemedia.com/player.js', | ||||
|             video_id, 'Downloading playerconfig webpage') | ||||
|  | ||||
|         videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver') | ||||
|         videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver') | ||||
|  | ||||
|         sources = self._parse_json( | ||||
|             js_to_json( | ||||
| @@ -56,6 +56,7 @@ class ScreenwaveMediaIE(InfoExtractor): | ||||
|  | ||||
|         # Fallback to hardcoded sources if JS changes again | ||||
|         if not sources: | ||||
|             self.report_warning('Falling back to a hardcoded list of streams') | ||||
|             sources = [{ | ||||
|                 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id), | ||||
|                 'type': 'mp4', | ||||
|   | ||||
| @@ -16,7 +16,7 @@ class ShahidIE(InfoExtractor): | ||||
|         'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', | ||||
|         'info_dict': { | ||||
|             'id': '90574', | ||||
|             'ext': 'm3u8', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', | ||||
|             'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', | ||||
|             'duration': 2972, | ||||
| @@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor): | ||||
|                 compat_urllib_parse.urlencode({ | ||||
|                     'apiKey': 'sh@hid0nlin3', | ||||
|                     'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', | ||||
|                 }).encode('utf-8')), | ||||
|                 })), | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         video = video[api_vars['playerType']] | ||||
|   | ||||
| @@ -14,17 +14,28 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SharedIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})' | ||||
|     IE_DESC = 'shared.sx and vivo.sx' | ||||
|     _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://shared.sx/0060718775', | ||||
|         'md5': '106fefed92a8a2adb8c98e6a0652f49b', | ||||
|         'info_dict': { | ||||
|             'id': '0060718775', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bmp4', | ||||
|             'filesize': 1720110, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://vivo.sx/d7ddda0e78', | ||||
|         'md5': '15b3af41be0b4fe01f4df075c2678b2c', | ||||
|         'info_dict': { | ||||
|             'id': 'd7ddda0e78', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Chicken', | ||||
|             'filesize': 528031, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor): | ||||
|  | ||||
|             (username, password) = self._get_login_info() | ||||
|             if username is None: | ||||
|                 raise ExtractorError( | ||||
|                     'Erotic broadcasts allowed only for registered users, ' | ||||
|                     'use --username and --password options to provide account credentials.', | ||||
|                     expected=True) | ||||
|                 self.raise_login_required('Erotic broadcasts allowed only for registered users') | ||||
|  | ||||
|             login_form = { | ||||
|                 'login-hint53': '1', | ||||
|   | ||||
| @@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' | ||||
|     _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' | ||||
|     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' | ||||
|  | ||||
|     def report_resolve(self, video_id): | ||||
|   | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class TapelyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?' | ||||
|     _API_URL = 'http://tape.ly/showtape?id={0:}' | ||||
|     _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}' | ||||
|     _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}' | ||||
| @@ -42,6 +42,10 @@ class TapelyIE(InfoExtractor): | ||||
|                 'ext': 'm4a', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://tapely.com/my-grief-as-told-by-water', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,24 +1,51 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mitele import MiTeleIE | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     parse_duration, | ||||
|     strip_jsonp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TelecincoIE(MiTeleIE): | ||||
|     IE_NAME = 'telecinco.es' | ||||
|     _VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html' | ||||
| class TelecincoIE(InfoExtractor): | ||||
|     IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' | ||||
|     _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', | ||||
|         'md5': '5cbef3ad5ef17bf0d21570332d140729', | ||||
|         'info_dict': { | ||||
|             'id': 'MDSVID20141015_0058', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Con Martín Berasategui, hacer un bacalao al ...', | ||||
|             'duration': 662, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|     }, { | ||||
|         'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', | ||||
|         'md5': '0a5b9f3cc8b074f50a0578f823a12694', | ||||
|         'info_dict': { | ||||
|             'id': 'MDSVID20150916_0128', | ||||
|             'ext': 'mp4', | ||||
|             'title': '¿Quién es este ex futbolista con el que hablan ...', | ||||
|             'duration': 79, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', | ||||
|         'md5': 'ad1bfaaba922dd4a295724b05b68f86a', | ||||
|         'info_dict': { | ||||
|             'id': 'MDSVID20150513_0220', | ||||
|             'ext': 'mp4', | ||||
|             'title': '#DOYLACARA. Con la trata no hay trato', | ||||
|             'duration': 50, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', | ||||
| @@ -27,3 +54,41 @@ class TelecincoIE(MiTeleIE): | ||||
|         'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, episode) | ||||
|         embed_data_json = self._search_regex( | ||||
|             r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', | ||||
|         ).replace('\'', '"') | ||||
|         embed_data = json.loads(embed_data_json) | ||||
|  | ||||
|         domain = embed_data['mediaUrl'] | ||||
|         if not domain.startswith('http'): | ||||
|             # only happens in telecinco.es videos | ||||
|             domain = 'http://' + domain | ||||
|         info_url = compat_urlparse.urljoin( | ||||
|             domain, | ||||
|             compat_urllib_parse_unquote(embed_data['flashvars']['host']) | ||||
|         ) | ||||
|         info_el = self._download_xml(info_url, episode).find('./video/info') | ||||
|  | ||||
|         video_link = info_el.find('videoUrl/link').text | ||||
|         token_query = compat_urllib_parse.urlencode({'id': video_link}) | ||||
|         token_info = self._download_json( | ||||
|             embed_data['flashvars']['ov_tk'] + '?' + token_query, | ||||
|             episode, | ||||
|             transform_source=strip_jsonp | ||||
|         ) | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') | ||||
|  | ||||
|         return { | ||||
|             'id': embed_data['videoId'], | ||||
|             'display_id': episode, | ||||
|             'title': info_el.find('title').text, | ||||
|             'formats': formats, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'thumbnail': info_el.find('thumb').text, | ||||
|             'duration': parse_duration(info_el.find('duration').text), | ||||
|         } | ||||
|   | ||||
| @@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage): | ||||
|             raise ExtractorError( | ||||
|                 'This video requires login, use --username and --password ' | ||||
|                 'options to provide account credentials.', expected=True) | ||||
|             self.raise_login_required('This video requires login') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|   | ||||
| @@ -2,14 +2,12 @@ | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
|  | ||||
|  | ||||
| class TudouIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])' | ||||
|     _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', | ||||
|         'md5': '140a49ed444bd22f93330985d8475fcb', | ||||
| @@ -27,41 +25,41 @@ class TudouIE(InfoExtractor): | ||||
|             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' | ||||
|  | ||||
|     def _url_for_id(self, id, quality=None): | ||||
|         info_url = "http://v2.tudou.com/f?id=" + str(id) | ||||
|     def _url_for_id(self, video_id, quality=None): | ||||
|         info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id) | ||||
|         if quality: | ||||
|             info_url += '&hd' + quality | ||||
|         webpage = self._download_webpage(info_url, id, "Opening the info webpage") | ||||
|         final_url = self._html_search_regex('>(.+?)</f>', webpage, 'video url') | ||||
|         xml_data = self._download_xml(info_url, video_id, "Opening the info XML page") | ||||
|         final_url = xml_data.text | ||||
|         return final_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage) | ||||
|         if m and m.group(1): | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': 'youku:' + m.group(1), | ||||
|                 'ie_key': 'Youku' | ||||
|             } | ||||
|         youku_vcode = self._search_regex( | ||||
|             r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None) | ||||
|         if youku_vcode: | ||||
|             return self.url_result('youku:' + youku_vcode, ie='Youku') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r",kw:\s*['\"](.+?)[\"']", webpage, 'title') | ||||
|             r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title') | ||||
|         thumbnail_url = self._search_regex( | ||||
|             r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False) | ||||
|             r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False) | ||||
|  | ||||
|         player_url = self._search_regex( | ||||
|             r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']", | ||||
|             r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]', | ||||
|             webpage, 'player URL', default=self._PLAYER_URL) | ||||
|  | ||||
|         segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') | ||||
|         segments = json.loads(segs_json) | ||||
|         segments = self._parse_json(self._search_regex( | ||||
|             r'segs: \'([^\']+)\'', webpage, 'segments'), video_id) | ||||
|         # It looks like the keys are the arguments that have to be passed as | ||||
|         # the hd field in the request url, we pick the higher | ||||
|         # Also, filter non-number qualities (see issue #3643). | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class TumblrIE(InfoExtractor): | ||||
| @@ -28,6 +29,19 @@ class TumblrIE(InfoExtractor): | ||||
|             'description': 'md5:dba62ac8639482759c8eb10ce474586a', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video', | ||||
|         'md5': '7ae503065ad150122dc3089f8cf1546c', | ||||
|         'info_dict': { | ||||
|             'id': '130323439814', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'HD Video Testing \u2014 Test description for my HD video', | ||||
|             'description': 'md5:97cc3ab5fcd27ee4af6356701541319c', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'hd', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching', | ||||
|         'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab', | ||||
| @@ -37,6 +51,9 @@ class TumblrIE(InfoExtractor): | ||||
|             'title': 'naked smoking & stretching', | ||||
|             'upload_date': '20150506', | ||||
|             'timestamp': 1430931613, | ||||
|             'age_limit': 18, | ||||
|             'uploader_id': '1638622', | ||||
|             'uploader': 'naked-yogi', | ||||
|         }, | ||||
|         'add_ie': ['Vidme'], | ||||
|     }, { | ||||
| @@ -66,10 +83,38 @@ class TumblrIE(InfoExtractor): | ||||
|         if iframe_url is None: | ||||
|             return self.url_result(urlh.geturl(), 'Generic') | ||||
|  | ||||
|         iframe = self._download_webpage(iframe_url, video_id, | ||||
|                                         'Downloading iframe page') | ||||
|         video_url = self._search_regex(r'<source src="([^"]+)"', | ||||
|                                        iframe, 'video url') | ||||
|         iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page') | ||||
|  | ||||
|         duration = None | ||||
|         sources = [] | ||||
|  | ||||
|         sd_url = self._search_regex( | ||||
|             r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe, | ||||
|             'sd video url', default=None, group='url') | ||||
|         if sd_url: | ||||
|             sources.append((sd_url, 'sd')) | ||||
|  | ||||
|         options = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe, | ||||
|                 'hd video url', default='', group='options'), | ||||
|             video_id, fatal=False) | ||||
|         if options: | ||||
|             duration = int_or_none(options.get('duration')) | ||||
|             hd_url = options.get('hdUrl') | ||||
|             if hd_url: | ||||
|                 sources.append((hd_url, 'hd')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'format_id': format_id, | ||||
|             'height': int_or_none(self._search_regex( | ||||
|                 r'/(\d{3,4})$', video_url, 'height', default=None)), | ||||
|             'quality': quality, | ||||
|         } for quality, (video_url, format_id) in enumerate(sources)] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # The only place where you can get a title, it's not complete, | ||||
|         # but searching in other places doesn't work for all videos | ||||
| @@ -79,9 +124,9 @@ class TumblrIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': video_title, | ||||
|             'description': self._og_search_description(webpage, default=None), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage, default=None), | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user