Compare commits

312 commits: 2014.07.20 ... 2014.08.25
Commits in this comparison (SHA1s):

```text
067e922295 c28df2478f 241f7a8ade b252735910 7adcbe7594 8d31fa3cce
1f06864e9a 348ae0a79e 528d455632 ba5d51b340 7833d941bb a2360a4c80
a7cacbca2b c6b4132a0a ad260c90ab b8313f07bc 92a17d28ac 5f90042bd6
9480d1a566 36b0079f23 28028629b9 11f75cac3d e673db0194 ebab4520ff
a71d1414eb 423817c468 51ed9fce09 d43aeb1d00 4d805e063c 24e5e24166
4d54ef20a2 54036b3991 e5402ac120 f56f8399c7 cf0c5fa3a1 8c2ccefae6
1f8b6af773 8f9b683eeb b5f4775b38 01d906ffe9 614582bcc4 e1ab5000b2
a5ed3e571e 10eaeb20c5 fa8deaf38b 6857590059 a3db22ebdf c8e9a235d9
30b871b0ca eb9da9b732 d769be6c96 a54bda3ae2 00558d9414 49f3c16543
2ef6fcb5d8 38fc045253 af1fd929c6 b7b04c9234 bc0bb6fd30 430826c9d4
68909f0c4e 9d048a17d8 492641d10a 2b9faf5542 ed2d6a1960 be843678b1
c71dfccc98 1a9ccac7c1 e330d59abb 394df6d7d0 218f754940 a053c3493a
50b294aab8 756b046f3e 388ac0b18a ad06434bd3 bd9820c937 deda8ac376
e05f693942 b27295d2ab ace52c5713 e62e150f64 c44c0a775d 5fcf2dbed0
91dff03217 a200f4cee2 ea6e8d5454 83d35817f5 76beff70a8 61882bf7c6
cab317a680 73159f99cc c15235cd07 12c3ec3382 55db73efdf af40ac054a
a36819731b 181c8655c7 3b95347bb6 3b88ee9a7d 55c49908d2 db9b0b67b7
35f76e0061 3f338cd6de 1d01f26ab1 266c71f971 e8ee972c6e f83dda12ad
696d49815e fe556f1b0c d5638d974f 938dd254e5 6493f5d704 cd6b48365e
4d9bd478f9 165250ff5e 83317f6938 c1d293cfa6 49807b4ac6 c990bb3633
af8322d2f9 df866e7f2a 664718ff63 3258263371 3cfafc4a9b 6f600ff5d6
90e075da3a 9572013de9 3a5beb0ca1 a6da7b6b96 173a7026d5 40a90862f4
511c4325dc 85a699246a 4dc5286e13 c767dc74b8 56ca04f662 eb3680123a
f5273890ee c7a088a816 fb17b60811 1e58804260 31bf213032 1cccc41ddc
a91cf27767 64d02399d8 5961017202 d9760fd43c d42b2d2985 cccfab6412
4665664c92 0adc996bc3 b42a2a720b 37edd7dd4a f87b3500c5 66420a2db4
6b8492a782 6de0595eb8 e48a2c646d 0f831a1a92 1ce464aba9 6994e70651
3e510af38d 5ecd7b0a92 a229909fa6 548f31d99c 78b296b0ff be79b07907
5537dce84d 493987fefe c97797a737 8d7d9d3452 7a5e7b303c 61aabb9d70
62af3a0eb5 60064c53f1 98eb1c3fa2 201e9eaa0e 9afa6ede21 f4776371ae
328a20bf9c 5622f29ae4 b4f23afbd1 0138968a6a c3f0b12b0f 4f31d0f2b7
bff74bdd1a 10b04ff7f4 1f7ccb9014 c7b3209668 895ba7d1dd a2a1b0baa2
8646eb790e 27ace98f51 a00d73c8c8 f036a6328e 31bb8d3f51 4958ae2058
7e8d73c183 65bc504db8 0fc74a0d91 8d2cc6fbb1 a954584f63 cb3ff6fb01
71aa656d13 366b1f3cfe 64ce58db38 11b85ce62e 1220352ff7 8f3034d871
7fa547ab02 3182f3e2dc cbf915f3f6 b490b8849a 5d2519e5bf c3415d1bac
36f3542883 4cb71e9b6a 4bc7009e8a 16f8e9df8a b081cebefa 916c145217
4192b51c7c 052421ff09 4e99f48817 a11165ecc6 fbb2fc5580 2fe3d240cc
42f4dcfe41 892e3192fb 7272eab9d0 ebe832dc37 825abb8175 8944ec0109
c084c93402 8c778adc39 71b6065009 7e660ac113 d799b47b82 b7f8116406
6db274e057 0c92b57398 becafcbf0f 92a86f4c1a dfe029a62c b0472057a3
c081b35c27 9f43890bcd 94a20aa5f8 94e8df3a7e 37e64addc8 d82ba23ba5
0fd7fd71b4 eae12e3fe3 798a2cad4f 41c0849429 a4e5af1184 b090af5922
388841f819 1a2ecbfbc4 38e292b112 c4f731262d 07cc63f386 e42a692f00
6ec7538bb4 2871d489a9 1771ddd85d 5198bf68fc e00fc35dbe 8904e979df
53eb217661 9dcb8f3fc7 1e8ac8364b 754d8a035e f1f725c6a0 06c155420f
7dabd2ac45 df8ba0d2cf ff1956e07b caf5a8817b a850fde1d8 0e6ebc13d1
6f5342a201 264a7044f5 1a30deca50 d8624e6a80 4f95d455ed 468d19a9c1
9aeaf730ad db964a33a1 da8fb85859 54330a1c3c 9732d77ed2 199ece7eb8
1997eb0078 eef4a7a304 246168bd72 7fbf54dc62 351f373865 72e785f36a
727d2930f2 c13bf7c836 8adec2b9e0 66aa382eae c065fd35ae 37e3cbe22e
610134730a 212a5e28ba ee1a7032d5 7ed806d241 dd06c95e43 3442b30ab2
```
**.gitignore** (vendored): 1 line changed

```diff
@@ -26,5 +26,6 @@ updates_key.pem
 *.m4a
 *.m4v
 *.part
+*.swp
 test/testdata
 .tox
```
							
								
								
									
**Makefile**: 8 lines changed

```diff
@@ -6,10 +6,10 @@ clean:
 cleanall: clean
 	rm -f youtube-dl youtube-dl.exe
 
-PREFIX=/usr/local
-BINDIR=$(PREFIX)/bin
-MANDIR=$(PREFIX)/man
-PYTHON=/usr/bin/env python
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+MANDIR ?= $(PREFIX)/man
+PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
```
**README.md**: 178 lines changed

````diff
@@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget:
 
 Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
 
+OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
+
+    brew install youtube-dl
+
+You can also use pip:
+
+    sudo pip install youtube-dl
+
 Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
 
 # DESCRIPTION
@@ -38,12 +46,6 @@ which means you can modify it, redistribute it or use it however you like.
                                      playlist or the command line) if an error
                                      occurs
     --dump-user-agent                display the current browser identification
-    --user-agent UA                  specify a custom user agent
-    --referer REF                    specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
     --list-extractors                List all supported extractors and the URLs
                                      they would handle
     --extractor-descriptions         Output descriptions of all supported
@@ -51,35 +53,22 @@ which means you can modify it, redistribute it or use it however you like.
     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
                                      an empty string (--proxy "") for direct
                                      connection
-    --no-check-certificate           Suppress HTTPS certificate validation.
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
-    --no-cache-dir                   Disable filesystem caching
     --socket-timeout None            Time to wait before giving up, in seconds
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
                                      apple". Use the value "auto" to let
-                                     youtube-dl guess. The default value "error"
-                                     just throws an error.
+                                     youtube-dl guess ("auto_warning" to emit a
+                                     warning when guessing). "error" just throws
+                                     an error. The default value "fixup_error"
+                                     repairs broken URLs, but emits an error if
+                                     this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
                                      /youtube-dl.conf: do not read the user
                                      configuration in ~/.config/youtube-dl.conf
                                      (%APPDATA%/youtube-dl/config.txt on
                                      Windows)
-    --encoding ENCODING              Force the specified encoding (experimental)
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
@@ -125,9 +114,9 @@ which means you can modify it, redistribute it or use it however you like.
                                      of SIZE.
 
 ## Filesystem Options:
-    -t, --title                      use title in file name (default)
+    -a, --batch-file FILE            file containing URLs to download ('-' for
+                                     stdin)
     --id                             use only video ID in file name
-    -l, --literal                    [deprecated] alias of --title
     -A, --auto-number                number downloaded files starting from 00000
     -o, --output TEMPLATE            output filename template. Use %(title)s to
                                      get the title, %(uploader)s for the
@@ -160,18 +149,15 @@ which means you can modify it, redistribute it or use it however you like.
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
-    -a, --batch-file FILE            file containing URLs to download ('-' for
-                                     stdin)
-    --load-info FILE                 json file containing the video information
-                                     (created with the "--write-json" option)
+    -t, --title                      [deprecated] use title in file name
+                                     (default)
+    -l, --literal                    [deprecated] alias of --title
     -w, --no-overwrites              do not overwrite files
     -c, --continue                   force resume of partially downloaded files.
                                      By default, youtube-dl will resume
                                      downloads if possible.
     --no-continue                    do not resume partially downloaded files
                                      (restart from beginning)
-    --cookies FILE                   file to read cookies from and dump cookie
-                                     jar in
     --no-part                        do not use .part files
     --no-mtime                       do not use the Last-modified header to set
                                      the file modification time
@@ -181,6 +167,19 @@ which means you can modify it, redistribute it or use it however you like.
     --write-annotations              write video annotations to a .annotation
                                      file
     --write-thumbnail                write thumbnail image to disk
+    --load-info FILE                 json file containing the video information
+                                     (created with the "--write-json" option)
+    --cookies FILE                   file to read cookies from and dump cookie
+                                     jar in
+    --cache-dir DIR                  Location in the filesystem where youtube-dl
+                                     can store some downloaded information
+                                     permanently. By default $XDG_CACHE_HOME
+                                     /youtube-dl or ~/.cache/youtube-dl . At the
+                                     moment, only YouTube player files (for
+                                     videos with obfuscated signatures) are
+                                     cached, but that may change.
+    --no-cache-dir                   Disable filesystem caching
+    --rm-cache-dir                   Delete all filesystem cache files
 
 ## Verbosity / Simulation Options:
     -q, --quiet                      activates quiet mode
@@ -210,6 +209,22 @@ which means you can modify it, redistribute it or use it however you like.
                                      problems
     --print-traffic                  Display sent and read HTTP traffic
 
+## Workarounds:
+    --encoding ENCODING              Force the specified encoding (experimental)
+    --no-check-certificate           Suppress HTTPS certificate validation.
+    --prefer-insecure                Use an unencrypted connection to retrieve
+                                     information about the video. (Currently
+                                     supported only for YouTube)
+    --user-agent UA                  specify a custom user agent
+    --referer REF                    specify a custom referer, use if the video
+                                     access is restricted to one domain
+    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
+                                     separated by a colon ':'. You can use this
+                                     option multiple times
+    --bidi-workaround                Work around terminals that lack
+                                     bidirectional text support. Requires bidiv
+                                     or fribidi executable in PATH
+
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
                                      preference using slashes: "-f 22/17/18".
@@ -240,6 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
 ## Authentication Options:
     -u, --username USERNAME          account username
     -p, --password PASSWORD          account password
+    -2, --twofactor TWOFACTOR        two-factor auth code
     -n, --netrc                      use .netrc authentication data
     --video-password PASSWORD        video password (vimeo, smotri)
 
@@ -272,6 +288,10 @@ which means you can modify it, redistribute it or use it however you like.
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                      postprocessors
+    --exec CMD                       Execute a command on the file after
+                                     downloading, similar to find's -exec
+                                     syntax. Example: --exec 'adb push {}
+                                     /sdcard/Music/ && rm {}'
 
 # CONFIGURATION
 
@@ -296,10 +316,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
 
 In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
 
-    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
-    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
-    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
-    youtube-dl_test_video_.mp4          # A simple file name
+```bash
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.mp4          # A simple file name
+```
 
 # VIDEO SELECTION
 
@@ -310,14 +332,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
 
 Examples:
 
-    # Download only the videos uploaded in the last 6 months
-    $ youtube-dl --dateafter now-6months
+```bash
+# Download only the videos uploaded in the last 6 months
+$ youtube-dl --dateafter now-6months
 
-    # Download only the videos uploaded on January 1, 1970
-    $ youtube-dl --date 19700101
+# Download only the videos uploaded on January 1, 1970
+$ youtube-dl --date 19700101
 
-    $ # will only download the videos uploaded in the 200x decade
-    $ youtube-dl --dateafter 20000101 --datebefore 20091231
+$ # will only download the videos uploaded in the 200x decade
+$ youtube-dl --dateafter 20000101 --datebefore 20091231
+```
 
 # FAQ
 
@@ -392,49 +416,49 @@ If you want to add support for a new site, you can follow this quick list (assum
 2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
 3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
+    ```python
+    # coding: utf-8
+    from __future__ import unicode_literals
 
-        # coding: utf-8
-        from __future__ import unicode_literals
+    import re
 
-        import re
+    from .common import InfoExtractor
 
-        from .common import InfoExtractor
-        
-        
-        class YourExtractorIE(InfoExtractor):
-            _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
-            _TEST = {
-                'url': 'http://yourextractor.com/watch/42',
-                'md5': 'TODO: md5 sum of the first 10KiB of the video file',
-                'info_dict': {
-                    'id': '42',
-                    'ext': 'mp4',
-                    'title': 'Video title goes here',
-                    # TODO more properties, either as:
-                    # * A value
-                    # * MD5 checksum; start the string with md5:
-                    # * A regular expression; start the string with re:
-                    # * Any Python type (for example int or float)
-                }
 
+    class YourExtractorIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+        _TEST = {
+            'url': 'http://yourextractor.com/watch/42',
+            'md5': 'TODO: md5 sum of the first 10KiB of the video file',
+            'info_dict': {
+                'id': '42',
+                'ext': 'mp4',
+                'title': 'Video title goes here',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                # TODO more properties, either as:
+                # * A value
+                # * MD5 checksum; start the string with md5:
+                # * A regular expression; start the string with re:
+                # * Any Python type (for example int or float)
+            }
+        }
 
-            def _real_extract(self, url):
-                mobj = re.match(self._VALID_URL, url)
-                video_id = mobj.group('id')
-
-                # TODO more code goes here, for example ...
-                webpage = self._download_webpage(url, video_id)
-                title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
-
-                return {
-                    'id': video_id,
-                    'title': title,
-                    # TODO more properties (see youtube_dl/extractor/common.py)
-                }
+        def _real_extract(self, url):
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('id')
 
+            # TODO more code goes here, for example ...
+            webpage = self._download_webpage(url, video_id)
+            title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
 
+            return {
+                'id': video_id,
+                'title': title,
+                # TODO more properties (see youtube_dl/extractor/common.py)
+            }
+    ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
 9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
````
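The updated step 6 above mentions renaming `_TEST` to `_TESTS` when an extractor carries more than one test case. A minimal sketch of that shape, reusing the placeholder extractor from the template (the second URL, id and title are invented purely for illustration):

```python
# Sketch of the _TESTS list form described in step 6; all values are placeholders.
from .common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    # A list instead of a single dict; the generated tests are then named
    # test_YourExtractor, test_YourExtractor_1, test_YourExtractor_2, ...
    _TESTS = [{
        'url': 'http://yourextractor.com/watch/42',
        'md5': 'TODO: md5 sum of the first 10KiB of the video file',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
        },
    }, {
        'url': 'http://yourextractor.com/watch/43',
        'info_dict': {
            'id': '43',
            'ext': 'mp4',
            'title': 'Another video title',
        },
    }]
```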
**test/helper.py**

```diff
@@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict):
                 u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
     # Check for the presence of mandatory fields
-    for key in ('id', 'url', 'title', 'ext'):
-        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    if got_dict.get('_type') != 'playlist':
+        for key in ('id', 'url', 'title', 'ext'):
+            self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
     # Check for mandatory fields that are automatically set by YoutubeDL
     for key in ['webpage_url', 'extractor', 'extractor_key']:
         self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
@@ -137,8 +138,8 @@ def expect_info_dict(self, expected_dict, got_dict):
 
 
 def assertRegexpMatches(self, text, regexp, msg=None):
-    if hasattr(self, 'assertRegexpMatches'):
-        return self.assertRegexpMatches(text, regexp, msg)
+    if hasattr(self, 'assertRegexp'):
+        return self.assertRegexp(text, regexp, msg)
     else:
         m = re.match(regexp, text)
         if not m:
@@ -148,3 +149,10 @@ def assertRegexpMatches(self, text, regexp, msg=None):
             else:
                 msg = note + ', ' + msg
             self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+    if not (got >= expected):
+        if msg is None:
+            msg = '%r not greater than or equal to %r' % (got, expected)
+        self.assertTrue(got >= expected, msg)
```
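The new `assertGreaterEqual` helper, like the existing `assertRegexpMatches`, is a module-level function that takes the `TestCase` instance as its first argument, which is how the playlist tests further down call it. A small illustrative sketch (the values are made up; it assumes it is run from a youtube-dl checkout so that `test.helper` is importable):

```python
import unittest

# These helpers live in test/helper.py of the youtube-dl source tree.
from test.helper import assertGreaterEqual, assertRegexpMatches


class ExampleTest(unittest.TestCase):
    def test_entries(self):
        entries = ['a', 'b', 'c']  # stand-in for result['entries']
        # Fails with "%r not greater than or equal to %r" when the check does not hold.
        assertGreaterEqual(self, len(entries), 2)
        # Mirrors the call style used in the test_playlists.py changes below.
        assertRegexpMatches(self, 'TILT Brass - Bowery Poetry Club', r'.*?TILT Brass')


if __name__ == '__main__':
    unittest.main()
```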
**test/test_YoutubeDL.py**

```diff
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
             '138', '137', '248', '136', '247', '135', '246',
             '245', '244', '134', '243', '133', '242', '160',
             # Dash audio
-            '141', '172', '140', '139', '171',
+            '141', '172', '140', '171', '139',
         ]
 
         for f1id, f2id in zip(order, order[1:]):
```
**test/test_all_urls.py**

```diff
@@ -15,7 +15,6 @@ from youtube_dl.extractor import (
     FacebookIE,
     gen_extractors,
     JustinTVIE,
-    PBSIE,
     YoutubeIE,
 )
 
@@ -100,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
 
     def test_facebook_matching(self):
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
 
     def test_no_duplicates(self):
         ies = gen_extractors()
```
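The `suitable()` checks above use the same mechanism youtube-dl relies on to route a URL to an extractor. A short sketch of probing a URL against all registered extractors, using the Facebook URL from the new assertion (output depends on the installed version):

```python
from youtube_dl.extractor import gen_extractors

url = 'https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'
# gen_extractors() returns one instance of every registered extractor.
matching = [ie.IE_NAME for ie in gen_extractors() if ie.suitable(url)]
print(matching)
```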
**test/test_download.py**

```diff
@@ -10,7 +10,6 @@ from test.helper import (
     get_params,
     gettestcases,
     expect_info_dict,
-    md5,
     try_rm,
     report_warning,
 )
@@ -24,7 +23,6 @@ import socket
 import youtube_dl.YoutubeDL
 from youtube_dl.utils import (
     compat_http_client,
-    compat_str,
     compat_urllib_error,
     compat_HTTPError,
     DownloadError,
@@ -65,15 +63,21 @@ def generator(test_case):
     def test_template(self):
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
         other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+        is_playlist = any(k.startswith('playlist') for k in test_case)
+        test_cases = test_case.get(
+            'playlist', [] if is_playlist else [test_case])
+
         def print_skipping(reason):
             print('Skipping %s: %s' % (test_case['name'], reason))
         if not ie.working():
             print_skipping('IE marked as not _WORKING')
             return
-        if 'playlist' not in test_case:
-            info_dict = test_case.get('info_dict', {})
-            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+
+        for tc in test_cases:
+            info_dict = tc.get('info_dict', {})
+            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+
         if 'skip' in test_case:
             print_skipping(test_case['skip'])
             return
@@ -83,6 +87,9 @@ def generator(test_case):
                 return
 
         params = get_params(test_case.get('params', {}))
+        if is_playlist and 'playlist' not in test_case:
+            params.setdefault('extract_flat', True)
+            params.setdefault('skip_download', True)
 
         ydl = YoutubeDL(params)
         ydl.add_default_info_extractors()
@@ -95,7 +102,6 @@ def generator(test_case):
         def get_tc_filename(tc):
             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
 
-        test_cases = test_case.get('playlist', [test_case])
         def try_rm_tcs_files():
             for tc in test_cases:
                 tc_filename = get_tc_filename(tc)
@@ -107,7 +113,10 @@ def generator(test_case):
             try_num = 1
             while True:
                 try:
-                    ydl.download([test_case['url']])
+                    # We're not using .download here sine that is just a shim
+                    # for outside error handling, and returns the exit code
+                    # instead of the result dict.
+                    res_dict = ydl.extract_info(test_case['url'])
                 except (DownloadError, ExtractorError) as err:
                     # Check if the exception is not a network related one
                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
@@ -123,6 +132,17 @@ def generator(test_case):
                 else:
                     break
 
+            if is_playlist:
+                self.assertEqual(res_dict['_type'], 'playlist')
+                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+            if 'playlist_mincount' in test_case:
+                self.assertGreaterEqual(
+                    len(res_dict['entries']),
+                    test_case['playlist_mincount'],
+                    'Expected at least %d in playlist %s, but got only %d' % (
+                        test_case['playlist_mincount'], test_case['url'],
+                        len(res_dict['entries'])))
+
             for tc in test_cases:
                 tc_filename = get_tc_filename(tc)
                 if not test_case.get('params', {}).get('skip_download', False):
```
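For playlist-only test cases the generator above now defaults to `extract_flat` and `skip_download`. Outside the test suite the same pair of options can be passed to `YoutubeDL` directly; a rough sketch with a placeholder URL (exact entry contents depend on the extractor):

```python
import youtube_dl

params = {
    'extract_flat': True,   # keep playlist entries as lightweight references
    'skip_download': True,  # metadata only, no media download
    'quiet': True,
}
with youtube_dl.YoutubeDL(params) as ydl:
    # Placeholder URL; any playlist URL handled by an extractor would do.
    info = ydl.extract_info('http://example.com/playlist/42', download=False)

if info.get('_type') == 'playlist':
    entries = list(info.get('entries') or [])
    print('%d entries in %s' % (len(entries), info.get('title')))
```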
@@ -1,6 +1,17 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
## DEPRECATED FILE!
 | 
			
		||||
# Add new tests to the extractors themselves, like this:
 | 
			
		||||
# _TEST = {
 | 
			
		||||
#    'url': 'http://example.com/playlist/42',
 | 
			
		||||
#    'playlist_mincount': 99,
 | 
			
		||||
#    'info_dict': {
 | 
			
		||||
#        'id': '42',
 | 
			
		||||
#        'title': 'Playlist number forty-two',
 | 
			
		||||
#    }
 | 
			
		||||
# }
 | 
			
		||||
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
# Allow direct execution
 | 
			
		||||
@@ -11,6 +22,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
from test.helper import (
 | 
			
		||||
    assertRegexpMatches,
 | 
			
		||||
    assertGreaterEqual,
 | 
			
		||||
    expect_info_dict,
 | 
			
		||||
    FakeYDL,
 | 
			
		||||
)
 | 
			
		||||
@@ -50,6 +62,7 @@ from youtube_dl.extractor import (
 | 
			
		||||
    InstagramUserIE,
 | 
			
		||||
    CSpanIE,
 | 
			
		||||
    AolIE,
 | 
			
		||||
    GameOnePlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -71,8 +84,8 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        ie = DailymotionUserIE(dl)
 | 
			
		||||
        result = ie.extract('https://www.dailymotion.com/user/nqtv')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 100)
 | 
			
		||||
        self.assertEqual(result['title'], 'Rémi Gaillard')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 100)
 | 
			
		||||
 | 
			
		||||
    def test_vimeo_channel(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -111,7 +124,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        ie = VineUserIE(dl)
 | 
			
		||||
        result = ie.extract('https://vine.co/Visa')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 47)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 47)
 | 
			
		||||
 | 
			
		||||
    def test_ustream_channel(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -119,7 +132,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('http://www.ustream.tv/channel/channeljapan')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], '10874166')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 54)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 54)
 | 
			
		||||
 | 
			
		||||
    def test_soundcloud_set(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -127,7 +140,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], 'The Royal Concept EP')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 6)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 6)
 | 
			
		||||
 | 
			
		||||
    def test_soundcloud_user(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -135,7 +148,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('https://soundcloud.com/the-concept-band')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], '9615865')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 12)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 12)
 | 
			
		||||
 | 
			
		||||
    def test_soundcloud_likes(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -143,7 +156,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('https://soundcloud.com/the-concept-band/likes')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], '9615865')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 1)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 1)
 | 
			
		||||
 | 
			
		||||
    def test_soundcloud_playlist(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -153,7 +166,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(result['id'], '4110309')
 | 
			
		||||
        self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
 | 
			
		||||
        assertRegexpMatches(
 | 
			
		||||
            self, result['description'], r'TILT Brass - Bowery Poetry Club')
 | 
			
		||||
            self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
 | 
			
		||||
        self.assertEqual(len(result['entries']), 6)
 | 
			
		||||
 | 
			
		||||
    def test_livestream_event(self):
 | 
			
		||||
@@ -162,7 +175,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('http://new.livestream.com/tedx/cityenglish')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], 'TEDCity2.0 (English)')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 4)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 4)
 | 
			
		||||
 | 
			
		||||
    def test_livestreamoriginal_folder(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -170,7 +183,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 28)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 28)
 | 
			
		||||
 | 
			
		||||
    def test_nhl_videocenter(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -187,15 +200,15 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('http://bambuser.com/channel/pixelversity')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], 'pixelversity')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 60)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 60)
 | 
			
		||||
 | 
			
		||||
    def test_bandcamp_album(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
        ie = BandcampAlbumIE(dl)
 | 
			
		||||
        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
 | 
			
		||||
        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['title'], 'Nightmare Night EP')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 4)
 | 
			
		||||
        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 9)
 | 
			
		||||
        
 | 
			
		||||
    def test_smotri_community(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -204,7 +217,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], 'kommuna')
 | 
			
		||||
        self.assertEqual(result['title'], 'КПРФ')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 4)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 4)
 | 
			
		||||
        
 | 
			
		||||
    def test_smotri_user(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -213,7 +226,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], 'inspector')
 | 
			
		||||
        self.assertEqual(result['title'], 'Inspector')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 9)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 9)
 | 
			
		||||
 | 
			
		||||
    def test_AcademicEarthCourse(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -232,7 +245,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
 | 
			
		||||
        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 24)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 24)
 | 
			
		||||
 | 
			
		||||
    def test_ivi_compilation_season(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -241,7 +254,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
 | 
			
		||||
        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 12)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 12)
 | 
			
		||||
        
 | 
			
		||||
    def test_imdb_list(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -260,7 +273,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(result['id'], 'cryptography')
 | 
			
		||||
        self.assertEqual(result['title'], 'Journey into cryptography')
 | 
			
		||||
        self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 3)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 3)
 | 
			
		||||
 | 
			
		||||
    def test_EveryonesMixtape(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -277,7 +290,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('http://rutube.ru/tags/video/1800/')
 | 
			
		||||
        self.assertIsPlaylist(result)
 | 
			
		||||
        self.assertEqual(result['id'], '1800')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 68)
 | 
			
		||||
        assertGreaterEqual(self, len(result['entries']), 68)
 | 
			
		||||
 | 
			
		||||
    def test_rutube_person(self):
 | 
			
		||||
        dl = FakeYDL()
 | 
			
		||||
@@ -285,7 +298,7 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        result = ie.extract('http://rutube.ru/video/person/313878/')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], '313878')
        self.assertTrue(len(result['entries']) >= 37)
        assertGreaterEqual(self, len(result['entries']), 37)

    def test_multiple_brightcove_videos(self):
        # https://github.com/rg3/youtube-dl/issues/2283
@@ -322,7 +335,7 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], '10')
        self.assertEqual(result['title'], 'Who are the hackers?')
        self.assertTrue(len(result['entries']) >= 6)
        assertGreaterEqual(self, len(result['entries']), 6)

    def test_toypics_user(self):
        dl = FakeYDL()
@@ -330,7 +343,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://videos.toypics.net/Mikey')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'Mikey')
        self.assertTrue(len(result['entries']) >= 17)
        assertGreaterEqual(self, len(result['entries']), 17)

    def test_xtube_user(self):
        dl = FakeYDL()
@@ -338,7 +351,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'greenshowers')
        self.assertTrue(len(result['entries']) >= 155)
        assertGreaterEqual(self, len(result['entries']), 155)

    def test_InstagramUser(self):
        dl = FakeYDL()
@@ -346,7 +359,7 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://instagram.com/porsche')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'porsche')
        self.assertTrue(len(result['entries']) >= 2)
        assertGreaterEqual(self, len(result['entries']), 2)
        test_video = next(
            e for e in result['entries']
            if e['id'] == '614605558512799803_462752227')
@@ -385,7 +398,7 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '152147')
        self.assertEqual(
            result['title'], 'Brace Yourself - Today\'s Weirdest News')
        self.assertTrue(len(result['entries']) >= 10)
        assertGreaterEqual(self, len(result['entries']), 10)

    def test_TeacherTubeUser(self):
        dl = FakeYDL()
@@ -393,7 +406,8 @@ class TestPlaylists(unittest.TestCase):
        result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'rbhagwati2')
        self.assertTrue(len(result['entries']) >= 179)
        assertGreaterEqual(self, len(result['entries']), 179)


if __name__ == '__main__':
    unittest.main()

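Aside: the assertion rewrite repeated throughout the playlist tests above swaps self.assertTrue(len(...) >= N) for a standalone assertGreaterEqual(self, ...) helper, which keeps working on Python 2.6's unittest (no assertGreaterEqual method) while still reporting both values on failure. A minimal sketch of such a helper, assuming it lives in the shared test helpers; the names here are illustrative, not the project's exact code:

    def assertGreaterEqual(self, got, expected, msg=None):
        # Readable failure message for unittest versions that lack assertGreaterEqual.
        if not (got >= expected):
            if msg is None:
                msg = '%r not greater than or equal to %r' % (got, expected)
            self.assertTrue(got >= expected, msg)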
@@ -7,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


import errno
import io
import json
import re

@@ -219,6 +219,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('0h0m0s'), 0)
        self.assertEqual(parse_duration('0m0s'), 0)
        self.assertEqual(parse_duration('0s'), 0)
        self.assertEqual(parse_duration('01:02:03.05'), 3723.05)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
@@ -280,7 +281,7 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

    def test_uppercase_escpae(self):
    def test_uppercase_escape(self):
        self.assertEqual(uppercase_escape(u'aä'), u'aä')
        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

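The new parse_duration assertion above expects fractional seconds to survive: '01:02:03.05' is 1*3600 + 2*60 + 3.05 = 3723.05. A quick standalone check against an installed copy of youtube_dl (illustrative usage only):

    from youtube_dl.utils import parse_duration

    assert parse_duration('01:02:03.05') == 3723.05  # fractional seconds are kept
    assert parse_duration('0s') == 0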
@@ -1,5 +1,7 @@
#!/usr/bin/env python

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
@@ -16,47 +18,65 @@ from youtube_dl.utils import compat_str, compat_urlretrieve

_TESTS = [
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        u'js',
        'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        'js',
        86,
        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
        '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        u'js',
        'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        'js',
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
        '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
        u'js',
        'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
        'js',
        90,
        u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
        ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
        u'js',
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
        'js',
        84,
        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
        'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
        u'js',
        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
        'js',
        '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
        'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
    ),
    (
        u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
        u'swf',
        'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
        'swf',
        86,
        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
        'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
    ),
    (
        u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
        u'swf',
        u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
        u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
        'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
        'swf',
        'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
        '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
        'js',
        84,
        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
        'js',
        83,
        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
        'js',
        '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
    )
]


@@ -69,7 +89,7 @@ class TestSignature(unittest.TestCase):


def make_tfunc(url, stype, sig_input, expected_sig):
    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
    assert m, '%r should follow URL format' % url
    test_id = m.group(1)

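For context, make_tfunc turns each tuple in _TESTS into its own generated test method, and the added '/html5player' alternative in the regex is what lets the new '.../html5player-en_US-vflCGk6yw/html5player.js' URL still yield a usable test id. A simplified sketch of that generation pattern, assuming the real file attaches the functions with setattr; the body of test_func below is a placeholder, not the actual signature check:

    import re
    import unittest

    class TestSignature(unittest.TestCase):
        pass

    def make_tfunc(url):
        # Derive a test id such as 'vflCGk6yw' from the player URL.
        m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
        assert m, '%r should follow URL format' % url
        test_id = m.group(1)

        def test_func(self):
            self.assertTrue(test_id)  # placeholder for the real signature comparison

        test_func.__name__ = str('test_signature_' + test_id)
        setattr(TestSignature, test_func.__name__, test_func)

    make_tfunc('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js')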
@@ -1,12 +0,0 @@
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
from .downloader import FileDownloader as RealFileDownloader
from .downloader import get_suitable_downloader


# This class reproduces the old behaviour of FileDownloader
class FileDownloader(RealFileDownloader):
    def _do_download(self, filename, info_dict):
        real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
        for ph in self._progress_hooks:
            real_fd.add_progress_hook(ph)
        return real_fd.download(filename, info_dict)
@@ -162,6 +162,7 @@ class YoutubeDL(object):
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -171,6 +172,7 @@ class YoutubeDL(object):
    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    params = None
@@ -275,7 +277,7 @@ class YoutubeDL(object):
            return message

        assert hasattr(self, '_output_process')
        assert type(message) == type('')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
@@ -303,7 +305,7 @@ class YoutubeDL(object):

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type('')
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
@@ -423,7 +425,7 @@ class YoutubeDL(object):
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
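The '%0*d' change above pads playlist_index to the width of the playlist's own entry count instead of a fixed five digits; the '*' consumes an extra argument as the field width. Standalone illustration:

    n_entries = 130
    playlist_index = 7
    print('%0*d' % (len(str(n_entries)), playlist_index))  # -> '007' (width follows n_entries)
    print('%05d' % playlist_index)                         # previous behaviour -> '00007'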
@@ -479,7 +481,10 @@ class YoutubeDL(object):
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
                actual_age_limit = 0
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
@@ -558,7 +563,12 @@ class YoutubeDL(object):
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        result_type = ie_result.get('_type', 'video')

        if self.params.get('extract_flat', False):
            if result_type in ('url', 'url_transparent'):
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
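With the new extract_flat parameter, 'url' and 'url_transparent' results are returned as-is instead of being resolved, so playlist entries stay cheap URL stubs. A hedged sketch of how an embedding script might use it; the playlist URL is a placeholder and the behaviour is as described in the parameter docs earlier in this file's diff:

    from youtube_dl import YoutubeDL

    with YoutubeDL({'extract_flat': True, 'quiet': True}) as ydl:
        # Entries are only listed, not resolved into full video results.
        info = ydl.extract_info('https://www.youtube.com/playlist?list=PL_PLACEHOLDER',
                                download=False)
        for entry in info.get('entries', []):
            print(entry.get('url'))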
@@ -627,6 +637,7 @@ class YoutubeDL(object):
            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
@@ -849,7 +860,7 @@ class YoutubeDL(object):
        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if not 'format' in info_dict:
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
@@ -999,7 +1010,7 @@ class YoutubeDL(object):
                    if info_dict.get('requested_formats') is not None:
                        downloaded = []
                        success = True
                        merger = FFmpegMergerPP(self)
                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                        if not merger._get_executable():
                            postprocessors = []
                            self.report_warning('You have requested multiple '
@@ -1197,6 +1208,10 @@ class YoutubeDL(object):
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res

    def list_formats(self, info_dict):
@@ -1230,14 +1245,18 @@ class YoutubeDL(object):
        if not self.params.get('verbose'):
            return

        write_string(
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
                self.get_encoding()),
            encoding=None
        )
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:

@@ -66,18 +66,25 @@ __authors__  = (
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
)

__license__ = 'Public Domain'

import codecs
import io
import locale
import optparse
import os
import random
import re
import shlex
import shutil
import sys


@@ -99,7 +106,7 @@ from .utils import (
    write_string,
)
from .update import update_self
from .FileDownloader import (
from .downloader import (
    FileDownloader,
)
from .extractor import gen_extractors
@@ -113,6 +120,7 @@ from .postprocessor import (
    FFmpegExtractAudioPP,
    FFmpegEmbedSubtitlePP,
    XAttrMetadataPP,
    ExecAfterDownloadPP,
)


@@ -224,6 +232,7 @@ def parseOpts(overrideArguments=None):
    downloader     = optparse.OptionGroup(parser, 'Download Options')
    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
@@ -240,14 +249,6 @@ def parseOpts(overrideArguments=None):
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--user-agent',
            dest='user_agent', help='specify a custom user agent', metavar='UA')
    general.add_option('--referer',
            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
            metavar='REF', default=None)
    general.add_option('--add-header',
            dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
            metavar='FIELD:VALUE')
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
@@ -257,33 +258,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--proxy', dest='proxy', default=None, metavar='URL',
        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
    general.add_option(
        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
        help='Disable filesystem caching')
    general.add_option(
        '--socket-timeout', dest='socket_timeout',
        type=float, default=None, help=u'Time to wait before giving up, in seconds')
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
    general.add_option(
        '--default-search',
        dest='default_search', metavar='PREFIX',
        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
    general.add_option(
        '--encoding', dest='encoding', metavar='ENCODING',
        help='Force the specified encoding (experimental)')

    selection.add_option(
        '--playlist-start',
@@ -335,6 +320,8 @@ def parseOpts(overrideArguments=None):
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-2', '--twofactor',
            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
    authentication.add_option('--video-password',
@@ -384,6 +371,33 @@ def parseOpts(overrideArguments=None):
            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    workarounds.add_option(
        '--encoding', dest='encoding', metavar='ENCODING',
        help='Force the specified encoding (experimental)')
    workarounds.add_option(
        '--no-check-certificate', action='store_true',
        dest='no_check_certificate', default=False,
        help='Suppress HTTPS certificate validation.')
    workarounds.add_option(
        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
    workarounds.add_option(
        '--user-agent', metavar='UA',
        dest='user_agent', help='specify a custom user agent')
    workarounds.add_option(
        '--referer', metavar='REF',
        dest='referer', default=None,
        help='specify a custom referer, use if the video access is restricted to one domain',
    )
    workarounds.add_option(
        '--add-header', metavar='FIELD:VALUE',
        dest='headers', action='append',
        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
    )
    workarounds.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option(
@@ -441,12 +455,10 @@ def parseOpts(overrideArguments=None):
            help='Display sent and read HTTP traffic')


    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('--id',
            action='store_true', dest='useid', help='use only video ID in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
@@ -472,11 +484,10 @@ def parseOpts(overrideArguments=None):
    filesystem.add_option('--restrict-filenames',
            action='store_true', dest='restrictfilenames',
            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('--load-info',
            dest='load_info_filename', metavar='FILE',
            help='json file containing the video information (created with the "--write-json" option)')
    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
@@ -484,8 +495,6 @@ def parseOpts(overrideArguments=None):
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
@@ -503,6 +512,20 @@ def parseOpts(overrideArguments=None):
    filesystem.add_option('--write-thumbnail',
            action='store_true', dest='writethumbnail',
            help='write thumbnail image to disk', default=False)
    filesystem.add_option('--load-info',
            dest='load_info_filename', metavar='FILE',
            help='json file containing the video information (created with the "--write-json" option)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
    filesystem.add_option(
        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
        help='Disable filesystem caching')
    filesystem.add_option(
        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
        help='Delete all filesystem cache files')


    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
@@ -529,13 +552,16 @@ def parseOpts(overrideArguments=None):
        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
        help='Prefer ffmpeg over avconv for running the postprocessors')

    postproc.add_option(
        '--exec', metavar='CMD', dest='exec_cmd',
        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )

    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(downloader)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(workarounds)
    parser.add_option_group(video_format)
    parser.add_option_group(subtitles)
    parser.add_option_group(authentication)
@@ -635,7 +661,7 @@ def _real_main(argv=None):
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
                _COUNTS = (u'', u'5', u'10', u'all')
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
@@ -696,7 +722,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
@@ -732,6 +758,7 @@ def _real_main(argv=None):
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
@@ -808,6 +835,7 @@ def _real_main(argv=None):
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
    }

    with YoutubeDL(ydl_opts) as ydl:
@@ -831,13 +859,37 @@ def _real_main(argv=None):
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())


        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))

        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)

        # Remove cache dir
        if opts.rm_cachedir:
            if opts.cachedir is None:
                ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
            else:
                if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
                    retcode = 141
                else:
                    ydl.to_screen(
                        u'Removing cache dir %s .' % opts.cachedir,
                        skip_eol=True)
                    if os.path.exists(opts.cachedir):
                        ydl.to_screen(u'.', skip_eol=True)
                        shutil.rmtree(opts.cachedir)
                    ydl.to_screen(u'.')

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            if not opts.update_self:
            if not (opts.update_self or opts.rm_cachedir):
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()

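Several of the options wired up above (--twofactor, --exec via ExecAfterDownloadPP, the extended --default-search values) are also reachable from the Python API. A sketch of an embedding script under those assumptions; the two-factor code and the echo command are purely illustrative:

    from youtube_dl import YoutubeDL
    from youtube_dl.postprocessor import ExecAfterDownloadPP

    ydl_opts = {
        'default_search': 'fixup_error',  # repair broken URLs, error out instead of searching
        'twofactor': '123456',            # hypothetical two-factor auth code
    }
    ydl = YoutubeDL(ydl_opts)
    # Mirror what _real_main does for --exec: keep this post processor last,
    # since the command may move or delete the downloaded file.
    ydl.add_post_processor(ExecAfterDownloadPP(
        verboseOutput=False, exec_cmd='echo {}'))
    # ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])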
@@ -292,7 +292,7 @@ class FileDownloader(object):

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError(u'This method must be implemented by sublcasses')
        raise NotImplementedError(u'This method must be implemented by subclasses')

    def _hook_progress(self, status):
        for ph in self._progress_hooks:

@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):

    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        formats = sorted(formats, key=lambda f: f[0])
        rate, media = formats[-1]
        if requested_bitrate is None:
            # get the best format
            formats = sorted(formats, key=lambda f: f[0])
            rate, media = formats[-1]
        else:
            rate, media = list(filter(
                lambda f: int(f[0]) == requested_bitrate, formats))[0]

        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)

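The f4m change above lets a caller pin a bitrate through info_dict['tbr'] instead of always taking the highest-bitrate media entry. The selection logic in isolation, with toy (bitrate, media) pairs rather than a real manifest:

    formats = [(400, 'media-400'), (1200, 'media-1200'), (800, 'media-800')]
    requested_bitrate = 800

    if requested_bitrate is None:
        # No preference: take the best (highest bitrate) format.
        rate, media = sorted(formats, key=lambda f: f[0])[-1]
    else:
        rate, media = [f for f in formats if int(f[0]) == requested_bitrate][0]
    print(rate, media)  # -> 800 media-800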
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        if 'http_referer' in info_dict:
            headers['Referer'] = info_dict['http_referer']
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        data = info_dict.get('http_post_data')
        http_method = info_dict.get('http_method')
        basic_request = compat_urllib_request.Request(url, data, headers)
        request = compat_urllib_request.Request(url, data, headers)
        if http_method is not None:
            basic_request.get_method = lambda: http_method
            request.get_method = lambda: http_method

        is_test = self.params.get('test', False)

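The HttpFD change above lets an info_dict carry extra headers, a POST body and an explicit HTTP method down to the actual request. A reduced, standard-library sketch of that construction; the field names come from the diff, while the URL and header values are made up:

    try:
        import urllib.request as compat_urllib_request  # Python 3
    except ImportError:
        import urllib2 as compat_urllib_request         # Python 2

    url = 'http://example.com/video.mp4'
    info_dict = {
        'http_headers': {'X-Requested-With': 'XMLHttpRequest'},
        'http_post_data': b'foo=bar',
        'http_method': 'POST',
    }

    headers = {'Youtubedl-no-compression': 'True'}
    add_headers = info_dict.get('http_headers')
    if add_headers:
        headers.update(add_headers)
    data = info_dict.get('http_post_data')
    http_method = info_dict.get('http_method')

    request = compat_urllib_request.Request(url, data, headers)
    if http_method is not None:
        request.get_method = lambda: http_method
    print(request.get_method())  # -> 'POST' (nothing is fetched here)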
@@ -1,3 +1,4 @@
from .abc import ABCIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .adultswim import AdultSwimIE
@@ -68,6 +69,7 @@ from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
from .dump import DumpIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
@@ -76,12 +78,17 @@ from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .ellentv import (
    EllenTVIE,
    EllenTVClipsIE,
)
from .elpais import ElPaisIE
from .empflix import EmpflixIE
from .engadget import EngadgetIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .expotv import ExpoTVIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
@@ -109,20 +116,27 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gameone import GameOneIE
from .gameone import (
    GameOneIE,
    GameOnePlaylistIE,
)
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .godtube import GodTubeIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .grooveshark import GroovesharkIE
from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .huffpost import HuffPostIE
from .hypem import HypemIE
from .iconosquare import IconosquareIE
@@ -140,8 +154,10 @@ from .ivi import (
    IviIE,
    IviCompilationIE
)
from .izlesene import IzleseneIE
from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
@@ -151,6 +167,7 @@ from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
@@ -171,15 +188,19 @@ from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .ministrygrid import MinistryGridIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .mlb import MLBIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
@@ -219,12 +240,17 @@ from .nrk import (
from .ntv import NTVIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .orf import (
    ORFTVthekIE,
    ORFOE1IE,
    ORFFM4IE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
@@ -242,9 +268,10 @@ from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rtlnl import RtlXlIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rtve import RTVEALaCartaIE, RTVELiveIE
from .ruhd import RUHDIE
from .rutube import (
    RutubeIE,
@@ -255,9 +282,11 @@ from .rutube import (
from .rutv import RUTVIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
from .shared import SharedIE
from .sina import SinaIE
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
@@ -267,6 +296,8 @@ from .smotri import (
    SmotriUserIE,
    SmotriBroadcastIE,
)
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE
from .soundcloud import (
    SoundcloudIE,
@@ -318,6 +349,8 @@ from .tumblr import TumblrIE
from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .tvplay import TVPlayIE
from .ubu import UbuIE
from .udemy import (
    UdemyIE,
    UdemyCourseIE
@@ -339,6 +372,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
@@ -361,6 +395,7 @@ from .vuclip import VuClipIE
from .vulture import VultureIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wayofthemaster import WayOfTheMasterIE
from .wdr import (
    WDRIE,
    WDRMobileIE,
@@ -372,6 +407,7 @@ from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE

youtube_dl/extractor/abc.py (new file, 48 lines)
@@ -0,0 +1,48 @@
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor


class ABCIE(InfoExtractor):
    IE_NAME = 'abc.net.au'
    _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
        'info_dict': {
            'id': '5624716',
            'ext': 'mp4',
            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        urls_info_json = self._search_regex(
            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
            flags=re.DOTALL)
        urls_info = json.loads(urls_info_json.replace('\'', '"'))
        formats = [{
            'url': url_info['url'],
            'width': int(url_info['width']),
            'height': int(url_info['height']),
            'tbr': int(url_info['bitrate']),
            'filesize': int(url_info['filesize']),
        } for url_info in urls_info]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
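The new ABC extractor pulls a JavaScript array out of inlineVideoData.push(...) and loads it as JSON after swapping single quotes for double quotes. A toy demonstration of just that parsing step; the markup below is invented, not a real abc.net.au page:

    import json
    import re

    webpage = ("<script>inlineVideoData.push([{'url': 'http://example.com/v.mp4', "
               "'width': '640', 'height': '360', 'bitrate': '1000', 'filesize': '123456'}]);</script>")

    urls_info_json = re.search(
        r'inlineVideoData\.push\((.*?)\);', webpage, re.DOTALL).group(1)
    urls_info = json.loads(urls_info_json.replace('\'', '"'))  # the page uses single quotes
    formats = [{
        'url': u['url'],
        'width': int(u['width']),
        'height': int(u['height']),
        'tbr': int(u['bitrate']),
        'filesize': int(u['filesize']),
    } for u in urls_info]
    print(formats[0]['height'])  # -> 360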
@@ -1,5 +1,7 @@
#coding: utf-8

from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -13,13 +15,14 @@ class AparatIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'

    _TEST = {
        u'url': u'http://www.aparat.com/v/wP8On',
        u'file': u'wP8On.mp4',
        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
        u'info_dict': {
            u"title": u"تیم گلکسی 11 - زومیت",
        'url': 'http://www.aparat.com/v/wP8On',
        'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
        'info_dict': {
            'id': 'wP8On',
            'ext': 'mp4',
            'title': 'تیم گلکسی 11 - زومیت',
        },
        #u'skip': u'Extremely unreliable',
        # 'skip': 'Extremely unreliable',
    }

    def _real_extract(self, url):
@@ -29,8 +32,8 @@ class AparatIE(InfoExtractor):
        # Note: There is an easier-to-parse configuration at
        # http://www.aparat.com/video/video/config/videohash/%video_id
        # but the URL in there does not work
        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
                     video_id + u'/vt/frame')
        embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
                     video_id + '/vt/frame')
        webpage = self._download_webpage(embed_url, video_id)

        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)

@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    int_or_none,
)


@@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                formats.append({
                    'url': format_url,
                    'format': format['type'],
                    'width': format['width'],
                    'height': int(format['height']),
                    'width': int_or_none(format['width']),
                    'height': int_or_none(format['height']),
                })

            self._sort_formats(formats)

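int_or_none is the defensive conversion now used for the Apple trailers dimensions: unlike a bare int(), it tolerates a missing value. A minimal sketch consistent with how it is used here; the real helper in youtube_dl.utils takes additional scaling arguments:

    def int_or_none(v, default=None):
        # Return the default instead of raising when the value is absent.
        return default if v is None else int(v)

    print(int_or_none('480'))  # -> 480
    print(int_or_none(None))   # -> None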
@@ -8,6 +8,8 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    qualities,
    compat_urllib_parse_urlparse,
    compat_urllib_parse,
)


@@ -44,8 +46,14 @@ class ARDIE(InfoExtractor):
        else:
            video_id = m.group('video_id')

        urlp = compat_urllib_parse_urlparse(url)
        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()

        webpage = self._download_webpage(url, video_id)

        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)

        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',

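The two added urlp lines re-quote the URL path before fetching it, so non-ASCII characters in an ARD URL no longer break the request. The same idea with the plain standard library; Python 3 names are used here while the diff relies on the compat_ wrappers, and the URL is illustrative:

    from urllib.parse import urlparse, quote

    url = 'http://www.ardmediathek.de/tv/Reportage-über-Umlaute'  # made-up path
    urlp = urlparse(url)
    url = urlp._replace(path=quote(urlp.path.encode('utf-8'))).geturl()
    print(url)  # the path is now percent-encoded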
@@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
        # TODO: Might want not to drop videos that does not match requested language
        # but to process those formats with lower precedence
        formats = filter(_match_lang, all_formats)
        formats = list(formats) # in python3 filter returns an iterator
        formats = list(formats)  # in python3 filter returns an iterator
        if not formats:
            # Some videos are only available in the 'Originalversion'
            # they aren't tagged as being in French or German
            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
                formats = all_formats
            else:
                raise ExtractorError(u'The formats list is empty')
            # Sometimes there are neither videos of requested lang code
            # nor original version videos available
            # For such cases we just take all_formats as is
            formats = all_formats
            if not formats:
                raise ExtractorError('The formats list is empty')

        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
            def sort_key(f):
@@ -173,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
# It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:creative'
    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'

    _TEST = {
    _TESTS = [{
        'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
        'info_dict': {
            'id': '050489-002',
            'id': '72176',
            'ext': 'mp4',
            'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
            'title': 'Folge 2 - Corporate Design',
            'upload_date': '20131004',
        },
    }
    }, {
        'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
        'info_dict': {
            'id': '160676',
            'ext': 'mp4',
            'title': 'Monty Python live (mostly)',
            'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
            'upload_date': '20140805',
        }
    }]


class ArteTVFutureIE(ArteTVPlus7IE):

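The inline comment in the Arte hunk is the key detail: on Python 3, filter() returns a lazy iterator, so it must be turned into a list before the code can check whether anything matched or index into it. A tiny illustration of the pattern, detached from the extractor:

    all_formats = [{'versionCode': 'VA'}, {'versionCode': 'VO'}]
    formats = filter(lambda f: f['versionCode'].startswith('VF'), all_formats)
    # An iterator is always truthy, even when it will yield nothing,
    # and it cannot be indexed, hence the explicit list() call.
    formats = list(formats)
    if not formats:
        formats = all_formats  # fall back to every available version
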
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
                    'height': int(m['h']),
                })
            elif m['type'] == 'original':
                duration = m['d']
                duration = float(m['d'])
            elif m['type'] == 'youtube':
                yt_id = m['link']
                self.to_screen('Youtube video detected: %s' % yt_id)

@@ -15,7 +15,7 @@ from ..utils import (


class BlipTVIE(SubtitlesInfoExtractor):
    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    _TESTS = [
        {
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
                'uploader_id': '792887',
                'duration': 279,
            }
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'id': '6573122',
                'ext': 'mov',
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            }
        }
    ]

@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):


class BlipTVUserIE(InfoExtractor):
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'

@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
        'md5': '7bf08858ff7c203c870e8a6190e221e5',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
        return {
            'id': name.split('-')[-1],
            'title': title,
            'url': f4m_url,
            'ext': 'flv',
            'formats': self._extract_f4m_formats(f4m_url, name),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }

@@ -7,12 +7,13 @@ from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_duration,
)


class BRIE(InfoExtractor):
    IE_DESC = 'Bayerischer Rundfunk Mediathek'
    _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
    _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
    _BASE_URL = 'http://www.br.de'

    _TESTS = [
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
            'info_dict': {
                'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
                'ext': 'mp4',
                'title': 'Am 1. und 2. August in Oberammergau',
                'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
                'title': 'Wenn das Traditions-Theater wackelt',
                'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
                'duration': 34,
            }
        },
        {
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Über den Pass',
                'description': 'Die Eroberung der Alpen: Über den Pass',
                'duration': 2588,
            }
        },
        {
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
                'ext': 'aac',
                'title': '"Keine neuen Schulden im nächsten Jahr"',
                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
                'duration': 64,
            }
        },
        {
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Umweltbewusster Häuslebauer',
                'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
                'duration': 116,
            }
        },
        {
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Folge 1 - Metaphysik',
                'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
                'duration': 893,
                'uploader': 'Eva Maria Steimle',
                'upload_date': '20140117',
            }
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
            media = {
                'id': xml_media.get('externalId'),
                'title': xml_media.find('title').text,
                'duration': parse_duration(xml_media.find('duration').text),
                'formats': self._extract_formats(xml_media.find('assets')),
                'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
                'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),

@@ -154,12 +154,14 @@ class BrightcoveIE(InfoExtractor):
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """

        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
        url_m = re.search(
            r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
            webpage)
        if url_m:
            url = unescapeHTML(url_m.group(1))
            # Some sites don't add it, we can't download with this url, for example:
            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
            if 'playerKey' in url:
            if 'playerKey' in url or 'videoId' in url:
                return [url]

        matches = re.findall(
@@ -188,9 +190,13 @@ class BrightcoveIE(InfoExtractor):
            referer = smuggled_data.get('Referer', url)
            return self._get_video_info(
                videoPlayer[0], query_str, query, referer=referer)
        else:
        elif 'playerKey' in query:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])
        else:
            raise ExtractorError(
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)

    def _get_video_info(self, video_id, query_str, query, referer=None):
        request_url = self._FEDERATED_URL_TEMPLATE % query_str
@@ -202,6 +208,13 @@ class BrightcoveIE(InfoExtractor):
            req.add_header('Referer', referer)
        webpage = self._download_webpage(req, video_id)

        error_msg = self._html_search_regex(
            r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
            'error message', default=None)
        if error_msg is not None:
            raise ExtractorError(
                'brightcove said: %s' % error_msg, expected=True)

        self.report_extraction(video_id)
        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
        info = json.loads(info)['data']

@@ -1,24 +1,42 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class CBSIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'

    _TEST = {
        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        u'file': u'4JUVEwq3wUT7.flv',
        u'info_dict': {
            u'title': u'Connect Chat feat. Garth Brooks',
            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            u'duration': 1495,
    _TESTS = [{
        'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        'info_dict': {
            'id': '4JUVEwq3wUT7',
            'ext': 'flv',
            'title': 'Connect Chat feat. Garth Brooks',
            'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            'duration': 1495,
        },
        u'params': {
        'params': {
            # rtmp download
            u'skip_download': True,
            'skip_download': True,
        },
    }
        '_skip': 'Blocked outside the US',
    }, {
        'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
        'info_dict': {
            'id': 'P9gjWjelt6iP',
            'ext': 'flv',
            'title': 'Live on Letterman - St. Vincent',
            'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
            'duration': 3221,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        '_skip': 'Blocked outside the US',
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -26,5 +44,5 @@ class CBSIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        real_id = self._search_regex(
            r"video\.settings\.pid\s*=\s*'([^']+)';",
            webpage, u'real video ID')
            webpage, 'real video ID')
        return self.url_result(u'theplatform:%s' % real_id)

@@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
            'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'

@@ -43,7 +43,11 @@ class CNETIE(InfoExtractor):
            raise ExtractorError('Cannot find video data')

        video_id = vdata['id']
        title = vdata['headline']
        title = vdata.get('headline')
        if title is None:
            title = vdata.get('title')
        if title is None:
            raise ExtractorError('Cannot find title!')
        description = vdata.get('dek')
        thumbnail = vdata.get('image', {}).get('path')
        author = vdata.get('author')

@@ -18,6 +18,7 @@ from ..utils import (
    clean_html,
    compiled_regex_type,
    ExtractorError,
    int_or_none,
    RegexNotFoundError,
    sanitize_filename,
    unescapeHTML,
@@ -69,6 +70,7 @@ class InfoExtractor(object):
                    * vcodec     Name of the video codec in use
                    * container  Name of the container format
                    * filesize   The number of bytes, if known in advance
                    * filesize_approx  An estimate for the number of bytes
                    * player_url SWF Player URL (used for rtmpdump).
                    * protocol   The protocol that will be used for the actual
                                 download, lower-case.
@@ -82,6 +84,12 @@ class InfoExtractor(object):
                                 format, irrespective of the file format.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
                    * http_referer  HTTP Referer header value to set.
                    * http_method  HTTP method to use for the download.
                    * http_headers  A dictionary of additional HTTP headers
                                 to add to the request.
                    * http_post_data  Additional data to send with a POST
                                 request.
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
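The docstring entries above describe optional keys of the dicts an extractor puts into its 'formats' list. A hypothetical entry using the newly documented fields might look like this (all values are illustrative, not taken from any real extractor):

    format_entry = {
        'format_id': 'http-720',
        'url': 'http://example.com/videos/123/720.mp4',       # placeholder URL
        'ext': 'mp4',
        'filesize_approx': 250 * 1024 * 1024,                  # rough size estimate in bytes
        'http_referer': 'http://example.com/watch/123',        # Referer header for the download
        'http_headers': {'X-Requested-With': 'XMLHttpRequest'},
    }
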
@@ -300,8 +308,12 @@ class InfoExtractor(object):
    def _download_json(self, url_or_request, video_id,
                       note=u'Downloading JSON metadata',
                       errnote=u'Unable to download JSON metadata',
                       transform_source=None):
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
                       transform_source=None,
                       fatal=True):
        json_string = self._download_webpage(
            url_or_request, video_id, note, errnote, fatal=fatal)
        if (not fatal) and json_string is False:
            return None
        if transform_source:
            json_string = transform_source(json_string)
        try:
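With the new fatal parameter an extractor can treat a missing JSON document as optional metadata instead of a hard error. A sketch of how a caller might use it (the URL and key names are invented for illustration):

    extra = self._download_json(
        'http://example.com/api/videos/%s/extra' % video_id, video_id,
        note='Downloading extra metadata', fatal=False)
    # _download_json returns None here when the request failed and fatal=False
    uploader = extra.get('uploader') if extra else None
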
@@ -368,7 +380,8 @@ class InfoExtractor(object):
        else:
            for p in pattern:
                mobj = re.search(p, string, flags)
                if mobj: break
                if mobj:
                    break

        if os.name != 'nt' and sys.stderr.isatty():
            _name = u'\033[0;34m%s\033[0m' % name
@@ -427,6 +440,22 @@ class InfoExtractor(object):

        return (username, password)

    def _get_tfa_info(self):
        """
        Get the two-factor authentication info
        TODO - asking the user will be required for sms/phone verify
        currently just uses the command line option
        If there's no info available, return None
        """
        if self._downloader is None:
            return None
        downloader_params = self._downloader.params

        if downloader_params.get('twofactor', None) is not None:
            return downloader_params['twofactor']

        return None

    # Helper functions for extracting OpenGraph info
    @staticmethod
    def _og_regexes(prop):
@@ -456,8 +485,9 @@ class InfoExtractor(object):
        return self._og_search_property('title', html, **kargs)

    def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
        regexes = self._og_regexes('video')
        if secure: regexes = self._og_regexes('video:secure_url') + regexes
        regexes = self._og_regexes('video') + self._og_regexes('video:url')
        if secure:
            regexes = self._og_regexes('video:secure_url') + regexes
        return self._html_search_regex(regexes, html, name, **kargs)

    def _og_search_url(self, html, **kargs):
@@ -468,7 +498,7 @@ class InfoExtractor(object):
            display_name = name
        return self._html_search_regex(
            r'''(?ix)<meta
                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=fatal, **kwargs)

@@ -555,6 +585,7 @@ class InfoExtractor(object):
                f.get('abr') if f.get('abr') is not None else -1,
                audio_ext_preference,
                f.get('filesize') if f.get('filesize') is not None else -1,
                f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                f.get('format_id'),
            )
        formats.sort(key=_formats_key)
@@ -583,6 +614,28 @@ class InfoExtractor(object):
        self.to_screen(msg)
        time.sleep(timeout)

    def _extract_f4m_formats(self, manifest_url, video_id):
        manifest = self._download_xml(
            manifest_url, video_id, 'Downloading f4m manifest',
            'Unable to download f4m manifest')

        formats = []
        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
        for i, media_el in enumerate(media_nodes):
            tbr = int_or_none(media_el.attrib.get('bitrate'))
            format_id = 'f4m-%d' % (i if tbr is None else tbr)
            formats.append({
                'format_id': format_id,
                'url': manifest_url,
                'ext': 'flv',
                'tbr': tbr,
                'width': int_or_none(media_el.attrib.get('width')),
                'height': int_or_none(media_el.attrib.get('height')),
            })
        self._sort_formats(formats)

        return formats


class SearchInfoExtractor(InfoExtractor):
    """

@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
            video_id)
        video_info = player_info.find('video')

        f4m_info = self._download_xml(video_info.find('url').text, video_id)
        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
        token_el = f4m_info.find('token')
        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'

@@ -5,24 +5,26 @@ import os.path
import re

from .common import InfoExtractor
from ..utils import compat_urllib_parse_unquote


class DropboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
    _TEST = {
        'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
        'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
        'md5': '8a3d905427a6951ccb9eb292f154530b',
        'info_dict': {
            'id': '0qr9sai2veej4f8',
            'id': 'nelirfsxnmcfbfh',
            'ext': 'mp4',
            'title': 'THE_DOCTOR_GAMES'
            'title': 'youtube-dl test video \'ä"BaW_jenozKc'
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        title = os.path.splitext(mobj.group('title'))[0]
        fn = compat_urllib_parse_unquote(mobj.group('title'))
        title = os.path.splitext(fn)[0]
        video_url = url + '?dl=1'

        return {

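The Dropbox fix above percent-decodes the file name taken from the URL before stripping its extension, so non-ASCII titles come out correctly. The same two steps in isolation, in plain Python 3:

    import os.path
    import urllib.parse

    fn = urllib.parse.unquote('youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4')
    title = os.path.splitext(fn)[0]
    # title == 'youtube-dl test video \'ä"BaW_jenozKc'
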
							
								
								
									
youtube_dl/extractor/dump.py (new file, 39 lines)
@@ -0,0 +1,39 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class DumpIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'

    _TEST = {
        'url': 'http://www.dump.com/oneus/',
        'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
        'info_dict': {
            'id': 'oneus',
            'ext': 'flv',
            'title': "He's one of us.",
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')

        thumb = self._og_search_thumbnail(webpage)
        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumb,
        }
@@ -1,19 +1,21 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import determine_ext


class EbaumsWorldIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
        u'file': u'83367677.mp4',
        u'info_dict': {
            u'title': u'A Giant Python Opens The Door',
            u'description': u'This is how nightmares start...',
            u'uploader': u'jihadpizza',
        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
        'info_dict': {
            'id': '83367677',
            'ext': 'mp4',
            'title': 'A Giant Python Opens The Door',
            'description': 'This is how nightmares start...',
            'uploader': 'jihadpizza',
        },
    }

@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
            'id': video_id,
            'title': config.find('title').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': config.find('description').text,
            'thumbnail': config.find('image').text,
            'uploader': config.find('username').text,

							
								
								
									
youtube_dl/extractor/ellentv.py (new file, 79 lines)
@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_iso8601,
)


class EllenTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
    _TEST = {
        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
        'md5': 'e4af06f3bf0d5f471921a18db5764642',
        'info_dict': {
            'id': '0-7jqrsr18',
            'ext': 'mp4',
            'title': 'What\'s Wrong with These Photos? A Whole Lot',
            'timestamp': 1406876400,
            'upload_date': '20140801',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        timestamp = parse_iso8601(self._search_regex(
            r'<span class="publish-date"><time datetime="([^"]+)">',
            webpage, 'timestamp'))

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._html_search_meta('VideoURL', webpage, 'url'),
            'timestamp': timestamp,
        }


class EllenTVClipsIE(InfoExtractor):
    IE_NAME = 'EllenTV:clips'
    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
    _TEST = {
        'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
        'info_dict': {
            'id': 'meryl-streep-vanessa-hudgens',
            'title': 'Meryl Streep, Vanessa Hudgens',
        },
        'playlist_mincount': 9,
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)
        playlist = self._extract_playlist(webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage),
            'entries': self._extract_entries(playlist)
        }

    def _extract_playlist(self, webpage):
        json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
        try:
            return json.loads("[{" + json_string + "}]")
        except ValueError as ve:
            raise ExtractorError('Failed to download JSON', cause=ve)

    def _extract_entries(self, playlist):
        return [self.url_result(item['url'], 'EllenTV') for item in playlist]
@@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor):
            r'<meta name="description" content="([^"]*)"',
            webpage, 'description', fatal=False)

        playerUrl = self._og_search_video_url(webpage, name=u'player URL')
        playerUrl = self._og_search_video_url(webpage, name='player URL')

        title = self._html_search_regex(
            r'<meta name="title" content="([^"]*)"',

							
								
								
									
youtube_dl/extractor/expotv.py (new file, 73 lines)
@@ -0,0 +1,73 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unified_strdate,
)


class ExpoTVIE(InfoExtractor):
    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
        'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
        'info_dict': {
            'id': '17561',
            'ext': 'mp4',
            'upload_date': '20060212',
            'title': 'My Favorite Online Scrapbook Store',
            'view_count': int,
            'description': 'You\'ll find most everything you need at this virtual store front.',
            'uploader': 'Anna T.',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config_url = 'http://client.expotv.com/video/config/%s/%s' % (
            video_id, player_key)
        config = self._download_json(
            config_url, video_id,
            note='Downloading video configuration')

        formats = [{
            'url': fcfg['file'],
            'height': int_or_none(fcfg.get('height')),
            'format_note': fcfg.get('label'),
            'ext': self._search_regex(
                r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
                'file extension', default=None),
        } for fcfg in config['sources']]
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts'))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }
@@ -20,7 +20,7 @@ from ..utils import (
class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
        (?:[^#?]*\#!/)?
        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''

@@ -8,7 +8,6 @@ from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    compat_urllib_request,
    determine_ext,
)


@@ -43,7 +42,6 @@ class FiredriveIE(InfoExtractor):
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

@@ -67,7 +65,7 @@ class FiredriveIE(InfoExtractor):
        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
                                 webpage, 'extension', fatal=False)
        video_url = self._search_regex(
            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')

        formats = [{
            'format_id': 'sd',

@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
            + video_id, video_id, 'Downloading XML config')

        manifest_url = info.find('videos/video/url').text
        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
        video_url = video_url.replace('/z/', '/i/')
        manifest_url = manifest_url.replace('/z/', '/i/')

        if manifest_url.startswith('rtmp'):
            formats = [{'url': manifest_url, 'ext': 'flv'}]
        else:
            formats = []
            available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
            for index, format_descr in enumerate(available_formats.split(',')):
                format_info = {
                    'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
                    'ext': 'mp4',
                }
                m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
                if m_resolution is not None:
                    format_info.update({
                        'width': int(m_resolution.group('width')),
                        'height': int(m_resolution.group('height')),
                    })
                formats.append(format_info)

        thumbnail_path = info.find('image').text

        return {'id': video_id,
                'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
                'url': video_url,
                'title': info.find('titre').text,
                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
                'description': info.find('synopsis').text,
                }
        return {
            'id': video_id,
            'title': info.find('titre').text,
            'formats': formats,
            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
            'description': info.find('synopsis').text,
        }


class PluzzIE(FranceTVBaseInfoExtractor):

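The rewritten FranceTV helper builds one m3u8 variant per comma-separated descriptor embedded in the manifest URL and reads width/height from descriptors of the form WIDTHxHEIGHT. The parsing step on its own, with a made-up manifest URL:

    import re

    manifest_url = 'http://example.net/z/videos/clip,404x226,640x360,1280x720,k.mp4/manifest.f4m'
    available_formats = re.search(r'/[^,]*,(.*?),k\.mp4', manifest_url).group(1)
    for index, format_descr in enumerate(available_formats.split(',')):
        m = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
        if m:
            print(index, int(m.group('width')), int(m.group('height')))
    # prints: 0 404 226, then 1 640 360, then 2 1280 720
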
@@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor):
            'id': 'e402820827',
            'ext': 'mp4',
            'title': 'Please Use This Song (Jon Lajoie)',
            'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
            'description': 'Please use this to sell something.  www.jonlajoie.com',
            'thumbnail': 're:^http:.*\.jpg$',
        },
    }]

@@ -88,3 +88,28 @@ class GameOneIE(InfoExtractor):
            'age_limit': age_limit,
            'timestamp': timestamp,
        }


class GameOnePlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
    IE_NAME = 'gameone:playlist'
    _TEST = {
        'url': 'http://www.gameone.de/tv',
        'info_dict': {
            'title': 'GameOne',
        },
        'playlist_mincount': 294,
    }

    def _real_extract(self, url):
        webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
        max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
        entries = [
            self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
            for video_id in range(max_id, 0, -1)]

        return {
            '_type': 'playlist',
            'title': 'GameOne',
            'entries': entries,
        }

							
								
								
									
youtube_dl/extractor/gamestar.py (new file, 74 lines)
@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    str_to_int,
    unified_strdate,
)


class GameStarIE(InfoExtractor):
    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
        'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
        'info_dict': {
            'id': '76110',
            'ext': 'mp4',
            'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
            'upload_date': '20140728',
            'duration': 17
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        og_title = self._og_search_title(webpage)
        title = og_title.replace(' - Video bei GameStar.de', '').strip()

        url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id

        description = self._og_search_description(webpage).strip()

        thumbnail = self._proto_relative_url(
            self._og_search_thumbnail(webpage), scheme='http:')

        upload_date = unified_strdate(self._html_search_regex(
            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
            webpage, 'upload_date', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
            fatal=False))

        view_count = str_to_int(self._html_search_regex(
            r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
            'view_count', fatal=False))

        comment_count = int_or_none(self._html_search_regex(
            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
            fatal=False))

        return {
            'id': video_id,
            'title': title,
            'url': url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count
        }
@@ -8,6 +8,7 @@ from ..utils import (
    compat_urllib_request,
)


class GDCVaultIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
                'skip_download': True,  # Requires rtmpdump
            }
        },
        {
            'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
            'md5': 'a5eb77996ef82118afbbe8e48731b98e',
            'info_dict': {
                'id': '1015301',
                'ext': 'flv',
                'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
            }
        }
    ]

    def _parse_mp4(self, xml_description):
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
        direct_url = self._search_regex(
            r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
            start_page, 'url', default=None)
        if direct_url:
            video_url = 'http://www.gdcvault.com/' + direct_url
            title = self._html_search_regex(
                r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
                start_page, 'title')

            return {
                'id': video_id,
                'url': video_url,
                'ext': 'flv',
                'title': title,
            }

        xml_root = self._html_search_regex(
            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
            start_page, 'xml root', default=None)
        if xml_root is None:
            # Probably need to authenticate
            start_page = self._login(webpage_url, video_id)
            if start_page is None:
            login_res = self._login(webpage_url, video_id)
            if login_res is None:
                self.report_warning('Could not login.')
            else:
                start_page = login_res
                # Grab the url from the authenticated page
                xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
                xml_root = self._html_search_regex(
                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
                    start_page, 'xml root')

        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
        xml_name = self._html_search_regex(
            r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
            start_page, 'xml filename', default=None)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

@@ -8,18 +8,19 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_xml_parse_error,

    ExtractorError,
    float_or_none,
    HEADRequest,
    orderedSet,
    parse_xml,
    smuggle_url,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    url_basename,
)
from .brightcove import BrightcoveIE
@@ -289,6 +290,58 @@ class GenericIE(InfoExtractor):
                'description': 'Mario\'s life in the fast lane has never looked so good.',
            },
        },
        # YouTube embed via <data-embed-url="">
        {
            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
            'info_dict': {
                'id': 'jpSGZsgga_I',
                'ext': 'mp4',
                'title': 'Asphalt 8: Airborne - Launch Trailer',
                'uploader': 'Gameloft',
                'uploader_id': 'gameloft',
                'upload_date': '20130821',
                'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
            },
            'params': {
                'skip_download': True,
            }
        },
        # Camtasia studio
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                }
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                }
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            }
        },
        # Flowplayer
        {
            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
            'md5': '9d65602bf31c6e20014319c7d07fba27',
            'info_dict': {
                'id': '5123ea6d5e5a7',
                'ext': 'mp4',
                'age_limit': 18,
                'uploader': 'www.handjobhub.com',
                'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
            }
        }
    ]

    def report_download_webpage(self, video_id):
@@ -301,58 +354,6 @@ class GenericIE(InfoExtractor):
        """Report information extraction."""
        self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)

    def _send_head(self, url):
        """Check if it is a redirect, like url shorteners, in case return the new url."""

        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
            HEADRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code in (301, 302, 303, 307):
                    newurl = newurl.replace(' ', '%20')
                    newheaders = dict((k,v) for k,v in req.headers.items()
                                      if k.lower() not in ("content-length", "content-type"))
                    try:
                        # This function was deprecated in python 3.3 and removed in 3.4
                        origin_req_host = req.get_origin_req_host()
                    except AttributeError:
                        origin_req_host = req.origin_req_host
                    return HEADRequest(newurl,
                                       headers=newheaders,
                                       origin_req_host=origin_req_host,
                                       unverifiable=True)
                else:
                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)

        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = dict((k,v) for k,v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
                                                 headers=newheaders,
			
		||||
                                                 origin_req_host=req.get_origin_req_host(),
 | 
			
		||||
                                                 unverifiable=True))
 | 
			
		||||
 | 
			
		||||
        # Build our opener
 | 
			
		||||
        opener = compat_urllib_request.OpenerDirector()
 | 
			
		||||
        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
 | 
			
		||||
                        HTTPMethodFallback, HEADRedirectHandler,
 | 
			
		||||
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
 | 
			
		||||
            opener.add_handler(handler())
 | 
			
		||||
 | 
			
		||||
        response = opener.open(HEADRequest(url))
 | 
			
		||||
        if response is None:
 | 
			
		||||
            raise ExtractorError('Invalid URL protocol')
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
    def _extract_rss(self, url, video_id, doc):
 | 
			
		||||
        playlist_title = doc.find('./channel/title').text
 | 
			
		||||
        playlist_desc_el = doc.find('./channel/description')
 | 
			
		||||
@@ -372,6 +373,43 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            'entries': entries,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_camtasia(self, url, video_id, webpage):
 | 
			
		||||
        """ Returns None if no camtasia video can be found. """
 | 
			
		||||
 | 
			
		||||
        camtasia_cfg = self._search_regex(
 | 
			
		||||
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 | 
			
		||||
            webpage, 'camtasia configuration file', default=None)
 | 
			
		||||
        if camtasia_cfg is None:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        title = self._html_search_meta('DC.title', webpage, fatal=True)
 | 
			
		||||
 | 
			
		||||
        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 | 
			
		||||
        camtasia_cfg = self._download_xml(
 | 
			
		||||
            camtasia_url, video_id,
 | 
			
		||||
            note='Downloading camtasia configuration',
 | 
			
		||||
            errnote='Failed to download camtasia configuration')
 | 
			
		||||
        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for n in fileset_node.getchildren():
 | 
			
		||||
            url_n = n.find('./uri')
 | 
			
		||||
            if url_n is None:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 | 
			
		||||
                'title': '%s - %s' % (title, n.tag),
 | 
			
		||||
                'url': compat_urlparse.urljoin(url, url_n.text),
 | 
			
		||||
                'duration': float_or_none(n.find('./duration').text),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'playlist',
 | 
			
		||||
            'entries': entries,
 | 
			
		||||
            'title': title,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        if url.startswith('//'):
 | 
			
		||||
            return {
 | 
			
		||||
@@ -383,13 +421,13 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if not parsed_url.scheme:
 | 
			
		||||
            default_search = self._downloader.params.get('default_search')
 | 
			
		||||
            if default_search is None:
 | 
			
		||||
                default_search = 'error'
 | 
			
		||||
                default_search = 'fixup_error'
 | 
			
		||||
 | 
			
		||||
            if default_search in ('auto', 'auto_warning'):
 | 
			
		||||
            if default_search in ('auto', 'auto_warning', 'fixup_error'):
 | 
			
		||||
                if '/' in url:
 | 
			
		||||
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 | 
			
		||||
                    return self.url_result('http://' + url)
 | 
			
		||||
                else:
 | 
			
		||||
                elif default_search != 'fixup_error':
 | 
			
		||||
                    if default_search == 'auto_warning':
 | 
			
		||||
                        if re.match(r'^(?:url|URL)$', url):
 | 
			
		||||
                            raise ExtractorError(
 | 
			
		||||
@@ -399,25 +437,40 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                            self._downloader.report_warning(
 | 
			
		||||
                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
 | 
			
		||||
                    return self.url_result('ytsearch:' + url)
 | 
			
		||||
            elif default_search == 'error':
 | 
			
		||||
 | 
			
		||||
            if default_search in ('error', 'fixup_error'):
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    ('%r is not a valid URL. '
 | 
			
		||||
                     'Set --default-search "ytseach" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 | 
			
		||||
                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 | 
			
		||||
                    ) % (url, url), expected=True)
 | 
			
		||||
            else:
 | 
			
		||||
                assert ':' in default_search
 | 
			
		||||
                return self.url_result(default_search + url)
 | 
			
		||||
        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 | 
			
		||||
 | 
			
		||||
        url, smuggled_data = unsmuggle_url(url)
 | 
			
		||||
        force_videoid = None
 | 
			
		||||
        if smuggled_data and 'force_videoid' in smuggled_data:
 | 
			
		||||
            force_videoid = smuggled_data['force_videoid']
 | 
			
		||||
            video_id = force_videoid
 | 
			
		||||
        else:
 | 
			
		||||
            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
 | 
			
		||||
 | 
			
		||||
        self.to_screen('%s: Requesting header' % video_id)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            response = self._send_head(url)
 | 
			
		||||
        head_req = HEADRequest(url)
 | 
			
		||||
        response = self._request_webpage(
 | 
			
		||||
            head_req, video_id,
 | 
			
		||||
            note=False, errnote='Could not send HEAD request to %s' % url,
 | 
			
		||||
            fatal=False)
 | 
			
		||||
 | 
			
		||||
        if response is not False:
 | 
			
		||||
            # Check for redirect
 | 
			
		||||
            new_url = response.geturl()
 | 
			
		||||
            if url != new_url:
 | 
			
		||||
                self.report_following_redirect(new_url)
 | 
			
		||||
                if force_videoid:
 | 
			
		||||
                    new_url = smuggle_url(
 | 
			
		||||
                        new_url, {'force_videoid': force_videoid})
 | 
			
		||||
                return self.url_result(new_url)
 | 
			
		||||
 | 
			
		||||
            # Check for direct link to a video
 | 
			
		||||
@@ -438,10 +491,6 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                    'upload_date': upload_date,
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
        except compat_urllib_error.HTTPError:
 | 
			
		||||
            # This may be a stupid server that doesn't like HEAD, our UA, or so
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        except ValueError:
 | 
			
		||||
@@ -459,6 +508,11 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        except compat_xml_parse_error:
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        # Is it a Camtasia project?
 | 
			
		||||
        camtasia_res = self._extract_camtasia(url, video_id, webpage)
 | 
			
		||||
        if camtasia_res is not None:
 | 
			
		||||
            return camtasia_res
 | 
			
		||||
 | 
			
		||||
        # Sometimes embedded video player is hidden behind percent encoding
 | 
			
		||||
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 | 
			
		||||
        # Unescaping the whole page allows to handle those cases in a generic way
 | 
			
		||||
@@ -474,10 +528,26 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
 | 
			
		||||
            default='video')
 | 
			
		||||
 | 
			
		||||
        # Try to detect age limit automatically
 | 
			
		||||
        age_limit = self._rta_search(webpage)
 | 
			
		||||
        # And then there are the jokers who advertise that they use RTA,
 | 
			
		||||
        # but actually don't.
 | 
			
		||||
        AGE_LIMIT_MARKERS = [
 | 
			
		||||
            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 | 
			
		||||
        ]
 | 
			
		||||
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 | 
			
		||||
            age_limit = 18
 | 
			
		||||
 | 
			
		||||
        # video uploader is domain name
 | 
			
		||||
        video_uploader = self._search_regex(
 | 
			
		||||
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 | 
			
		||||
 | 
			
		||||
        # Helper method
 | 
			
		||||
        def _playlist_from_matches(matches, getter, ie=None):
 | 
			
		||||
            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
 | 
			
		||||
            return self.playlist_result(
 | 
			
		||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
			
		||||
 | 
			
		||||
        # Look for BrightCove:
 | 
			
		||||
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 | 
			
		||||
        if bc_urls:
 | 
			
		||||
@@ -513,6 +583,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        matches = re.findall(r'''(?x)
 | 
			
		||||
            (?:
 | 
			
		||||
                <iframe[^>]+?src=|
 | 
			
		||||
                data-video-url=|
 | 
			
		||||
                <embed[^>]+?src=|
 | 
			
		||||
                embedSWF\(?:\s*
 | 
			
		||||
            )
 | 
			
		||||
@@ -521,19 +592,15 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                (?:embed|v)/.+?)
 | 
			
		||||
            \1''', webpage)
 | 
			
		||||
        if matches:
 | 
			
		||||
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
 | 
			
		||||
                     for tuppl in matches]
 | 
			
		||||
            return self.playlist_result(
 | 
			
		||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
			
		||||
            return _playlist_from_matches(
 | 
			
		||||
                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded Dailymotion player
 | 
			
		||||
        matches = re.findall(
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
 | 
			
		||||
        if matches:
 | 
			
		||||
            urlrs = [self.url_result(unescapeHTML(tuppl[1]))
 | 
			
		||||
                     for tuppl in matches]
 | 
			
		||||
            return self.playlist_result(
 | 
			
		||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
			
		||||
            return _playlist_from_matches(
 | 
			
		||||
                matches, lambda m: unescapeHTML(m[1]))
 | 
			
		||||
 | 
			
		||||
        # Look for embedded Wistia player
 | 
			
		||||
        match = re.search(
 | 
			
		||||
@@ -552,7 +619,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
            return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
 | 
			
		||||
        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
 | 
			
		||||
        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
            return self.url_result(mobj.group(1), 'BlipTV')
 | 
			
		||||
 | 
			
		||||
@@ -647,10 +714,8 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        # Look for funnyordie embed
 | 
			
		||||
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
 | 
			
		||||
        if matches:
 | 
			
		||||
            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
 | 
			
		||||
                     for eurl in matches]
 | 
			
		||||
            return self.playlist_result(
 | 
			
		||||
                urlrs, playlist_id=video_id, playlist_title=video_title)
 | 
			
		||||
            return _playlist_from_matches(
 | 
			
		||||
                matches, getter=unescapeHTML, ie='FunnyOrDie')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded RUTV player
 | 
			
		||||
        rutv_url = RUTVIE._extract_url(webpage)
 | 
			
		||||
@@ -705,6 +770,20 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            url = unescapeHTML(mobj.group('url'))
 | 
			
		||||
            return self.url_result(url, ie='MTVServicesEmbedded')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded yahoo player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'Yahoo')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded sbs.com.au player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'SBS')
 | 
			
		||||
 | 
			
		||||
        # Start with something easy: JW Player in SWFObject
 | 
			
		||||
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
@@ -722,6 +801,15 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Broaden the findall a little bit: JWPlayer JS loader
 | 
			
		||||
            found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Flow player
 | 
			
		||||
            found = re.findall(r'''(?xs)
 | 
			
		||||
                flowplayer\("[^"]+",\s*
 | 
			
		||||
                    \{[^}]+?\}\s*,
 | 
			
		||||
                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
 | 
			
		||||
                        ["']?url["']?\s*:\s*["']([^"']+)["']
 | 
			
		||||
            ''', webpage)
 | 
			
		||||
            assert found
 | 
			
		||||
        if not found:
 | 
			
		||||
            # Try to find twitter cards info
 | 
			
		||||
            found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
 | 
			
		||||
@@ -731,7 +819,12 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
 | 
			
		||||
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
 | 
			
		||||
            if m_video_type is not None:
 | 
			
		||||
                found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
 | 
			
		||||
                def check_video(vurl):
 | 
			
		||||
                    vpath = compat_urlparse.urlparse(vurl).path
 | 
			
		||||
                    return '.' in vpath and not vpath.endswith('.swf')
 | 
			
		||||
                found = list(filter(
 | 
			
		||||
                    check_video,
 | 
			
		||||
                    re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
 | 
			
		||||
        if not found:
 | 
			
		||||
            # HTML5 video
 | 
			
		||||
            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
 | 
			
		||||
@@ -768,6 +861,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'uploader': video_uploader,
 | 
			
		||||
                'title': video_title,
 | 
			
		||||
                'age_limit': age_limit,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        if len(entries) == 1:
 | 
			
		||||
 
 | 
			
		||||
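A note on the GenericIE change above: the hand-rolled _send_head opener is replaced by a single non-fatal HEAD request whose final URL is compared against the input to detect redirects. The following is a minimal standalone sketch of that pattern using only Python 3's urllib; HEADRequest and resolve_redirect here are illustrative helpers, not youtube-dl code.

import urllib.error
import urllib.request

class HEADRequest(urllib.request.Request):
    # Same trick as youtube-dl's utils.HEADRequest: force the HEAD verb.
    def get_method(self):
        return 'HEAD'

def resolve_redirect(url):
    """Return the final URL if it differs from the input, else None."""
    try:
        response = urllib.request.urlopen(HEADRequest(url), timeout=10)
    except urllib.error.URLError:
        return None  # mirrors fatal=False: caller falls back to a plain GET
    final_url = response.geturl()
    return final_url if final_url != url else None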
							
								
								
									
youtube_dl/extractor/godtube.py (new file, 58 lines)
@@ -0,0 +1,58 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
)


class GodTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
    _TESTS = [
        {
            'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
            'md5': '77108c1e4ab58f48031101a1a2119789',
            'info_dict': {
                'id': '0C0CNNNU',
                'ext': 'mp4',
                'title': 'Woman at the well.',
                'duration': 159,
                'timestamp': 1205712000,
                'uploader': 'beverlybmusic',
                'upload_date': '20080317',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        config = self._download_xml(
            'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
            video_id, 'Downloading player config XML')

        video_url = config.find('.//file').text
        uploader = config.find('.//author').text
        timestamp = parse_iso8601(config.find('.//date').text)
        duration = parse_duration(config.find('.//duration').text)
        thumbnail = config.find('.//image').text

        media = self._download_xml(
            'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')

        title = media.find('.//title').text

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
        }
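GodTubeIE combines two XML documents: the player config carries the file URL, author, date, duration and image, while a separate media XML carries the title. A rough, self-contained illustration of that merge using xml.etree and made-up payloads:

import xml.etree.ElementTree as ET

# Hypothetical payloads standing in for the two downloads above.
config_xml = '<player><file>http://example.com/v.mp4</file><author>beverlybmusic</author></player>'
media_xml = '<media><title>Woman at the well.</title></media>'

config = ET.fromstring(config_xml)
media = ET.fromstring(media_xml)

info = {
    'url': config.find('.//file').text,        # stream URL comes from the player config
    'uploader': config.find('.//author').text,
    'title': media.find('.//title').text,      # title lives in the media XML
}
print(info)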
							
								
								
									
youtube_dl/extractor/grooveshark.py (new file, 190 lines)
@@ -0,0 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals

import time
import math
import os.path
import re


from .common import InfoExtractor
from ..utils import ExtractorError, compat_urllib_request, compat_html_parser

from ..utils import (
    compat_urllib_parse,
    compat_urlparse,
)


class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
    def __init__(self):
        self._current_object = None
        self.objects = []
        compat_html_parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        attrs = dict((k, v) for k, v in attrs)
        if tag == 'object':
            self._current_object = {'attrs': attrs, 'params': []}
        elif tag == 'param':
            self._current_object['params'].append(attrs)

    def handle_endtag(self, tag):
        if tag == 'object':
            self.objects.append(self._current_object)
            self._current_object = None

    @classmethod
    def extract_object_tags(cls, html):
        p = cls()
        p.feed(html)
        p.close()
        return p.objects


class GroovesharkIE(InfoExtractor):
    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
        'md5': '7ecf8aefa59d6b2098517e1baa530023',
        'info_dict': {
            'id': '6SS1DW',
            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
            'ext': 'mp3',
            'duration': 227,
        }
    }

    do_playerpage_request = True
    do_bootstrap_request = True

    def _parse_target(self, target):
        uri = compat_urlparse.urlparse(target)
        hash = uri.fragment[1:].split('?')[0]
        token = os.path.basename(hash.rstrip('/'))
        return (uri, hash, token)

    def _build_bootstrap_url(self, target):
        (uri, hash, token) = self._parse_target(target)
        query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)

    def _build_meta_url(self, target):
        (uri, hash, token) = self._parse_target(target)
        query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts)
        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)

    def _build_stream_url(self, meta):
        return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))

    def _build_swf_referer(self, target, obj):
        (uri, _, _) = self._parse_target(target)
        return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

    def _transform_bootstrap(self, js):
        return re.split('(?m)^\s*try\s*{', js)[0] \
                 .split(' = ', 1)[1].strip().rstrip(';')

    def _transform_meta(self, js):
        return js.split('\n')[0].split('=')[1].rstrip(';')

    def _get_meta(self, target):
        (meta_url, token) = self._build_meta_url(target)
        self.to_screen('Metadata URL: %s' % meta_url)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(meta_url, headers=headers)
        res = self._download_json(req, token,
                                  transform_source=self._transform_meta)

        if 'getStreamKeyWithSong' not in res:
            raise ExtractorError(
                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')

        if res['getStreamKeyWithSong'] is None:
            raise ExtractorError(
                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
                expected=True)

        return res['getStreamKeyWithSong']

    def _get_bootstrap(self, target):
        (bootstrap_url, token) = self._build_bootstrap_url(target)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
        res = self._download_json(req, token, fatal=False,
                                  note='Downloading player bootstrap data',
                                  errnote='Unable to download player bootstrap data',
                                  transform_source=self._transform_bootstrap)
        return res

    def _get_playerpage(self, target):
        (_, _, token) = self._parse_target(target)

        webpage = self._download_webpage(
            target, token,
            note='Downloading player page',
            errnote='Unable to download player page',
            fatal=False)

        if webpage is not None:
            # Search (for example German) error message
            error_msg = self._html_search_regex(
                r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
                'error message', default=None)
            if error_msg is not None:
                error_msg = error_msg.replace('\n', ' ')
                raise ExtractorError('Grooveshark said: %s' % error_msg)

        if webpage is not None:
            o = GroovesharkHtmlParser.extract_object_tags(webpage)
            return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])

        return (webpage, None)

    def _real_initialize(self):
        self.ts = int(time.time() * 1000)  # timestamp in millis

    def _real_extract(self, url):
        (target_uri, _, token) = self._parse_target(url)

        # 1. Fill cookiejar by making a request to the player page
        swf_referer = None
        if self.do_playerpage_request:
            (_, player_objs) = self._get_playerpage(url)
            if player_objs is not None:
                swf_referer = self._build_swf_referer(url, player_objs[0])
                self.to_screen('SWF Referer: %s' % swf_referer)

        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
        if self.do_bootstrap_request:
            bootstrap = self._get_bootstrap(url)
            self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])

        # 3. Ask preload.php for track metadata.
        meta = self._get_meta(url)

        # 4. Construct stream request for track.
        stream_url = self._build_stream_url(meta)
        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
        post_dict = {'streamKey': meta['streamKey']['streamKey']}
        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
        headers = {
            'Content-Length': len(post_data),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        if swf_referer is not None:
            headers['Referer'] = swf_referer

        return {
            'id': token,
            'title': meta['song']['Name'],
            'http_method': 'POST',
            'url': stream_url,
            'ext': 'mp3',
            'format': 'mp3 audio',
            'duration': duration,
            'http_post_data': post_data,
            'http_headers': headers,
        }
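Both preload.php URLs that GroovesharkIE builds follow the same shape: the song hash is taken from the URL fragment, percent-encoded, and combined with the millisecond timestamp stored by _real_initialize. A standalone sketch of that construction with the standard library (not the extractor itself):

import time
import urllib.parse

def build_bootstrap_url(target):
    # target looks like http://grooveshark.com/#!/s/Artist+Song/6SS1DW?src=5
    uri = urllib.parse.urlparse(target)
    song_hash = uri.fragment[1:].split('?')[0]   # e.g. "/s/Artist+Song/6SS1DW"
    ts = int(time.time() * 1000)                 # what _real_initialize stores in self.ts
    query = 'getCommunicationToken=1&hash=%s&%d' % (
        urllib.parse.quote(song_hash, safe=''), ts)
    return urllib.parse.urlunparse(
        (uri.scheme, uri.netloc, '/preload.php', None, query, None))

print(build_bootstrap_url('http://grooveshark.com/#!/s/Jolene/6SS1DW?src=5'))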
							
								
								
									
youtube_dl/extractor/howstuffworks.py (new file, 134 lines)
@@ -0,0 +1,134 @@
from __future__ import unicode_literals

import re
import json
import random
import string

from .common import InfoExtractor
from ..utils import find_xpath_attr


class HowStuffWorksIE(InfoExtractor):
    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
    _TESTS = [
        {
            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
            'info_dict': {
                'id': '450221',
                'display_id': 'cool-jobs-iditarod-musher',
                'ext': 'flv',
                'title': 'Cool Jobs - Iditarod Musher',
                'description': 'md5:82bb58438a88027b8186a1fccb365f90',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
        {
            'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
            'info_dict': {
                'id': '553470',
                'display_id': 'deadliest-catch-jakes-farewell-pots',
                'ext': 'mp4',
                'title': 'Deadliest Catch: Jake\'s Farewell Pots',
                'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
        {
            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
            'info_dict': {
                'id': '440011',
                'display_id': 'sword-swallowing-1-by-dan-meyer',
                'ext': 'flv',
                'title': 'Sword Swallowing #1 by Dan Meyer',
                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')

        mp4 = self._search_regex(
            r'''(?xs)var\s+clip\s*=\s*{\s*
                .+?\s*
                content_id\s*:\s*%s\s*,\s*
                .+?\s*
                mp4\s*:\s*\[(.*?),?\]\s*
                };\s*
                videoData\.push\(clip\);''' % content_id,
            webpage, 'mp4', fatal=False, default=None)

        smil = self._download_xml(
            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
            content_id, 'Downloading video SMIL')

        http_base = find_xpath_attr(
            smil,
            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
            'name',
            'httpBase').get('content')

        def random_string(str_len=0):
            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])

        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))

        formats = []

        if mp4:
            for video in json.loads('[%s]' % mp4):
                bitrate = video['bitrate']
                fmt = {
                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
                    'format_id': bitrate,
                }
                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
                if m:
                    fmt['vbr'] = int(m.group('vbr'))
                formats.append(fmt)
        else:
            for video in smil.findall(
                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
                vbr = int(video.attrib['system-bitrate']) / 1000
                formats.append({
                    'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
                    'format_id': '%dk' % vbr,
                    'vbr': vbr,
                })

        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' : HowStuffWorks'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': content_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
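The mp4 branch in HowStuffWorksIE depends on the page embedding a JavaScript array of clip sources; wrapping the captured text in brackets turns it into valid JSON, and the bitrate label is reused as an approximate vbr. A hedged sketch of just that parsing step over a made-up clip fragment:

import json
import re

# Hypothetical capture of the "mp4: [ ... ]" group from the page.
mp4 = '{"src": "http://pmd.video.howstuffworks.com/a.mp4", "bitrate": "400k"}'

formats = []
for video in json.loads('[%s]' % mp4):   # same trick as the extractor: listify the fragment
    fmt = {'url': video['src'], 'format_id': video['bitrate']}
    m = re.search(r'(?P<vbr>\d+)[Kk]', video['bitrate'])
    if m:
        fmt['vbr'] = int(m.group('vbr'))  # "400k" -> 400
    formats.append(fmt)
print(formats)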
							
								
								
									
youtube_dl/extractor/izlesene.py (new file, 97 lines)
@@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    get_element_by_id,
    parse_iso8601,
    determine_ext,
    int_or_none,
    str_to_int,
)


class IzleseneIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
    _TEST = {
        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
        'md5': '4384f9f0ea65086734b881085ee05ac2',
        'info_dict': {
            'id': '7599694',
            'ext': 'mp4',
            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
            'thumbnail': 're:^http://.*\.jpg',
            'uploader_id': 'pelikzzle',
            'timestamp': 1404298698,
            'upload_date': '20140702',
            'duration': 95.395,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        url = 'http://www.izlesene.com/video/%s' % video_id

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', fatal=False))

        duration = int_or_none(self._html_search_regex(
            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
        if duration:
            duration /= 1000.0

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)

        family_friendly = self._html_search_meta(
            'isFamilyFriendly', webpage, 'age limit', fatal=False)

        content_url = self._html_search_meta(
            'contentURL', webpage, 'content URL', fatal=False)
        ext = determine_ext(content_url, 'mp4')

        # Might be empty for some videos.
        qualities = self._html_search_regex(
            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')

        formats = []
        for quality in qualities.split('|'):
            json = self._download_json(
                self._STREAM_URL.format(id=video_id, format=quality), video_id,
                note='Getting video URL for "%s" quality' % quality,
                errnote='Failed to get video URL for "%s" quality' % quality
            )
            formats.append({
                'url': json.get('streamurl'),
                'ext': ext,
                'format_id': '%sp' % quality if quality else 'sd',
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': 18 if family_friendly == 'False' else 0,
            'formats': formats,
        }
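IzleseneIE builds one format per pipe-separated quality string, querying the stream-URL API once per quality and falling back to a single 'sd' format when the list is empty. A minimal sketch of that mapping with the network call stubbed out (fake_stream_api is hypothetical):

def fake_stream_api(video_id, quality):
    # Stand-in for the per-quality _download_json call against _STREAM_URL.
    return {'streamurl': 'http://example.com/%s/%s.mp4' % (video_id, quality or 'default')}

qualities = '720|480'        # what the "quality" field on the page might contain
formats = []
for quality in qualities.split('|'):
    data = fake_stream_api('7599694', quality)
    formats.append({
        'url': data.get('streamurl'),
        'format_id': '%sp' % quality if quality else 'sd',
    })
print(formats)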
							
								
								
									
youtube_dl/extractor/jove.py (new file, 80 lines)
@@ -0,0 +1,80 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    unified_strdate
)


class JoveIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
    _TESTS = [
        {
            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
            'info_dict': {
                'id': '2744',
                'ext': 'mp4',
                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
                'thumbnail': 're:^https?://.*\.png$',
                'upload_date': '20110523',
            }
        },
        {
            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
            'md5': '914aeb356f416811d911996434811beb',
            'info_dict': {
                'id': '51796',
                'ext': 'mp4',
                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
                'description': 'md5:35ff029261900583970c4023b70f1dc9',
                'thumbnail': 're:^https?://.*\.png$',
                'upload_date': '20140802',
            }
        },

    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        chapters_id = self._html_search_regex(
            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')

        chapters_xml = self._download_xml(
            self._CHAPTERS_URL.format(video_id=chapters_id),
            video_id, note='Downloading chapters XML',
            errnote='Failed to download chapters XML')

        video_url = chapters_xml.attrib.get('video')
        if not video_url:
            raise ExtractorError('Failed to get the video URL')

        title = self._html_search_meta('citation_title', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._html_search_regex(
            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
            webpage, 'description', fatal=False)
        publish_date = unified_strdate(self._html_search_meta(
            'citation_publication_date', webpage, 'publish date', fatal=False))
        comment_count = self._html_search_regex(
            r'<meta name="num_comments" content="(\d+) Comments?"',
            webpage, 'comment count', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': publish_date,
            'comment_count': comment_count,
        }
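JoveIE takes the actual video URL from the root attribute of the chapters XML rather than from the page itself. Roughly, with a hypothetical chapters response:

import xml.etree.ElementTree as ET

chapters_xml = ET.fromstring(
    '<chapters video="http://example.com/jove/2744.mp4"><chapter start="0"/></chapters>')

video_url = chapters_xml.attrib.get('video')
if not video_url:
    raise ValueError('Failed to get the video URL')  # the extractor raises ExtractorError here
print(video_url)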
@@ -1,5 +1,6 @@
from __future__ import unicode_literals

import itertools
import json
import os
import re
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
    }

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
                                           'Downloading video info JSON',
                                           'unable to download video info JSON')
    def _parse_page(self, url, video_id, counter):
        info_json = self._download_webpage(
            url, video_id,
            'Downloading video info JSON on page %d' % counter,
            'Unable to download video info JSON %d' % counter)

        response = json.loads(info_json)
        if type(response) != list:
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
        for counter in itertools.count(1):
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            page_count, page_info = self._parse_page(
                page_url, video_id, counter)
            entries.extend(page_info)
            if not paged or page_count != limit:
                break
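The JustinTV hunk above swaps the manual while True loop for itertools.count(1) so that the page counter can be reported in the download notes. The same pagination pattern in isolation, with the page fetch stubbed out:

import itertools

PAGE_LIMIT = 100

def fetch_page(offset, limit):
    # Stand-in for _parse_page: returns (item count, items) for one API page.
    items = list(range(offset, min(offset + limit, 250)))
    return len(items), items

entries = []
offset = 0
for counter in itertools.count(1):
    print('Downloading video info JSON on page %d' % counter)
    page_count, page_info = fetch_page(offset, PAGE_LIMIT)
    entries.extend(page_info)
    if page_count != PAGE_LIMIT:
        break
    offset += PAGE_LIMIT
print(len(entries))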
@@ -8,7 +8,7 @@ from .common import InfoExtractor

class KickStarterIE(InfoExtractor):
    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
    _TESTS = [{
        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
        'md5': 'c81addca81327ffa66c642b5d8b08cab',
        'info_dict': {
@@ -18,22 +18,45 @@ class KickStarterIE(InfoExtractor):
            'description': 'A unique motocross documentary that examines the '
                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }
    }, {
        'note': 'Embedded video (not using the native kickstarter video service)',
        'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
        'playlist': [
            {
                'info_dict': {
                    'id': '78704821',
                    'ext': 'mp4',
                    'uploader_id': 'pebble',
                    'uploader': 'Pebble Technology',
                    'title': 'Pebble iOS Notifications',
                }
            }
        ],
    }]

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'data-video-url="(.*?)"',
            webpage, 'video URL')
        video_title = self._html_search_regex(r'<title>(.*?)</title>',
            webpage, 'title').rpartition('— Kickstarter')[0].strip()
        title = self._html_search_regex(
            r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>',
            webpage, 'title')
        video_url = self._search_regex(
            r'data-video-url="(.*?)"',
            webpage, 'video URL', default=None)
        if video_url is None:  # No native kickstarter, look for embedded videos
            return {
                '_type': 'url_transparent',
                'ie_key': 'Generic',
                'url': url,
                'title': title,
            }

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }

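The new title regex makes the trailing '— Kickstarter' suffix optional instead of relying on rpartition(); a standalone check of that pattern (the sample titles below are made up):

import re

TITLE_RE = r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>'

# Suffix present: stripped together with the surrounding whitespace.
print(re.search(TITLE_RE, '<title>Intersection — Kickstarter</title>').group(1))
# Suffix absent (e.g. on /posts/ pages): the whole title survives, whereas the old
# rpartition('— Kickstarter')[0] returned an empty string in this case.
print(re.search(TITLE_RE, '<title>Pebble iOS Notifications</title>').group(1))
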
youtube_dl/extractor/krasview.py (new file, 59 lines)
@@ -0,0 +1,59 @@
# encoding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unescapeHTML,
)


class KrasViewIE(InfoExtractor):
    IE_DESC = 'Красвью'
    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://krasview.ru/video/512228',
        'md5': '3b91003cf85fc5db277870c8ebd98eae',
        'info_dict': {
            'id': '512228',
            'ext': 'mp4',
            'title': 'Снег, лёд, заносы',
            'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
            'duration': 27,
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        flashvars = json.loads(self._search_regex(
            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))

        video_url = flashvars['url']
        title = unescapeHTML(flashvars['title'])
        description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
        thumbnail = flashvars['image']
        duration = int(flashvars['duration'])
        filesize = int(flashvars['size'])
        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'filesize': filesize,
            'width': width,
            'height': height,
        }

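For reference, a minimal way to exercise a newly added extractor such as this one through the public API, assuming the class has been registered in youtube_dl/extractor/__init__.py (not shown in this compare view):

import youtube_dl

ydl = youtube_dl.YoutubeDL({'skip_download': True})
# Runs KrasViewIE._real_extract() and returns the assembled info dict.
info = ydl.extract_info('http://krasview.ru/video/512228', download=False)
print('%s: %s (%d s)' % (info['id'], info['title'], info['duration']))
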
@@ -5,11 +5,14 @@ import json

from .common import InfoExtractor
from ..utils import (
    compat_str,
    compat_urllib_parse_urlparse,
    compat_urlparse,
    xpath_with_ns,
    compat_str,
    ExtractorError,
    find_xpath_attr,
    int_or_none,
    orderedSet,
    xpath_with_ns,
)


@@ -24,18 +27,82 @@ class LivestreamIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Live from Webster Hall NYC',
            'upload_date': '20121012',
            'like_count': int,
            'view_count': int,
            'thumbnail': 're:^http://.*\.jpg$'
        }
    }

    def _parse_smil(self, video_id, smil_url):
        formats = []
        _SWITCH_XPATH = (
            './/{http://www.w3.org/2001/SMIL20/Language}body/'
            '{http://www.w3.org/2001/SMIL20/Language}switch')
        smil_doc = self._download_xml(
            smil_url, video_id,
            note='Downloading SMIL information',
            errnote='Unable to download SMIL information',
            fatal=False)
        if smil_doc is False:  # Download failed
            return formats
        title_node = find_xpath_attr(
            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
            'name', 'title')
        if title_node is None:
            self.report_warning('Cannot find SMIL id')
            switch_node = smil_doc.find(_SWITCH_XPATH)
        else:
            title_id = title_node.attrib['content']
            switch_node = find_xpath_attr(
                smil_doc, _SWITCH_XPATH, 'id', title_id)
        if switch_node is None:
            raise ExtractorError('Cannot find switch node')
        video_nodes = switch_node.findall(
            '{http://www.w3.org/2001/SMIL20/Language}video')

        for vn in video_nodes:
            tbr = int_or_none(vn.attrib.get('system-bitrate'))
            furl = (
                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
                (vn.attrib['src']))
            if 'clipBegin' in vn.attrib:
                furl += '&ssek=' + vn.attrib['clipBegin']
            formats.append({
                'url': furl,
                'format_id': 'smil_%d' % tbr,
                'ext': 'flv',
                'tbr': tbr,
                'preference': -1000,
            })
        return formats

    def _extract_video_info(self, video_data):
        video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
        video_id = compat_str(video_data['id'])

        FORMAT_KEYS = (
            ('sd', 'progressive_url'),
            ('hd', 'progressive_url_hd'),
        )
        formats = [{
            'format_id': format_id,
            'url': video_data[key],
            'quality': i + 1,
        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
            if video_data.get(key)]

        smil_url = video_data.get('smil_url')
        if smil_url:
            formats.extend(self._parse_smil(video_id, smil_url))
        self._sort_formats(formats)

        return {
            'id': compat_str(video_data['id']),
            'url': video_url,
            'ext': 'mp4',
            'id': video_id,
            'formats': formats,
            'title': video_data['caption'],
            'thumbnail': video_data['thumbnail_url'],
            'thumbnail': video_data.get('thumbnail_url'),
            'upload_date': video_data['updated_at'].replace('-', '')[:8],
            'like_count': video_data.get('likes', {}).get('total'),
            'view_count': video_data.get('views'),
        }

    def _real_extract(self, url):
@@ -44,16 +111,28 @@ class LivestreamIE(InfoExtractor):
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

        if video_id is None:
            # This is an event page:
        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
        if og_video is None:
            config_json = self._search_regex(
                r'window.config = ({.*?});', webpage, 'window config')
            info = json.loads(config_json)['event']

            def is_relevant(vdata, vid):
                result = vdata['type'] == 'video'
                if video_id is not None:
                    result = result and compat_str(vdata['data']['id']) == vid
                return result

            videos = [self._extract_video_info(video_data['data'])
                for video_data in info['feed']['data'] if video_data['type'] == 'video']
            return self.playlist_result(videos, info['id'], info['full_name'])
                      for video_data in info['feed']['data']
                      if is_relevant(video_data, video_id)]
            if video_id is None:
                # This is an event page:
                return self.playlist_result(videos, info['id'], info['full_name'])
            else:
                if videos:
                    return videos[0]
        else:
            og_video = self._og_search_video_url(webpage, 'player url')
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
            api_url = query['play_url'][0].replace('.smil', '')

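_parse_smil() picks the <switch> node named by the SMIL <meta name="title"> element and turns each <video> child into a format entry. A rough standalone illustration of those namespace-qualified lookups with plain ElementTree (the inline SMIL snippet and bitrate are invented):

import xml.etree.ElementTree as ET

SMIL_NS = '{http://www.w3.org/2001/SMIL20/Language}'
smil_doc = ET.fromstring(
    '<smil xmlns="http://www.w3.org/2001/SMIL20/Language">'
    '<head><meta name="title" content="clip1"/></head>'
    '<body><switch id="clip1">'
    '<video src="mp4:clip1_1500" system-bitrate="1500000"/>'
    '</switch></body></smil>')

# find_xpath_attr() in youtube_dl.utils does essentially this: the first
# element of a given tag whose attribute has the wanted value.
title_node = next(
    el for el in smil_doc.iter(SMIL_NS + 'meta')
    if el.attrib.get('name') == 'title')
switch_node = next(
    el for el in smil_doc.iter(SMIL_NS + 'switch')
    if el.attrib.get('id') == title_node.attrib['content'])
for vn in switch_node.findall(SMIL_NS + 'video'):
    tbr = int(vn.attrib['system-bitrate']) // 1000
    print('smil_%d -> %s' % (tbr, vn.attrib['src']))  # smil_1500 -> mp4:clip1_1500
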
@@ -9,6 +9,7 @@ from ..utils import (
    compat_urllib_request,
    determine_ext,
    ExtractorError,
    int_or_none,
)


@@ -83,6 +84,21 @@ class MetacafeIE(InfoExtractor):
                'skip_download': True,
            },
        },
        # Movieclips.com video
        {
            'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
            'info_dict': {
                'id': 'mv-Wy7ZU',
                'ext': 'mp4',
                'title': 'My Week with Marilyn - Do You Love Me?',
                'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
                'uploader': 'movie_trailers',
                'duration': 176,
            },
            'params': {
                'skip_download': 'requires rtmpdump',
            }
        }
    ]

    def report_disclaimer(self):
@@ -134,6 +150,7 @@ class MetacafeIE(InfoExtractor):

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        video_url = None
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = compat_urllib_parse.unquote(mobj.group(1))
@@ -146,16 +163,17 @@ class MetacafeIE(InfoExtractor):
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
        if video_url is None:
            mobj = re.search(r'<video src="([^"]+)"', webpage)
            if mobj:
                video_url = mobj.group(1)
                video_ext = 'mp4'
            else:
                mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
                if mobj is None:
                    raise ExtractorError('Unable to extract media URL')
                vardict = compat_parse_qs(mobj.group(1))
        if video_url is None:
            flashvars = self._search_regex(
                r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
                default=None)
            if flashvars:
                vardict = compat_parse_qs(flashvars)
                if 'mediaData' not in vardict:
                    raise ExtractorError('Unable to extract media URL')
                mobj = re.search(
@@ -165,26 +183,68 @@ class MetacafeIE(InfoExtractor):
                mediaURL = mobj.group('mediaURL').replace('\\/', '/')
                video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                video_ext = determine_ext(video_url)
        if video_url is None:
            player_url = self._search_regex(
                r"swfobject\.embedSWF\('([^']+)'",
                webpage, 'config URL', default=None)
            if player_url:
                config_url = self._search_regex(
                    r'config=(.+)$', player_url, 'config URL')
                config_doc = self._download_xml(
                    config_url, video_id,
                    note='Downloading video config')
                smil_url = config_doc.find('.//properties').attrib['smil_file']
                smil_doc = self._download_xml(
                    smil_url, video_id,
                    note='Downloading SMIL document')
                base_url = smil_doc.find('./head/meta').attrib['base']
                video_url = []
                for vn in smil_doc.findall('.//video'):
                    br = int(vn.attrib['system-bitrate'])
                    play_path = vn.attrib['src']
                    video_url.append({
                        'format_id': 'smil-%d' % br,
                        'url': base_url,
                        'play_path': play_path,
                        'page_url': url,
                        'player_url': player_url,
                        'ext': play_path.partition(':')[0],
                    })

        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
        if video_url is None:
            raise ExtractorError('Unsupported video type')

        video_title = self._html_search_regex(
            r'(?im)<title>(.*) - Video</title>', webpage, 'title')
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        video_uploader = self._html_search_regex(
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, 'uploader nickname', fatal=False)
        duration = int_or_none(
            self._html_search_meta('video:duration', webpage))

        if re.search(r'"contentRating":"restricted"', webpage) is not None:
            age_limit = 18
        age_limit = (
            18
            if re.search(r'"contentRating":"restricted"', webpage)
            else 0)

        if isinstance(video_url, list):
            formats = video_url
        else:
            age_limit = 0
            formats = [{
                'url': video_url,
                'ext': video_ext,
            }]

        self._sort_formats(formats)
        return {
            'id': video_id,
            'url': video_url,
            'description': description,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail':thumbnail,
            'ext': video_ext,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'formats': formats,
            'duration': duration,
        }

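For the Movieclips-hosted entries the RTMP play path doubles as the extension hint; a small sketch of the resulting format dict (all values below are invented):

play_path = 'mp4:movieclips/my_week_with_marilyn_720'  # hypothetical src from the SMIL <video> node
br = 1500000                                           # hypothetical system-bitrate value

fmt = {
    'format_id': 'smil-%d' % br,
    'url': 'rtmp://example.invalid/ondemand',  # stands in for the <meta base="..."> value
    'play_path': play_path,
    # partition(':')[0] keeps everything before the first colon, so the
    # 'mp4:' prefix of the play path becomes the container hint.
    'ext': play_path.partition(':')[0],
}
print('%s %s' % (fmt['format_id'], fmt['ext']))  # smil-1500000 mp4
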
youtube_dl/extractor/ministrygrid.py (new file, 57 lines)
@@ -0,0 +1,57 @@
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    smuggle_url,
)


class MinistryGridIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'

    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'description': 'Coming soon from T4G 2014!',
            'uploader': 'LifeWay Christian Resources (MG)',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        portlets_json = self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
        portlets = json.loads(portlets_json)
        pl_id = self._search_regex(
            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')

        for i, portlet in enumerate(portlets):
            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
                fatal=False)
            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                surl = smuggle_url(
                    video_iframe_url, {'force_videoid': video_id})
                return {
                    '_type': 'url',
                    'id': video_id,
                    'url': surl,
                }

        raise ExtractorError('Could not find video iframe in any portlets')

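smuggle_url() is used to pin the final video id onto the URL handed to the generic extractor; a quick round-trip sketch with youtube_dl.utils (the iframe URL is a placeholder):

from youtube_dl.utils import smuggle_url, unsmuggle_url

surl = smuggle_url('http://example.invalid/embedded-player', {'force_videoid': '3453494717001'})
url, data = unsmuggle_url(surl)
print(url)                    # the original iframe URL
print(data['force_videoid'])  # 3453494717001
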
youtube_dl/extractor/mitele.py (new file, 60 lines)
@@ -0,0 +1,60 @@
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    get_element_by_attribute,
    parse_duration,
    strip_jsonp,
)


class MiTeleIE(InfoExtractor):
    IE_NAME = 'mitele.es'
    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'

    _TEST = {
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
        'info_dict': {
            'id': '0fce117d',
            'ext': 'mp4',
            'title': 'Programa 144 - Tor, la web invisible',
            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
            'display_id': 'programa-144',
            'duration': 2913,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        episode = mobj.group('episode')
        webpage = self._download_webpage(url, episode)
        embed_data_json = self._search_regex(
            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
            flags=re.DOTALL
        ).replace('\'', '"')
        embed_data = json.loads(embed_data_json)

        info_url = embed_data['flashvars']['host']
        info_el = self._download_xml(info_url, episode).find('./video/info')

        video_link = info_el.find('videoUrl/link').text
        token_query = compat_urllib_parse.urlencode({'id': video_link})
        token_info = self._download_json(
            'http://token.mitele.es/?' + token_query, episode,
            transform_source=strip_jsonp
        )

        return {
            'id': embed_data['videoId'],
            'display_id': episode,
            'title': info_el.find('title').text,
            'url': token_info['tokenizedUrl'],
            'description': get_element_by_attribute('class', 'text', webpage),
            'thumbnail': info_el.find('thumb').text,
            'duration': parse_duration(info_el.find('duration').text),
        }

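The tokenized stream URL comes back wrapped in a JSONP callback, so the video link is url-encoded into the token request and the response is unwrapped with strip_jsonp(); roughly (the link and the response body below are invented):

import json
from youtube_dl.utils import compat_urllib_parse, strip_jsonp

video_link = 'http://video.example.invalid/programa-144.flv'  # placeholder for <videoUrl><link>
token_url = 'http://token.mitele.es/?' + compat_urllib_parse.urlencode({'id': video_link})

# The token service wraps its JSON in a callback; strip_jsonp() peels that off
# so the payload can be parsed.
payload = 'callback({"tokenizedUrl": "http://video.example.invalid/tokenized.flv"});'
token_info = json.loads(strip_jsonp(payload))
print(token_url)
print(token_info['tokenizedUrl'])
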
@@ -11,8 +11,22 @@ from ..utils import (


class MLBIE(InfoExtractor):
    _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
    _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
    _TESTS = [
        {
            'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
            'md5': 'ff56a598c2cf411a9a38a69709e97079',
            'info_dict': {
                'id': '34698933',
                'ext': 'mp4',
                'title': "Ackley's spectacular catch",
                'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                'duration': 66,
                'timestamp': 1405980600,
                'upload_date': '20140721',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',

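The relaxed _VALID_URL also accepts team-prefixed paths such as /sea/video/ in addition to the plain /video/ form; a quick check of both test URLs against the new pattern:

import re

MLB_VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'

for url in (
        'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
        'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby'):
    print(re.match(MLB_VALID_URL, url).group('id'))  # 34698933, then 34496663
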
youtube_dl/extractor/mojvideo.py (new file, 58 lines)
@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_duration,
)


class MojvideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            'thumbnail': 're:^http://.*\.jpg$',
            'duration': 242,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        # XML is malformed
        playerapi = self._download_webpage(
            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)

        if '<error>true</error>' in playerapi:
            error_desc = self._html_search_regex(
                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', playerapi, 'title')
        video_url = self._html_search_regex(
            r'<file>([^<]+)</file>', playerapi, 'video URL')
        thumbnail = self._html_search_regex(
            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
        }

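playerapi.php replies with slightly malformed XML, so the fields are pulled out with regexes and the running time goes through parse_duration(); assuming the <duration> field is clock-style, the test clip's 242 seconds would come from a value like '4:02':

from youtube_dl.utils import parse_duration

# parse_duration() turns clock-style strings into seconds.
print(parse_duration('4:02'))     # 242
print(parse_duration('1:00:05'))  # 3605
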
youtube_dl/extractor/movieclips.py (new file, 78 lines)
@@ -0,0 +1,78 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    compat_str,
    clean_html,
)


class MovieClipsIE(InfoExtractor):
    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        if config.find('./country-region').text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            vbr = int(video.attrib['system-bitrate']) / 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }

@@ -4,7 +4,11 @@ import re
import json

from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
from ..utils import (
    compat_str,
    ExtractorError,
    find_xpath_attr,
)


class NBCIE(InfoExtractor):
@@ -85,11 +89,25 @@ class NBCNewsIE(InfoExtractor):
                flags=re.MULTILINE)
            bootstrap = json.loads(bootstrap_json)
            info = bootstrap['results'][0]['video']
            playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
            mpxid = info['mpxId']
            all_videos = self._download_json(playlist_url, title)['videos']
            # The response contains additional videos
            info = next(v for v in all_videos if v['mpxId'] == mpxid)

            base_urls = [
                info['fallbackPlaylistUrl'],
                info['associatedPlaylistUrl'],
            ]

            for base_url in base_urls:
                playlist_url = base_url + '?form=MPXNBCNewsAPI'
                all_videos = self._download_json(playlist_url, title)['videos']

                try:
                    info = next(v for v in all_videos if v['mpxId'] == mpxid)
                    break
                except StopIteration:
                    continue

            if info is None:
                raise ExtractorError('Could not find video in playlists')

            return {
                '_type': 'url',

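The lookup now falls back from the fallback playlist to the associated playlist before giving up; the core pattern is one next() per candidate list, with StopIteration meaning 'try the next source':

def find_video(playlists, mpxid):
    # Stand-in for the loop above: each entry is one downloaded playlist's video list.
    for videos in playlists:
        try:
            return next(v for v in videos if v['mpxId'] == mpxid)
        except StopIteration:
            continue
    return None

playlists = [
    [{'mpxId': 111}],                # fallbackPlaylistUrl response, video not in it
    [{'mpxId': 222, 'title': 'x'}],  # associatedPlaylistUrl response
]
print(find_video(playlists, 222))  # the {'mpxId': 222, ...} dict
print(find_video(playlists, 333))  # None
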
@@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
@@ -8,19 +9,34 @@ from ..utils import ExtractorError


class NownessIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'

    _TEST = {
        'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
        'md5': '068bc0202558c2e391924cb8cc470676',
        'info_dict': {
            'id': '2520295746001',
            'ext': 'mp4',
            'description': 'Candor: The Art of Gesticulation',
            'uploader': 'Nowness',
            'title': 'Candor: The Art of Gesticulation',
        }
    }
    _TESTS = [
        {
            'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
            'md5': '068bc0202558c2e391924cb8cc470676',
            'info_dict': {
                'id': '2520295746001',
                'ext': 'mp4',
                'title': 'Candor: The Art of Gesticulation',
                'description': 'Candor: The Art of Gesticulation',
                'thumbnail': 're:^https?://.*\.jpg',
                'uploader': 'Nowness',
            }
        },
        {
            'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
            'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
            'info_dict': {
                'id': '3716354522001',
                'ext': 'mp4',
                'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
                'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
                'thumbnail': 're:^https?://.*\.jpg',
                'uploader': 'Nowness',
            }
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
            webpage = self._download_webpage(
                request, video_id, 'Downloading %s page' % format_id)
            video_url = self._html_search_regex(
                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
                r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
            if not video_url:
                continue
            formats.append({
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
        title = self._html_search_regex(
            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
        thumbnail = self._html_search_regex(
            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
            webpage, 'thumbnail URL', fatal=False)
            [r'<span title="([^"]+)">',
             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
        thumbnails = [
            {
                'url': thumb_url,
            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
        ]
        thumbnail = thumbnails[0]['url'] if thumbnails else None
        duration = parse_duration(self._html_search_regex(
            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
            r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
            r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'age_limit': 18,

@@ -1,40 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals

import calendar
import datetime
import re

from .common import InfoExtractor

# audios on oe1.orf.at are only available for 7 days, so we can't
# add tests.


class OE1IE(InfoExtractor):
    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('id')

        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
        )

        timestamp = datetime.datetime.strptime('%s %s' % (
            data['item']['day_label'],
            data['item']['time']
        ), '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(timestamp.utctimetuple())

        return {
            'id': show_id,
            'title': data['item']['title'],
            'url': data['item']['url_stream'],
            'ext': 'mp3',
            'description': data['item'].get('info'),
            'timestamp': unix_timestamp
        }

@@ -3,23 +3,38 @@ import re
import json

from .common import InfoExtractor
from ..utils import unescapeHTML
from ..utils import (
    unescapeHTML,
    ExtractorError,
)


class OoyalaIE(InfoExtractor):
    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
        'info_dict': {
            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'ext': 'mp4',
            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
    _TESTS = [
        {
            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
            'info_dict': {
                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
                'ext': 'mp4',
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
            },
        }, {
            # Only available for ipad
            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
            'md5': '4b9754921fddb68106e48c142e2a01e6',
            'info_dict': {
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
                'description': '',
            },
        },
    }
    ]

    @staticmethod
    def _url_for_embed_code(embed_code):
@@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
        player = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embedCode)
        videos_info = self._search_regex(
            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
            mobile_player, 'info').replace('\\"','"')
        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
        # Looks like some videos are only available for particular devices
        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
        # is only available for ipad)
        # Working around with fetching URLs for all the devices found starting with 'unknown'
        # until we succeed or eventually fail for each device.
        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
        devices.remove('unknown')
        devices.insert(0, 'unknown')
        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), embedCode,
                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break
        if not videos_info:
            raise ExtractorError('Unable to extract info')
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info =json.loads(videos_more_info)
        videos_more_info = json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]

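The device workaround simply reorders the scraped device list so 'unknown' is tried first and the rest serve as fallbacks; in isolation:

devices = ['html5', 'unknown', 'ipad']  # example values scraped from the player JS
devices.remove('unknown')
devices.insert(0, 'unknown')
print(devices)  # ['unknown', 'html5', 'ipad']
# Each name is then appended to the mobile player URL as &device=<name> until
# one response actually contains the streams variable.
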
@@ -3,6 +3,8 @@ from __future__ import unicode_literals

import json
import re
import calendar
import datetime

from .common import InfoExtractor
from ..utils import (
@@ -12,7 +14,9 @@ from ..utils import (
)


class ORFIE(InfoExtractor):
class ORFTVthekIE(InfoExtractor):
    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
@@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
            'entries': entries,
            'id': playlist_id,
        }


# Audios on ORF radio are only available for 7 days, so we can't add tests.


class ORFOE1IE(InfoExtractor):
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('id')

        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
        )

        timestamp = datetime.datetime.strptime('%s %s' % (
            data['item']['day_label'],
            data['item']['time']
        ), '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(timestamp.utctimetuple())

        return {
            'id': show_id,
            'title': data['item']['title'],
            'url': data['item']['url_stream'],
            'ext': 'mp3',
            'description': data['item'].get('info'),
            'timestamp': unix_timestamp
        }


class ORFFM4IE(InfoExtractor):
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )

        def extract_entry_dict(info, title, subtitle):
            return {
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }

        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }

										100
									
								
								youtube_dl/extractor/patreon.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								youtube_dl/extractor/patreon.py
									
									
									
									
									
Normal file
@@ -0,0 +1,100 @@
# encoding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    js_to_json,
)


class PatreonIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
    _TESTS = [
        {
            'url': 'http://www.patreon.com/creation?hid=743933',
            'md5': 'e25505eec1053a6e6813b8ed369875cc',
            'info_dict': {
                'id': '743933',
                'ext': 'mp3',
                'title': 'Episode 166: David Smalley of Dogma Debate',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
        {
            'url': 'http://www.patreon.com/creation?hid=754133',
            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
            'info_dict': {
                'id': '754133',
                'ext': 'mp3',
                'title': 'CD 167 Extra',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
    ]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
    '''
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'redirectUrl': 'http://www.patreon.com/',
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(
            'https://www.patreon.com/processLogin',
            compat_urllib_parse.urlencode(login_form).encode('utf-8')
        )
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()
    '''

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage).strip()

        attach_fn = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            webpage, 'attachment URL', default=None)
        if attach_fn is not None:
            video_url = 'http://www.patreon.com' + attach_fn
            thumbnail = self._og_search_thumbnail(webpage)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
        else:
            playlist_js = self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                webpage, 'playlist JSON')
            playlist_json = js_to_json(playlist_js)
            playlist = json.loads(playlist_json)
            data = playlist[0]
            video_url = self._proto_relative_url(data['mp3'])
            thumbnail = self._proto_relative_url(data.get('cover'))
            uploader = data.get('artist')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
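When no direct attachment link is present, the new PatreonIE falls back to the page's jPlayer playlist, which is a JavaScript literal rather than strict JSON, hence the js_to_json round-trip before json.loads. A minimal sketch of that step, assuming js_to_json normalises the single-quoted strings here (which is what the extractor relies on); the playlist snippet is hypothetical:

    import json
    from youtube_dl.utils import js_to_json

    # Hypothetical jPlayer playlist literal; single-quoted strings are not valid JSON
    playlist_js = "[{'mp3': '//example.com/episode166.mp3', 'artist': 'Cognitive Dissonance Podcast'}]"
    playlist = json.loads(js_to_json(playlist_js))
    print(playlist[0]['mp3'])  # //example.com/episode166.mp3 (protocol-relative, hence _proto_relative_url)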
@@ -20,27 +20,74 @@ class PBSIE(InfoExtractor):
        )
    '''

    _TEST = {
        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
        'md5': 'ce1888486f0908d555a8093cac9a7362',
        'info_dict': {
            'id': '2365006249',
            'ext': 'mp4',
            'title': 'A More Perfect Union',
            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
            'duration': 3190,
    _TESTS = [
        {
            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
            'md5': 'ce1888486f0908d555a8093cac9a7362',
            'info_dict': {
                'id': '2365006249',
                'ext': 'mp4',
                'title': 'A More Perfect Union',
                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
                'duration': 3190,
            },
        },
    }
        {
            'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
            'md5': '143c98aa54a346738a3d78f54c925321',
            'info_dict': {
                'id': '2365297690',
                'ext': 'mp4',
                'title': 'Losing Iraq',
                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
                'duration': 5050,
            },
        },
        {
            'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
            'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
            'info_dict': {
                'id': '2201174722',
                'ext': 'mp4',
                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
                'duration': 801,
            },
        },
        {
            'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
            'md5': 'c62859342be2a0358d6c9eb306595978',
            'info_dict': {
                'id': '2365297708',
                'ext': 'mp4',
                'description': 'md5:68d87ef760660eb564455eb30ca464fe',
                'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
                'duration': 6559,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]

    def _real_extract(self, url):
    def _extract_ids(self, url):
        mobj = re.match(self._VALID_URL, url)

        presumptive_id = mobj.group('presumptive_id')
        display_id = presumptive_id
        if presumptive_id:
            webpage = self._download_webpage(url, display_id)

            MEDIA_ID_REGEXES = [
                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
                r'class="coveplayerid">([^<]+)<',                       # coveplayer
            ]

            media_id = self._search_regex(
                MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
            if media_id:
                return media_id, presumptive_id

            url = self._search_regex(
                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
            mobj = re.match(self._VALID_URL, url)

@@ -57,6 +104,11 @@ class PBSIE(InfoExtractor):
            video_id = mobj.group('id')
            display_id = video_id

        return video_id, display_id

    def _real_extract(self, url):
        video_id, display_id = self._extract_ids(url)

        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info = self._download_json(info_url, display_id)

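Besides the _extract_ids refactoring, the functional change here is the relaxed partnerPlayer pattern, which now accepts class= as well as id= on the embed iframe. A quick check of the new pattern against a hypothetical embed snippet (the markup and attribute values are made up for illustration):

    import re

    html = '<iframe class="partnerPlayer" frameborder="0" src="http://player.pbs.org/viralplayer/2365297690/"></iframe>'
    player_url = re.search(
        r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
        html).group(1)
    print(player_url)  # http://player.pbs.org/viralplayer/2365297690/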
							
								
								
									
82	youtube_dl/extractor/playfm.py	Normal file
@@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    float_or_none,
    int_or_none,
)


class PlayFMIE(InfoExtractor):
    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'

    _TEST = {
        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '137220',
            'ext': 'mp3',
            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'uploader': 'Sven Tasnadi',
            'uploader_id': 'sventasnadi',
            'duration': 5627.428,
            'upload_date': '20140712',
            'view_count': int,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        upload_date = mobj.group('upload_date')

        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
        req = compat_urllib_request.Request(
            'http://www.play.fm/flexRead/recording', data=rec_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        rec_doc = self._download_xml(req, video_id)

        error_node = rec_doc.find('./error')
        if error_node is not None:
            raise ExtractorError('An error occurred: %s (code %s)' % (
                error_node.text, rec_doc.find('./status').text))

        recording = rec_doc.find('./recording')
        title = recording.find('./title').text
        view_count = int_or_none(recording.find('./stats/playcount').text)
        duration = float_or_none(recording.find('./duration').text, scale=1000)
        thumbnail = recording.find('./image').text

        artist = recording.find('./artists/artist')
        uploader = artist.find('./name').text
        uploader_id = artist.find('./slug').text

        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
            'http:', recording.find('./url').text,
            recording.find('./_class').text, recording.find('./file_id').text,
            rec_doc.find('./uuid').text, video_id,
            rec_doc.find('./jingle/file_id').text,
            'http%3A%2F%2Fwww.play.fm%2Fplayer',
        )

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'filesize': int_or_none(recording.find('./size').text),
            'title': title,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
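play.fm reports the recording duration in milliseconds, which is why the extractor passes scale=1000 to float_or_none. A small sketch of how that call behaves, assuming float_or_none simply divides by scale and passes None through (which is what this call site relies on):

    from youtube_dl.utils import float_or_none

    print(float_or_none('5627428', scale=1000))  # 5627.428, matching the test's 'duration'
    print(float_or_none(None, scale=1000))       # None, so a missing <duration> does not crash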
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -9,15 +11,16 @@ from ..utils import (


class PornotubeIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
    _TEST = {
        u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
        u'file': u'1689755.flv',
        u'md5': u'374dd6dcedd24234453b295209aa69b6',
        u'info_dict': {
            u"upload_date": u"20090708",
            u"title": u"Marilyn-Monroe-Bathing",
            u"age_limit": 18
        'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
        'md5': '374dd6dcedd24234453b295209aa69b6',
        'info_dict': {
            'id': '1689755',
            'ext': 'flv',
            'upload_date': '20090708',
            'title': 'Marilyn-Monroe-Bathing',
            'age_limit': 18
        }
    }

@@ -32,22 +35,22 @@ class PornotubeIE(InfoExtractor):

        # Get the video URL
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
        video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
        video_url = compat_urllib_parse.unquote(video_url)

        #Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
        if upload_date: upload_date = unified_strdate(upload_date)
        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
        if upload_date:
            upload_date = unified_strdate(upload_date)
        age_limit = self._rta_search(webpage)

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': upload_date,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv',
                'age_limit': age_limit}

        return [info]
        return {
            'id': video_id,
            'url': video_url,
            'upload_date': upload_date,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'age_limit': age_limit,
        }

@@ -1,23 +1,23 @@
from __future__ import unicode_literals

import re
import time

from .common import InfoExtractor
from ..utils import strip_jsonp
from ..utils import str_or_none


class ReverbNationIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
        'file': '16965047.mp3',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
            "id": "16965047",
            "ext": "mp3",
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
            "uploader_id": 216429,
            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
            "uploader_id": "216429",
            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
        },
    }]

@@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
        song_id = mobj.group('id')

        api_res = self._download_json(
            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
                % (song_id, int(time.time() * 1000)),
            'https://api.reverbnation.com/song/%s' % song_id,
            song_id,
            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )

@@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': api_res.get('artist', {}).get('name'),
            'uploader_id': api_res.get('artist', {}).get('id'),
            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
            'thumbnail': self._proto_relative_url(
                api_res.get('image', api_res.get('thumbnail'))),
            'ext': 'mp3',
            'vcodec': 'none',
        }

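The test now expects 'uploader_id' as a string, so the integer id from the ReverbNation API is passed through str_or_none, and the protocol-relative thumbnail URL through _proto_relative_url. A sketch of the str_or_none side, assuming it stringifies any non-None value and passes None through, which is what the changed test relies on:

    from youtube_dl.utils import str_or_none

    print(str_or_none(216429))  # '216429'
    print(str_or_none(None))    # None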
							
								
								
									
55	youtube_dl/extractor/rtlnl.py	Normal file
@@ -0,0 +1,55 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import parse_duration


class RtlXlIE(InfoExtractor):
    IE_NAME = 'rtlxl.nl'
    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'

    _TEST = {
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
        'info_dict': {
            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
            'ext': 'flv',
            'title': 'RTL Nieuws - Laat',
            'description': 'Dagelijks het laatste nieuws uit binnen- en '
                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
                'onze mobiele apps.',
            'timestamp': 1408051800,
            'upload_date': '20140814',
            'duration': 576.880,
        },
        'params': {
            # We download the first bytes of the first fragment, it can't be
            # processed by the f4m downloader because it isn't complete
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uuid = mobj.group('uuid')

        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)

        material = info['material'][0]
        episode_info = info['episodes'][0]

        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
        progname = info['abstracts'][0]['name']
        subtitle = material['title'] or info['episodes'][0]['name']

        return {
            'id': uuid,
            'title': '%s - %s' % (progname, subtitle),
            'formats': self._extract_f4m_formats(f4m_url, uuid),
            'timestamp': material['original_date'],
            'description': episode_info['synopsis'],
            'duration': parse_duration(material.get('duration')),
        }
@@ -92,16 +92,7 @@ class RTLnowIE(InfoExtractor):
        },
        {
            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
            'info_dict': {
                'id': '153819',
                'ext': 'flv',
                'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
                'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
                'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
                'upload_date': '20140221',
                'duration': 2429,
            },
            'skip': 'Only works from Germany',
            'only_matching': True,
        },
    ]

@@ -1,70 +1,82 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
import base64
import re
import time

from .common import InfoExtractor
from ..utils import (
    struct_unpack,
    remove_end,
)


def _decrypt_url(png):
    encrypted_data = base64.b64decode(png)
    text_index = encrypted_data.find(b'tEXt')
    text_chunk = encrypted_data[text_index - 4:]
    length = struct_unpack('!I', text_chunk[:4])[0]
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
    data = bytearray(text_chunk[8:8 + length])
    data = [chr(b) for b in data if b != 0]
    hash_index = data.index('#')
    alphabet_data = data[:hash_index]
    url_data = data[hash_index + 1:]

    alphabet = []
    e = 0
    d = 0
    for l in alphabet_data:
        if d == 0:
            alphabet.append(l)
            d = e = (e + 1) % 4
        else:
            d -= 1
    url = ''
    f = 0
    e = 3
    b = 1
    for letter in url_data:
        if f == 0:
            l = int(letter) * 10
            f = 1
        else:
            if e == 0:
                l += int(letter)
                url += alphabet[l]
                e = (b + 3) % 4
                f = 0
                b += 1
            else:
                e -= 1

    return url



class RTVEALaCartaIE(InfoExtractor):
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        encrypted_data = base64.b64decode(png)
        text_index = encrypted_data.find(b'tEXt')
        text_chunk = encrypted_data[text_index-4:]
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        data = bytearray(text_chunk[8:8+length])
        data = [chr(b) for b in data if b != 0]
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index+1:]

        alphabet = []
        e = 0
        d = 0
        for l in alphabet_data:
            if d == 0:
                alphabet.append(l)
                d = e = (e + 1) % 4
            else:
                d -= 1
        url = ''
        f = 0
        e = 3
        b = 1
        for letter in url_data:
            if f == 0:
                l = int(letter)*10
                f = 1
            else:
                if e == 0:
                    l += int(letter)
                    url += alphabet[l]
                    e = (b + 3) % 4
                    f = 0
                    b += 1
                else:
                    e -= 1

        return url
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
            'ext': 'flv',
            'title': 'TODO',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
            video_id)['page']['items'][0]
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)
        video_url = _decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
            'thumbnail': info.get('image'),
            'page_url': url,
        }


class RTVELiveIE(InfoExtractor):
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/noticias/directo-la-1/',
        'info_dict': {
            'id': 'directo-la-1',
            'ext': 'flv',
            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        start_time = time.gmtime()
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
        title = remove_end(self._og_search_title(webpage), ' en directo')
        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)

        vidplayer_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = _decrypt_url(png)

        return {
            'id': video_id,
            'ext': 'flv',
            'title': title,
            'url': video_url,
            'app': 'rtve-live-live?ovpfv=2.1.2',
            'player_url': player_url,
            'rtmp_live': True,
        }

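The module-level _decrypt_url shared by both RTVE extractors starts by pulling the tEXt chunk out of the PNG the site returns: in a PNG, each chunk is a 4-byte big-endian length, the 4-byte type, then the payload, which is why the length sits 4 bytes before find(b'tEXt') and the payload starts 8 bytes after that length field. A minimal sketch of just that slicing step with a hand-built stand-in chunk (the payload here is made up and would not decode to a real URL):

    import struct

    text = b'abcdef#0123456789'                   # hypothetical alphabet#digits payload
    chunk = struct.pack('!I', len(text)) + b'tEXt' + text
    png = b'\x89PNG\r\n\x1a\n' + chunk            # just enough structure for the lookup

    idx = png.find(b'tEXt')
    length = struct.unpack('!I', png[idx - 4:idx])[0]
    assert png[idx + 4:idx + 4 + length] == text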
@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
            'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
        },
        'params': {
            'skip_download': True

							
								
								
									
56	youtube_dl/extractor/sbs.py	Normal file
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import json
import re
from .common import InfoExtractor
from ..utils import (
    js_to_json,
    remove_end,
)


class SBSIE(InfoExtractor):
    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'

    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            'thumbnail': 're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, ''))
        release_urls = json.loads(release_urls_json)
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))

        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,

            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
							
								
								
									
57	youtube_dl/extractor/shared.py	Normal file
@@ -0,0 +1,57 @@
from __future__ import unicode_literals

import re
import base64

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    compat_urllib_request,
    compat_urllib_parse,
    int_or_none,
)


class SharedIE(InfoExtractor):
    _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        if re.search(r'>File does not exist<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))

        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
            'thumbnail': thumbnail,
        }
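shared.sx needs two small tricks: the confirmation step re-posts every hidden form field from the landing page, and the title is stored base64-encoded in a full:title meta tag. A sketch of both against a hypothetical page fragment (the field names and values are made up):

    import base64
    import re
    from youtube_dl.utils import compat_urllib_parse

    page = '<input type="hidden" name="op" value="download1"><input type="hidden" name="id" value="0060718775">'
    download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
    post = compat_urllib_parse.urlencode(download_form)   # e.g. 'op=download1&id=0060718775' (key order may vary)

    encoded_title = base64.b64encode('Bmp4'.encode('utf-8'))   # b'Qm1wNA=='
    print(base64.b64decode(encoded_title).decode('utf-8'))     # Bmp4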
							
								
								
									
68	youtube_dl/extractor/snotr.py	Normal file
@@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    str_to_int,
    parse_duration,
)


class SnotrIE(InfoExtractor):
    _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        view_count = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            webpage, 'duration', fatal=False))

        filesize_approx = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            webpage, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'description': description,
            'title': title,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        }
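Snotr only exposes human-readable stats, so the numbers in the tests are all derived: parse_duration turns the 'Length' field into seconds, str_to_int strips the thousands separators from the view count, and float_or_none(..., invscale=1024 * 1024) converts the 'Filesize' megabyte figure into bytes. A sketch with the values behind the first test, assuming these helpers behave as those call sites expect:

    from youtube_dl.utils import float_or_none, parse_duration, str_to_int

    print(parse_duration('4:07'))                      # 247 seconds
    print(str_to_int('1,337'))                         # 1337
    print(float_or_none('94', invscale=1024 * 1024))   # 98566144.0 bytes, the test's filesize_approx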
							
								
								
									
80	youtube_dl/extractor/sockshare.py	Normal file
@@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals

from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    compat_urllib_request,
    determine_ext,
)
import re

from .common import InfoExtractor


class SockshareIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        url = 'http://sockshare.com/file/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash,
            "confirm": "Continue as Free User"
        }

        post = compat_urllib_parse.urlencode(fields)
        req = compat_urllib_request.Request(url, post)
        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.sockshare.com')
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        video_url = "http://www.sockshare.com" + video_url
        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'ext': determine_ext(title),
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
@@ -82,10 +82,10 @@ class SoundcloudIE(InfoExtractor):
        # downloadable song
        {
            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
            'md5': 'fee7b8747b09bb755cefd4b853e7249a',
            'md5': '7624f2351f8a3b2e7cd51522496e7631',
            'info_dict': {
                'id': '128590877',
                'ext': 'wav',
                'ext': 'mp3',
                'title': 'Bus Brakes',
                'description': 'md5:0170be75dd395c96025d210d261c784e',
                'uploader': 'oddsamples',

@@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
            'ext': 'mp4',
            'upload_date': '20140329',
            'title': 'FRONTIERS - Final Greenlight Trailer',
            'description': 'md5:6df4fe8dd494ae811869672b0767e025',
            'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
            'uploader': 'AAD Productions',
            'uploader_id': 'AtomicAgeDogGames',
        }

@@ -1,4 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import time

@@ -10,18 +12,18 @@ from ..utils import (


class StreamcloudIE(InfoExtractor):
    IE_NAME = u'streamcloud.eu'
    IE_NAME = 'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video  \'/\\ ä ↭',
            u'duration': 9,
        'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        'md5': '6bea4c7fa5daaacc2a946b7146286686',
        'info_dict': {
            'id': 'skp9j99s4bpz',
            'ext': 'mp4',
            'title': 'youtube-dl test video  \'/\\ ä ↭',
        },
        u'skip': u'Only available from the EU'
        'skip': 'Only available from the EU'
    }

    def _real_extract(self, url):
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')
            req, video_id, note='Downloading video page ...')
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
            r'<h1[^>]*>([^<]+)<', webpage, 'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        duration = None if duration_str is None else int(duration_str)
            r'file:\s*"([^"]+)"', webpage, 'video URL')
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
            r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }

@@ -8,7 +8,7 @@ from ..utils import parse_duration


class SWRMediathekIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _TESTS = [{
        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
            'uploader': 'SWR 2',
            'uploader_id': '284670',
        }
    }, {
        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
        'md5': '881531487d0633080a8cc88d31ef896f',
        'info_dict': {
            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
            'ext': 'mp4',
            'title': 'Familienspaß am Bodensee',
            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
            'thumbnail': 're:http://.*\.jpg',
            'duration': 1784,
            'upload_date': '20140727',
            'uploader': 'SWR Fernsehen BW',
            'uploader_id': '281130',
        }
    }]

    def _real_extract(self, url):

@@ -19,16 +19,6 @@ class TagesschauIE(InfoExtractor):
            'description': 'md5:69da3c61275b426426d711bde96463ab',
            'thumbnail': 're:^http:.*\.jpg$',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
        'md5': '66652566900963a3f962333579eeffcf',
        'info_dict': {
            'id': '5964',
            'ext': 'mp4',
            'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
            'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
            'thumbnail': 're:http://.*\.jpg',
        },
    }]

    _FORMATS = {

@@ -62,7 +62,7 @@ class TeacherTubeIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title')
        title = self._html_search_meta('title', webpage, 'title', fatal=True)
        TITLE_SUFFIX = ' - TeacherTube'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)].strip()
@@ -101,7 +101,11 @@ class TeacherTubeUserIE(InfoExtractor):

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'

    _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
    _MEDIA_RE = r'''(?sx)
        class="?sidebar_thumb_time"?>[0-9:]+</div>
        \s*
        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    '''

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -111,14 +115,12 @@ class TeacherTubeUserIE(InfoExtractor):
        webpage = self._download_webpage(url, user_id)
        urls.extend(re.findall(self._MEDIA_RE, webpage))

        pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
        pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
        for p in pages:
            more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
            webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
            urls.extend(re.findall(self._MEDIA_RE, webpage))

        entries = []
        for url in urls:
            entries.append(self.url_result(url, 'TeacherTube'))
            webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
            video_urls = re.findall(self._MEDIA_RE, webpage)
            urls.extend(video_urls)

        entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
        return self.playlist_result(entries, user_id)

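The rewritten _MEDIA_RE is a verbose (?sx) pattern whose main behavioural change is the optional quoting around the class attribute. A quick check against a hypothetical sidebar snippet that uses the unquoted form:

    import re

    _MEDIA_RE = r'''(?sx)
        class="?sidebar_thumb_time"?>[0-9:]+</div>
        \s*
        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    '''

    html = '<div class=sidebar_thumb_time>03:45</div> <a href="http://www.teachertube.com/video/example-123">'
    print(re.findall(_MEDIA_RE, html))  # ['http://www.teachertube.com/video/example-123']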
@@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor):
        video_id = mobj.group("video_id")
        if not video_id:
            video_id = self._html_search_regex(
                r'<article class="video" data-id="(\d+?)"',
                r'data-node-id="(\d+?)"',
                webpage, 'video id')

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor

							
								
								
									
										85
									
								
								youtube_dl/extractor/tvplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								youtube_dl/extractor/tvplay.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,85 @@
 | 
			
		||||
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_iso8601,
    qualities,
)


class TVPlayIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'info_dict': {
                'id': '418113',
                'ext': 'flv',
                'title': 'Kādi ir īri? - Viņas melo labāk',
                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
                'duration': 25,
                'timestamp': 1406097056,
                'upload_date': '20140723',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')

        if video['is_geo_blocked']:
            raise ExtractorError(
                'This content is not available in your country due to copyright reasons', expected=True)

        streams = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')

        quality = qualities(['hls', 'medium', 'high'])
        formats = []
        for format_id, video_url in streams['streams'].items():
            if not video_url:
                continue
            fmt = {
                'format_id': format_id,
                'preference': quality(format_id),
            }
            if video_url.startswith('rtmp'):
                m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
                if not m:
                    continue
                fmt.update({
                    'ext': 'flv',
                    'url': m.group('url'),
                    'app': m.group('app'),
                    'play_path': m.group('playpath'),
                })
            else:
                fmt.update({
                    'url': video_url,
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['title'],
            'description': video['description'],
            'duration': video['duration'],
            'timestamp': parse_iso8601(video['created_at']),
            'view_count': video['views']['total'],
            'age_limit': video.get('age_limit', 0),
            'formats': formats,
        }
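
Note: the qualities() helper imported from ..utils above builds a ranking closure: a format_id is scored by its position in the given list, so 'high' outranks 'medium' and 'hls', and ids not in the list fall to the bottom. A minimal sketch of that behaviour (an approximation for illustration, not the library source):

    def qualities(quality_ids):
        # Return a ranking function: index in the list, -1 for unknown ids.
        def q(qid):
            try:
                return quality_ids.index(qid)
            except ValueError:
                return -1
        return q

    quality = qualities(['hls', 'medium', 'high'])
    assert quality('high') == 2      # ranked best
    assert quality('hls') == 0       # lowest of the known ids
    assert quality('unknown') == -1  # unknown ids rank below all listed ones
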
							
								
								
									
youtube_dl/extractor/ubu.py (new file, 56 lines added)
@@ -0,0 +1,56 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import int_or_none


class UbuIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
    _TEST = {
        'url': 'http://ubu.com/film/her_noise.html',
        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
        'info_dict': {
            'id': 'her_noise',
            'ext': 'mp4',
            'title': 'Her Noise - The Making Of (2007)',
            'duration': 3600,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')

        duration = int_or_none(self._html_search_regex(
            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
        if duration:
            duration *= 60

        formats = []

        FORMAT_REGEXES = [
            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
        ]

        for format_id, format_regex in FORMAT_REGEXES:
            m = re.search(format_regex, webpage)
            if m:
                formats.append({
                    'url': m.group(1),
                    'format_id': format_id,
                })

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }
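
Note: ubu.com publishes the running time in minutes, so the extractor only multiplies by 60 when the regex actually matched (int_or_none yields None otherwise and the guard skips the conversion). A small sketch of that guard against a hypothetical page fragment:

    import re

    webpage = '... Duration: 60 minutes ...'  # made-up markup for illustration

    m = re.search(r'Duration: (\d+) minutes', webpage)
    duration = int(m.group(1)) if m else None  # stands in for int_or_none(...)
    if duration:
        duration *= 60  # minutes -> seconds, as in the extractor above
    assert duration == 3600
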
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
            self._downloader.report_warning(
                'Cannot download SMIL information, falling back to JSON ..')

        self._sort_formats(formats)
        timestamp_ms = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))


youtube_dl/extractor/vidme.py (new file, 68 lines added)
@@ -0,0 +1,68 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    float_or_none,
    str_to_int,
)


class VidmeIE(InfoExtractor):
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
    _TEST = {
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'duration': 119.92,
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default='')
        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
        width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
        height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
        duration = float_or_none(self._html_search_regex(
            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
        view_count = str_to_int(self._html_search_regex(
            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
            webpage, 'like count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'width': width,
            'height': height,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
        }
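
Note: the view, like and comment counters above are run through str_to_int rather than plain int(), presumably because the captured values can contain separators (the regexes accept digits, commas, dots and spaces). A rough equivalent of that lenient parse, for illustration only (the real helper in youtube_dl/utils.py may differ in detail):

    import re

    def lenient_int(value):
        # Strip common separators before converting; None stays None.
        if value is None:
            return None
        return int(re.sub(r'[,\.\s]', '', value))

    assert lenient_int('1,234') == 1234
    assert lenient_int('12.345') == 12345
    assert lenient_int(None) is None
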
@@ -98,7 +98,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
            'info_dict': {
                'id': '54469442',
                'ext': 'mp4',
                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software',
                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                'uploader': 'The BLN & Business of Software',
                'uploader_id': 'theblnbusinessofsoftware',
                'duration': 3610,
@@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'videopassword': 'youtube-dl',
            },
        },
        {
            'url': 'http://vimeo.com/channels/keypeele/75629013',
            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
            'note': 'Video is freely available via original URL '
                    'and protected with password when accessed via http://vimeo.com/75629013',
            'info_dict': {
                'id': '75629013',
                'ext': 'mp4',
                'title': 'Key & Peele: Terrorist Interrogation',
                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
                'uploader_id': 'atencio',
                'uploader': 'Peter Atencio',
                'duration': 187,
            },
        },
        {
            'url': 'http://vimeo.com/76979871',
            'md5': '3363dd6ffebe3784d56f4132317fd446',
@@ -136,6 +151,19 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'duration': 62,
            }
        },
        {
            'note': 'video player needs Referer',
            'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
            'md5': '6295fdab8f4bf6a002d058b2c6dce276',
            'info_dict': {
                'id': '91613211',
                'ext': 'mp4',
                'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
                'uploader': 'DevWeek Events',
                'duration': 2773,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]

    @classmethod
@@ -190,14 +218,14 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        if data is not None:
            headers = headers.copy()
            headers.update(data)
        if 'Referer' not in headers:
            headers['Referer'] = url

        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        if mobj.group('pro') or mobj.group('player'):
            url = 'http://player.vimeo.com/video/' + video_id
        else:
            url = 'https://vimeo.com/' + video_id

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url, None, headers)
@@ -263,7 +291,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        if video_thumbnail is None:
            video_thumbs = config["video"].get("thumbs")
            if video_thumbs and isinstance(video_thumbs, dict):
                _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]

        # Extract video description
        video_description = None

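
Note on the one-line change in the last Vimeo hunk: the thumbs mapping is keyed by width, but it can also carry a non-numeric key (a 'base' entry, for example), so the old int(width) could raise ValueError; the new expression sorts such keys as width 0 so the widest numeric thumbnail still wins. A quick illustration with a made-up dict of the same shape:

    video_thumbs = {
        '640': 'http://example.invalid/640.jpg',    # hypothetical URLs
        '1280': 'http://example.invalid/1280.jpg',
        'base': 'http://example.invalid/base.jpg',  # non-numeric key
    }
    _, video_thumbnail = sorted(
        (int(width if width.isdigit() else 0), t_url)
        for (width, t_url) in video_thumbs.items())[-1]
    assert video_thumbnail.endswith('1280.jpg')
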
@@ -10,7 +10,7 @@ from ..utils import (


class VodlockerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'

    _TESTS = [{
        'url': 'http://vodlocker.com/e8wvyzz4sl42',
@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
                req, video_id, 'Downloading video page')

        title = self._search_regex(
            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
        url = self._search_regex(

@@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
    int_or_none,
    compat_str,
)


class VubeIE(InfoExtractor):
@@ -20,12 +23,15 @@ class VubeIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Chiara Grispo - Price Tag by Jessie J',
                'description': 'md5:8ea652a1f36818352428cb5134933313',
                'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
                'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
                'uploader': 'Chiara.Grispo',
                'uploader_id': '1u3hX0znhP',
                'timestamp': 1388743358,
                'upload_date': '20140103',
                'duration': 170.56
                'duration': 170.56,
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
            }
        },
        {
@@ -36,12 +42,33 @@ class VubeIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
                'description': 'md5:40bcacb97796339f1690642c21d56f4a',
                'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102265d5a9f-0f17-4f6b-5753-adf08484ee1e.jpg',
                'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
                'uploader': 'Seraina',
                'uploader_id': 'XU9VE2BQ2q',
                'timestamp': 1396492438,
                'upload_date': '20140403',
                'duration': 240.107
                'duration': 240.107,
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'categories': ['seraina', 'jessica', 'krewella', 'alive'],
            }
        }, {
            'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
            'md5': '0584fc13b50f887127d9d1007589d27f',
            'info_dict': {
                'id': '0nmsMY5vEq',
                'ext': 'mp4',
                'title': 'Frozen - Let It Go Cover by Siren Gene',
                'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
                'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
                'uploader': 'Siren',
                'timestamp': 1395448018,
                'upload_date': '20140322',
                'duration': 221.788,
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
                'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
            }
        }
    ]
@@ -51,39 +78,52 @@ class VubeIE(InfoExtractor):
        video_id = mobj.group('id')

        video = self._download_json(
            'http://vube.com/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
            'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')

        public_id = video['public_id']

        formats = [
            {
                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
                'height': int(fmt['height']),
                'abr': int(fmt['audio_bitrate']),
                'vbr': int(fmt['video_bitrate']),
                'format_id': fmt['media_resolution_id']
            } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
        ]
        formats = []

        for media in video['media'].get('video', []) + video['media'].get('audio', []):
            if media['transcoding_status'] != 'processed':
                continue
            fmt = {
                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
                'abr': int(media['audio_bitrate']),
                'format_id': compat_str(media['media_resolution_id']),
            }
            vbr = int(media['video_bitrate'])
            if vbr:
                fmt.update({
                    'vbr': vbr,
                    'height': int(media['height']),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        title = video['title']
        description = video.get('description')
        thumbnail = video['thumbnail_src']
        if thumbnail.startswith('//'):
            thumbnail = 'http:' + thumbnail
        uploader = video['user_alias']
        uploader_id = video['user_url_id']
        timestamp = int(video['upload_time'])
        thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
        uploader = video.get('user_alias') or video.get('channel')
        timestamp = int_or_none(video.get('upload_time'))
        duration = video['duration']
        view_count = video.get('raw_view_count')
        like_count = video.get('total_likes')
        dislike_count= video.get('total_hates')
        dislike_count = video.get('total_hates')

        comment = self._download_json(
            'http://vube.com/api/video/%s/comment' % video_id, video_id, 'Downloading video comment JSON')
        comments = video.get('comments')
        comment_count = None
        if comments is None:
            comment_data = self._download_json(
                'http://vube.com/api/video/%s/comment' % video_id,
                video_id, 'Downloading video comment JSON', fatal=False)
            if comment_data is not None:
                comment_count = int_or_none(comment_data.get('total'))
        else:
            comment_count = len(comments)

        comment_count = int_or_none(comment.get('total'))
        categories = [tag['text'] for tag in video['tags']]

        return {
            'id': video_id,
@@ -92,11 +132,11 @@ class VubeIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'categories': categories,
        }

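
Note: the rewritten Vube format loop above walks both video and audio renditions from the API, and 'vbr' and 'height' are only attached when the reported video bitrate is non-zero, which suggests audio-only entries report a zero video bitrate. A condensed sketch with made-up API entries of the same shape as the fields used above:

    media_items = [  # hypothetical data mirroring the fields used in the loop
        {'media_resolution_id': 1, 'audio_bitrate': '128',
         'video_bitrate': '900', 'height': '480', 'transcoding_status': 'processed'},
        {'media_resolution_id': 9, 'audio_bitrate': '128',
         'video_bitrate': '0', 'height': '0', 'transcoding_status': 'processed'},
    ]
    formats = []
    for media in media_items:
        if media['transcoding_status'] != 'processed':
            continue
        fmt = {'format_id': str(media['media_resolution_id']),
               'abr': int(media['audio_bitrate'])}
        vbr = int(media['video_bitrate'])
        if vbr:  # skip vbr/height for audio-only entries
            fmt.update({'vbr': vbr, 'height': int(media['height'])})
        formats.append(fmt)
    assert 'vbr' in formats[0] and 'vbr' not in formats[1]
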
@@ -2,29 +2,43 @@
from __future__ import unicode_literals

import re
import hashlib

from .common import InfoExtractor
from ..utils import (
    unified_strdate,
)
from ..utils import unified_strdate


class WatIE(InfoExtractor):
    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        'info_dict': {
            'id': '10631273',
            'ext': 'mp4',
            'title': 'World War Z - Philadelphia VOST',
            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
    _TESTS = [
        {
            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
            'info_dict': {
                'id': '11713067',
                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
        },
        'params': {
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        {
            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
                'upload_date': '20140816',
                'duration': 2910,
            },
        },
    }
    ]

    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
@@ -36,13 +50,20 @@ class WatIE(InfoExtractor):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        short_id = mobj.group('short_id')
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id or short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)

        geo_list = video_info.get('geoList')
        country = geo_list[0] if geo_list else ''

        chapters = video_info['chapters']
        first_chapter = chapters[0]
        files = video_info['files']
        first_file = files[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
@@ -61,12 +82,47 @@ class WatIE(InfoExtractor):
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url

        formats = [{
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'format_id': 'Mobile',
        }]

        fmts = [('SD', 'web')]
        if first_file.get('hasHD'):
            fmts.append(('HD', 'webhd'))

        def compute_token(param):
            timestamp = '%08x' % int(self._download_webpage(
                'http://www.wat.tv/servertime', real_id,
                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)

        for fmt in fmts:
            webid = '/%s/%s' % (fmt[1], real_id)
            video_url = self._download_webpage(
                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                real_id,
                'Downloding %s video URL' % fmt[0],
                'Failed to download %s video URL' % fmt[0],
                False)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': fmt[0],
            })

        return {
            'id': real_id,
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            'upload_date': upload_date,
            'duration': first_file['duration'],
            'formats': formats,
        }

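
Note: the compute_token helper added above derives the access token as an MD5 of a hard-coded magic string, the requested path and the server time rendered as zero-padded hex, joined as 'md5hex/timestamp'. A standalone sketch of the same construction that takes the server time as an argument instead of fetching http://www.wat.tv/servertime (the magic constant is copied from the hunk; the example path and time are made up):

    import hashlib

    MAGIC = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'

    def compute_token(param, server_time):
        timestamp = '%08x' % server_time  # zero-padded hex, as in the extractor
        signature = hashlib.md5((MAGIC + param + timestamp).encode('ascii')).hexdigest()
        return '%s/%s' % (signature, timestamp)

    # e.g. token for the SD ('web') stream of a made-up real_id:
    print(compute_token('/web/12345678', 1408900000))
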
							
								
								
									
youtube_dl/extractor/wayofthemaster.py (new file, 52 lines added)
@@ -0,0 +1,52 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class WayOfTheMasterIE(InfoExtractor):
    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'

    _TEST = {
        'url': 'http://www.wayofthemaster.com/hbks.shtml',
        'md5': '5316b57487ada8480606a93cb3d18d24',
        'info_dict': {
            'id': 'hbks',
            'ext': 'mp4',
            'title': 'Intelligent Design vs. Evolution',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
            webpage, 'title', default=None)
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title>', webpage, 'page title')

        url_base = self._search_regex(
            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
            webpage, 'URL base')
        formats = [{
            'format_id': 'low',
            'quality': 1,
            'url': url_base + '_low.mp4',
        }, {
            'format_id': 'high',
            'quality': 2,
            'url': url_base + '_high.mp4',
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
@@ -55,14 +55,14 @@ class WDRIE(InfoExtractor):
            },
        },
        {
            'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
            'md5': '24e83813e832badb0a8d7d1ef9ef0691',
            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
            'info_dict': {
                'id': 'mdb-463528',
                'id': 'mdb-478135',
                'ext': 'mp3',
                'title': 'Süpersong: Soul Bossa Nova',
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140630',
                'upload_date': '20140717',
            },
        },
    ]
@@ -81,7 +81,7 @@ class WDRIE(InfoExtractor):
            ]
            return self.playlist_result(entries, page_id)

        flashvars = compat_urlparse.parse_qs(
        flashvars = compat_parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        page_id = flashvars['trackerClipId'][0]


youtube_dl/extractor/xboxclips.py (new file, 57 lines added)
@@ -0,0 +1,57 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_iso8601,
    float_or_none,
    int_or_none,
)


class XboxClipsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
    _TEST = {
        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
        'info_dict': {
            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
            'ext': 'mp4',
            'title': 'Iabdulelah playing Upload Studio',
            'filesize_approx': 28101836.8,
            'timestamp': 1407388500,
            'upload_date': '20140807',
            'duration': 56,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'>Link: <a href="([^"]+)">', webpage, 'video URL')
        title = self._html_search_regex(
            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
        timestamp = parse_iso8601(self._html_search_regex(
            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
        filesize = float_or_none(self._html_search_regex(
            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
        duration = int_or_none(self._html_search_regex(
            r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'>Views: (\d+)<', webpage, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'timestamp': timestamp,
            'filesize_approx': filesize,
            'duration': duration,
            'view_count': view_count,
        }
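
Note: xboxclips.com reports the clip size in megabytes, so the extractor passes invscale=1024 * 1024 to float_or_none to store an approximate byte count; 28101836.8 in the test above is exactly 26.8 * 1024 * 1024. A rough sketch of that scaling (an approximation of the helper in youtube_dl/utils.py, for illustration only):

    def float_or_none(v, scale=1, invscale=1, default=None):
        # Tolerate a missing value, otherwise apply the scaling factors.
        return float(v) * invscale / scale if v is not None else default

    print(float_or_none('26.8', invscale=1024 * 1024))  # 28101836.8
    print(float_or_none(None, invscale=1024 * 1024))    # None
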
Some files were not shown because too many files have changed in this diff.