Compare commits
	
		
			245 Commits
		
	
	
		
			2017.02.22
			...
			2017.04.03
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | b022f4f600 | ||
|   | e2435ba5f3 | ||
|   | a9bb61a425 | ||
|   | dbf70c489f | ||
|   | 61e2331ad8 | ||
|   | fd47550885 | ||
|   | 4457823dda | ||
|   | b3633fa0ce | ||
|   | b56e41a701 | ||
|   | a76c25146a | ||
|   | 361f293ab8 | ||
|   | b8d8cced9b | ||
|   | 51342717cd | ||
|   | 48ab554feb | ||
|   | a6f3a162f3 | ||
|   | 91399b2fcc | ||
|   | eecea00d36 | ||
|   | 2cd668ee59 | ||
|   | ca77b92f94 | ||
|   | e97fc8d6b8 | ||
|   | be61efdf17 | ||
|   | 77c8ebe631 | ||
|   | 7453999580 | ||
|   | 1640eb0961 | ||
|   | 3e943cfe09 | ||
|   | 82be732b17 | ||
|   | 639e5b2a84 | ||
|   | 128244657b | ||
|   | 12ee65ea0d | ||
|   | aea1dccbd0 | ||
|   | 9e691da067 | ||
|   | 82eefd0be0 | ||
|   | f7923a4c39 | ||
|   | cc63259d18 | ||
|   | 2bfaf89b6c | ||
|   | 4f06c1c9fc | ||
|   | 942b44a052 | ||
|   | a426ef6d78 | ||
|   | 41c5e60dd5 | ||
|   | d212c93d16 | ||
|   | 15495cf3e5 | ||
|   | 5b7cc56b05 | ||
|   | 590bc6f6a1 | ||
|   | 51098426b8 | ||
|   | c73e330e7a | ||
|   | fb4fc44928 | ||
|   | 03486dbb01 | ||
|   | 51ef4919df | ||
|   | d66d43c554 | ||
|   | 610a6d1053 | ||
|   | c6c22e984d | ||
|   | d97729c83a | ||
|   | 7aa0ee321b | ||
|   | e8e4cc5a6a | ||
|   | c7301e677b | ||
|   | 048086920b | ||
|   | 1088d76da6 | ||
|   | 31a1214076 | ||
|   | d0ba55871e | ||
|   | 54b960f340 | ||
|   | a3ccd6bd11 | ||
|   | 7963b6cba8 | ||
|   | bea7af6947 | ||
|   | a5d783f525 | ||
|   | d0572557c2 | ||
|   | 52d5ecabd5 | ||
|   | b0f7f21cb9 | ||
|   | 579c99a284 | ||
|   | ca5ed022e9 | ||
|   | 391d076d7c | ||
|   | c183e14f89 | ||
|   | 093dad9e25 | ||
|   | e8686e51d7 | ||
|   | 8e5a7c5e67 | ||
|   | e1e35d1ac6 | ||
|   | 21fbf0f955 | ||
|   | 97952bdb78 | ||
|   | 8a8cc339b6 | ||
|   | 957f453429 | ||
|   | 0e9a73e612 | ||
|   | 0ecdd3adbd | ||
|   | 9487ce03e9 | ||
|   | 45e6ad21b4 | ||
|   | 68220649fa | ||
|   | 46b18f2349 | ||
|   | 772b5ff57f | ||
|   | f68ef1e2ab | ||
|   | febfe1e262 | ||
|   | 5f0daab1ca | ||
|   | 2a721cdff2 | ||
|   | e7a51a4c02 | ||
|   | 3e5856d860 | ||
|   | ea883a687c | ||
|   | 7f3590c43b | ||
|   | 7d539ee10a | ||
|   | 6ad476079d | ||
|   | 0efbc6b56d | ||
|   | 21bfcd3d6e | ||
|   | b51dc9db0e | ||
|   | a309684285 | ||
|   | ba448445b8 | ||
|   | 5db83d79bf | ||
|   | 2a751e137f | ||
|   | 398887b4c0 | ||
|   | 66bf351f80 | ||
|   | 9d08963022 | ||
|   | e313d209c2 | ||
|   | ff9d509d20 | ||
|   | c1795ca6c8 | ||
|   | 8c99623259 | ||
|   | 57b0ddb35f | ||
|   | a28f8d7396 | ||
|   | 7049799470 | ||
|   | 4605c94d1a | ||
|   | a8e687a4da | ||
|   | f9e5c92c94 | ||
|   | c2ee861c6d | ||
|   | bd34c32bd7 | ||
|   | f802c48660 | ||
|   | 76bee08fe7 | ||
|   | 2913821723 | ||
|   | 0e7f9a9b48 | ||
|   | 0cf2352e85 | ||
|   | 0f6b87d067 | ||
|   | d7344d33b1 | ||
|   | b08cc749d6 | ||
|   | b68a812ea8 | ||
|   | 2e76bdc850 | ||
|   | fe646a2f10 | ||
|   | 9df53ea36e | ||
|   | d7d7f84c95 | ||
|   | dccd0ab35d | ||
|   | 80146dcc6c | ||
|   | e30ccf7047 | ||
|   | 54a3a8827b | ||
|   | 92cb5763f4 | ||
|   | da92da4b88 | ||
|   | 1664702626 | ||
|   | 3f116b189b | ||
|   | 4b5de77bdb | ||
|   | 96182695e4 | ||
|   | fc11ad3833 | ||
|   | d2b64e04b4 | ||
|   | 5dd376345b | ||
|   | 1a2192cb90 | ||
|   | 0236cd0dfd | ||
|   | ed0cf9b383 | ||
|   | a50862b735 | ||
|   | 6d0fe752bf | ||
|   | afa4597618 | ||
|   | 75027364ba | ||
|   | 5316566edc | ||
|   | c64c03be35 | ||
|   | bcefc59279 | ||
|   | 6f211dc936 | ||
|   | f24c1e5584 | ||
|   | 466274fe9a | ||
|   | 30f8f142d4 | ||
|   | a3ba8a7acf | ||
|   | 054a587de8 | ||
|   | 64b7ccef3e | ||
|   | 6f4e4132d8 | ||
|   | eb3079b6ce | ||
|   | bc82f22879 | ||
|   | 4d058c9862 | ||
|   | d16f27ca27 | ||
|   | cbb127568a | ||
|   | d02d4fa0a9 | ||
|   | 692fa200ca | ||
|   | 9bae185ba6 | ||
|   | 4d345bf17b | ||
|   | 250eea6821 | ||
|   | 28d15b73f8 | ||
|   | 11bb6ad1a5 | ||
|   | c9612c0487 | ||
|   | af5049f128 | ||
|   | 158af5242e | ||
|   | 40df485f55 | ||
|   | 4b8a984c67 | ||
|   | 83e8fce628 | ||
|   | aa9cc2ecbf | ||
|   | 1dc24093f8 | ||
|   | 11bae9cdde | ||
|   | 43b38424a9 | ||
|   | 948519b35d | ||
|   | 87dadd456a | ||
|   | 7c4aa6fd6f | ||
|   | 9bd05b5a18 | ||
|   | 0a5445ddbe | ||
|   | f48409c7ac | ||
|   | c9619f0a17 | ||
|   | f4c68ba372 | ||
|   | ef48a1175d | ||
|   | c6184bcf7b | ||
|   | 18abb74376 | ||
|   | dbc01fdb6f | ||
|   | f264c62334 | ||
|   | 0dc5a86a32 | ||
|   | 0e879f432a | ||
|   | 892b47ab6c | ||
|   | fdeea72611 | ||
|   | 7fd4655256 | ||
|   | fd5c4aab59 | ||
|   | 8878789f11 | ||
|   | a5cf17989b | ||
|   | b3aec47665 | ||
|   | 9d0c08a02c | ||
|   | e498758b9c | ||
|   | 5fc8d89361 | ||
|   | d374d943f3 | ||
|   | 103f8c8d36 | ||
|   | 922ab7840b | ||
|   | 831217291a | ||
|   | db182c63fb | ||
|   | eeb0a95684 | ||
|   | 231bcd0b6b | ||
|   | 204efc8509 | ||
|   | 5d3a51e1b9 | ||
|   | ad3033037c | ||
|   | f3bc281239 | ||
|   | 441d7a32e5 | ||
|   | 51ed496307 | ||
|   | 68f17a9c2d | ||
|   | 39e7277ed1 | ||
|   | 42dcdbe11c | ||
|   | 6b097cff27 | ||
|   | f2f7961820 | ||
|   | be5df5ee31 | ||
|   | f2980fddeb | ||
|   | 0f57447de7 | ||
|   | 19f3821821 | ||
|   | 8e1409fd80 | ||
|   | 050f143c12 | ||
|   | fafc2bf5a9 | ||
|   | b3175982c3 | ||
|   | 89db639dfe | ||
|   | d0d9ade486 | ||
|   | 28572a1a0b | ||
|   | 0f3d41b44d | ||
|   | d5fd9a3be3 | ||
|   | ada77fa544 | ||
|   | 9e03aa75c7 | ||
|   | 30eaa3a702 | ||
|   | c59f703610 | ||
|   | bc61c80c14 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.22** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.03** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2017.02.22 | ||||
| [debug] youtube-dl version 2017.04.03 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
							
								
								
									
										8
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -202,3 +202,11 @@ Fabian Stahl | ||||
| Bagira | ||||
| Odd Stråbø | ||||
| Philip Herzog | ||||
| Thomas Christlieb | ||||
| Marek Rusinowski | ||||
| Tobias Gruetzmacher | ||||
| Olivier Bilodeau | ||||
| Lars Vierbergen | ||||
| Juanjo Benages | ||||
| Xiao Di Guan | ||||
| Thomas Winant | ||||
|   | ||||
							
								
								
									
										258
									
								
								ChangeLog
									
									
									
									
									
								
							
							
						
						
									
										258
									
								
								ChangeLog
									
									
									
									
									
								
							| @@ -1,3 +1,261 @@ | ||||
| version 2017.04.03 | ||||
|  | ||||
| Core | ||||
| + [extractor/common] Add censorship check for TransTelekom ISP | ||||
| * [extractor/common] Move censorship checks to a separate method | ||||
|  | ||||
| Extractors | ||||
| + [discoveryvr] Add support for discoveryvr.com (#12578) | ||||
| + [tv5mondeplus] Add support for tv5mondeplus.com (#11386) | ||||
| + [periscope] Add support for pscp.tv URLs (#12618, #12625) | ||||
|  | ||||
|  | ||||
| version 2017.04.02 | ||||
|  | ||||
| Core | ||||
| * [YoutubeDL] Return early when extraction of url_transparent fails | ||||
|  | ||||
| Extractors | ||||
| * [rai] Fix and improve extraction (#11790) | ||||
| + [vrv] Add support for series pages | ||||
| * [limelight] Improve extraction for audio only formats | ||||
| * [funimation] Fix extraction (#10696, #11773) | ||||
| + [xfileshare] Add support for vidabc.com (#12589) | ||||
| + [xfileshare] Improve extraction and extract hls formats | ||||
| + [crunchyroll] Pass geo verification proxy | ||||
| + [cwtv] Extract ISM formats | ||||
| + [tvplay] Bypass geo restriction | ||||
| + [vrv] Add support for vrv.co | ||||
| + [packtpub] Add support for packtpub.com (#12610) | ||||
| + [generic] Pass base_url to _parse_jwplayer_data | ||||
| + [adn] Add support for animedigitalnetwork.fr (#4866) | ||||
| + [allocine] Extract more metadata | ||||
| * [allocine] Fix extraction (#12592) | ||||
| * [openload] Fix extraction | ||||
|  | ||||
|  | ||||
| version 2017.03.26 | ||||
|  | ||||
| Core | ||||
| * Don't raise an error if JWPlayer config data is not a Javascript object | ||||
|   literal. _find_jwplayer_data now returns a dict rather than an str. (#12307) | ||||
| * Expand environment variables for options representing paths (#12556) | ||||
| + [utils] Introduce expand_path | ||||
| * [downloader/hls] Delegate downloading to ffmpeg immediately for live streams | ||||
|  | ||||
| Extractors | ||||
| * [afreecatv] Fix extraction (#12179) | ||||
| + [atvat] Add support for atv.at (#5325) | ||||
| + [fox] Add metadata extraction (#12391) | ||||
| + [atresplayer] Extract DASH formats | ||||
| + [atresplayer] Extract HD manifest (#12548) | ||||
| * [atresplayer] Fix login error detection (#12548) | ||||
| * [franceculture] Fix extraction (#12547) | ||||
| * [youtube] Improve URL regular expression (#12538) | ||||
| * [generic] Do not follow redirects to the same URL | ||||
|  | ||||
|  | ||||
| version 2017.03.24 | ||||
|  | ||||
| Extractors | ||||
| - [9c9media] Remove mp4 URL extraction request | ||||
| + [bellmedia] Add support for etalk.ca and space.ca (#12447) | ||||
| * [channel9] Fix extraction (#11323) | ||||
| * [cloudy] Fix extraction (#12525) | ||||
| + [hbo] Add support for free episode URLs and new formats extraction (#12519) | ||||
| * [condenast] Fix extraction and style (#12526) | ||||
| * [viu] Relax URL regular expression (#12529) | ||||
|  | ||||
|  | ||||
| version 2017.03.22 | ||||
|  | ||||
| Extractors | ||||
| - [pluralsight] Omit module title from video title (#12506) | ||||
| * [pornhub] Decode obfuscated video URL (#12470, #12515) | ||||
| * [senateisvp] Allow https URL scheme for embeds (#12512) | ||||
|  | ||||
|  | ||||
| version 2017.03.20 | ||||
|  | ||||
| Core | ||||
| + [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as | ||||
|   output template | ||||
| + [adobepass] Detect and output error on authz token extraction (#12472) | ||||
|  | ||||
| Extractors | ||||
| + [bostonglobe] Add extractor for bostonglobe.com (#12099) | ||||
| + [toongoggles] Add support for toongoggles.com (#12171) | ||||
| + [medialaan] Add support for Medialaan sites (#9974, #11912) | ||||
| + [discoverynetworks] Add support for more domains and bypass geo restriction | ||||
| * [openload] Fix extraction (#10408) | ||||
|  | ||||
|  | ||||
| version 2017.03.16 | ||||
|  | ||||
| Core | ||||
| + [postprocessor/ffmpeg] Add support for flac | ||||
| + [extractor/common] Extract SMIL formats from jwplayer | ||||
|  | ||||
| Extractors | ||||
| + [generic] Add forgotten return for jwplayer formats | ||||
| * [redbulltv] Improve extraction | ||||
|  | ||||
|  | ||||
| version 2017.03.15 | ||||
|  | ||||
| Core | ||||
| * Fix missing subtitles if --add-metadata is used (#12423) | ||||
|  | ||||
| Extractors | ||||
| * [facebook] Make title optional (#12443) | ||||
| + [mitele] Add support for ooyala videos (#12430) | ||||
| * [openload] Fix extraction (#12435, #12446) | ||||
| * [streamable] Update API URL (#12433) | ||||
| + [crunchyroll] Extract season name (#12428) | ||||
| * [discoverygo] Bypass geo restriction | ||||
| + [discoverygo:playlist] Add support for playlists (#12424) | ||||
|  | ||||
|  | ||||
| version 2017.03.10 | ||||
|  | ||||
| Extractors | ||||
| * [generic] Make title optional for jwplayer embeds (#12410) | ||||
| * [wdr:maus] Fix extraction (#12373) | ||||
| * [prosiebensat1] Improve title extraction (#12318, #12327) | ||||
| * [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393) | ||||
| * [miomio] Fix extraction (#12291, #12388, #12402) | ||||
| * [telequebec] Fix description extraction (#12399) | ||||
| * [openload] Fix extraction (#12357) | ||||
| * [brightcove:legacy] Relax videoPlayer validation check (#12381) | ||||
|  | ||||
|  | ||||
| version 2017.03.07 | ||||
|  | ||||
| Core | ||||
| * Metadata are now added after conversion (#5594) | ||||
|  | ||||
| Extractors | ||||
| * [soundcloud] Update client id (#12376) | ||||
| * [openload] Fix extraction (#10408, #12357) | ||||
|  | ||||
|  | ||||
| version 2017.03.06 | ||||
|  | ||||
| Core | ||||
| + [utils] Process bytestrings in urljoin (#12369) | ||||
| * [extractor/common] Improve height extraction and extract bitrate | ||||
| * [extractor/common] Move jwplayer formats extraction in separate method | ||||
| + [external:ffmpeg] Limit test download size to 10KiB (#12362) | ||||
|  | ||||
| Extractors | ||||
| + [drtv] Add geo countries to GeoRestrictedError | ||||
| + [drtv:live] Bypass geo restriction | ||||
| + [tunepk] Add extractor (#12197, #12243) | ||||
|  | ||||
|  | ||||
| version 2017.03.05 | ||||
|  | ||||
| Extractors | ||||
| + [twitch] Add basic support for two-factor authentication (#11974) | ||||
| + [vier] Add support for vijf.be (#12304) | ||||
| + [redbulltv] Add support for redbull.tv (#3919, #11948) | ||||
| * [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) | ||||
| + [generic] Add support for rutube embeds | ||||
| + [rutube] Relax URL regular expression | ||||
| + [vrak] Add support for vrak.tv (#11452) | ||||
| + [brightcove:new] Add ability to smuggle geo_countries into URL | ||||
| + [brightcove:new] Raise GeoRestrictedError | ||||
| * [go] Relax URL regular expression (#12341) | ||||
| * [24video] Use original host for requests (#12339) | ||||
| * [ruutu] Disable DASH formats (#12322) | ||||
|  | ||||
|  | ||||
| version 2017.03.02 | ||||
|  | ||||
| Core | ||||
| + [adobepass] Add support for Charter Spectrum (#11465) | ||||
| * [YoutubeDL] Don't sanitize identifiers in output template (#12317) | ||||
|  | ||||
| Extractors | ||||
| * [facebook] Fix extraction (#12323, #12330) | ||||
| * [youtube] Mark errors about rental videos as expected (#12324) | ||||
| + [npo] Add support for audio | ||||
| * [npo] Adapt to app.php API (#12311, #12320) | ||||
|  | ||||
|  | ||||
| version 2017.02.28 | ||||
|  | ||||
| Core | ||||
| + [utils] Add bytes_to_long and long_to_bytes | ||||
| + [utils] Add pkcs1pad | ||||
| + [aes] Add aes_cbc_encrypt | ||||
|  | ||||
| Extractors | ||||
| + [azmedien:showplaylist] Add support for show playlists (#12160) | ||||
| + [youtube:playlist] Recognize another playlist pattern (#11928, #12286) | ||||
| + [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776, | ||||
|   #10060) | ||||
| * [douyu] Fix extraction (#12301) | ||||
|  | ||||
|  | ||||
| version 2017.02.27 | ||||
|  | ||||
| Core | ||||
| * [downloader/common] Limit displaying 2 digits after decimal point in sleep | ||||
|   interval message (#12183) | ||||
| + [extractor/common] Add preference to _parse_html5_media_entries | ||||
|  | ||||
| Extractors | ||||
| + [npo] Add support for zapp.nl | ||||
| + [npo] Add support for hetklokhuis.nl (#12293) | ||||
| - [scivee] Remove extractor (#9315) | ||||
| + [cda] Decode download URL (#12255) | ||||
| + [crunchyroll] Improve uploader extraction (#12267) | ||||
| + [youtube] Raise GeoRestrictedError | ||||
| + [dailymotion] Raise GeoRestrictedError | ||||
| + [mdr] Recognize more URL patterns (#12169) | ||||
| + [tvigle] Raise GeoRestrictedError | ||||
| * [vevo] Fix extraction for videos with the new streams/streamsV3 format | ||||
|   (#11719) | ||||
| + [freshlive] Add support for freshlive.tv (#12175) | ||||
| + [xhamster] Capture and output videoClosed error (#12263) | ||||
| + [etonline] Add support for etonline.com (#12236) | ||||
| + [njpwworld] Add support for njpwworld.com (#11561) | ||||
| * [amcnetworks] Relax URL regular expression (#12127) | ||||
|  | ||||
|  | ||||
| version 2017.02.24.1 | ||||
|  | ||||
| Extractors | ||||
| * [noco] Modernize | ||||
| * [noco] Switch login URL to https (#12246) | ||||
| + [thescene] Extract more metadata | ||||
| * [thescene] Fix extraction (#12235) | ||||
| + [tubitv] Use geo bypass mechanism | ||||
| * [openload] Fix extraction (#10408) | ||||
| + [ivi] Raise GeoRestrictedError | ||||
|  | ||||
|  | ||||
| version 2017.02.24 | ||||
|  | ||||
| Core | ||||
| * [options] Hide deprecated options from --help | ||||
| * [options] Deprecate --autonumber-size | ||||
| + [YoutubeDL] Add support for string formatting operations in output template | ||||
|   (#5185, #5748, #6841, #9929, #9966, #9978, #12189) | ||||
|  | ||||
| Extractors | ||||
| + [lynda:course] Add webpage extraction fallback (#12238) | ||||
| * [go] Sign all uplynk URLs and use geo bypass only for free videos | ||||
|   (#12087, #12210) | ||||
| + [skylinewebcams] Add support for skylinewebcams.com (#12221) | ||||
| + [instagram] Add support for multi video posts (#12226) | ||||
| + [crunchyroll] Extract playlist entries ids | ||||
| * [mgtv] Fix extraction | ||||
| + [sohu] Raise GeoRestrictedError | ||||
| + [leeco] Raise GeoRestrictedError and use geo bypass mechanism | ||||
|  | ||||
|  | ||||
| version 2017.02.22 | ||||
|  | ||||
| Extractors | ||||
|   | ||||
							
								
								
									
										161
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										161
									
								
								README.md
									
									
									
									
									
								
							| @@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo | ||||
|     -R, --retries RETRIES            Number of retries (default is 10), or | ||||
|                                      "infinite". | ||||
|     --fragment-retries RETRIES       Number of retries for a fragment (default | ||||
|                                      is 10), or "infinite" (DASH and hlsnative | ||||
|                                      only) | ||||
|     --skip-unavailable-fragments     Skip unavailable fragments (DASH and | ||||
|                                      hlsnative only) | ||||
|                                      is 10), or "infinite" (DASH, hlsnative and | ||||
|                                      ISM) | ||||
|     --skip-unavailable-fragments     Skip unavailable fragments (DASH, hlsnative | ||||
|                                      and ISM) | ||||
|     --abort-on-unavailable-fragment  Abort downloading when some fragment is not | ||||
|                                      available | ||||
|     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K) | ||||
| @@ -217,21 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo | ||||
|     --id                             Use only video ID in file name | ||||
|     -o, --output TEMPLATE            Output filename template, see the "OUTPUT | ||||
|                                      TEMPLATE" for all the info | ||||
|     --autonumber-size NUMBER         Specify the number of digits in | ||||
|                                      %(autonumber)s when it is present in output | ||||
|                                      filename template or --auto-number option | ||||
|                                      is given (default is 5) | ||||
|     --autonumber-start NUMBER        Specify the start value for %(autonumber)s | ||||
|                                      (default is 1) | ||||
|     --restrict-filenames             Restrict filenames to only ASCII | ||||
|                                      characters, and avoid "&" and spaces in | ||||
|                                      filenames | ||||
|     -A, --auto-number                [deprecated; use -o | ||||
|                                      "%(autonumber)s-%(title)s.%(ext)s" ] Number | ||||
|                                      downloaded files starting from 00000 | ||||
|     -t, --title                      [deprecated] Use title in file name | ||||
|                                      (default) | ||||
|     -l, --literal                    [deprecated] Alias of --title | ||||
|     -w, --no-overwrites              Do not overwrite files | ||||
|     -c, --continue                   Force resume of partially downloaded files. | ||||
|                                      By default, youtube-dl will resume | ||||
| @@ -385,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo | ||||
|                                      (requires ffmpeg or avconv and ffprobe or | ||||
|                                      avprobe) | ||||
|     --audio-format FORMAT            Specify audio format: "best", "aac", | ||||
|                                      "vorbis", "mp3", "m4a", "opus", or "wav"; | ||||
|                                      "best" by default; No effect without -x | ||||
|                                      "flac", "mp3", "m4a", "opus", "vorbis", or | ||||
|                                      "wav"; "best" by default; No effect without | ||||
|                                      -x | ||||
|     --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert | ||||
|                                      a value between 0 (better) and 9 (worse) | ||||
|                                      for VBR or a specific bitrate like 128K | ||||
| @@ -486,87 +477,89 @@ The `-o` option allows users to indicate a template for the output file names. | ||||
|  | ||||
| **tl;dr:** [navigate me to examples](#output-template-examples). | ||||
|  | ||||
| The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are: | ||||
| The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with sequence type are: | ||||
|  | ||||
|  - `id`: Video identifier | ||||
|  - `title`: Video title | ||||
|  - `url`: Video URL | ||||
|  - `ext`: Video filename extension | ||||
|  - `alt_title`: A secondary title of the video | ||||
|  - `display_id`: An alternative identifier for the video | ||||
|  - `uploader`: Full name of the video uploader | ||||
|  - `license`: License name the video is licensed under | ||||
|  - `creator`: The creator of the video | ||||
|  - `release_date`: The date (YYYYMMDD) when the video was released | ||||
|  - `timestamp`: UNIX timestamp of the moment the video became available | ||||
|  - `upload_date`: Video upload date (YYYYMMDD) | ||||
|  - `uploader_id`: Nickname or id of the video uploader | ||||
|  - `location`: Physical location where the video was filmed | ||||
|  - `duration`: Length of the video in seconds | ||||
|  - `view_count`: How many users have watched the video on the platform | ||||
|  - `like_count`: Number of positive ratings of the video | ||||
|  - `dislike_count`: Number of negative ratings of the video | ||||
|  - `repost_count`: Number of reposts of the video | ||||
|  - `average_rating`: Average rating given by users, the scale used depends on the webpage | ||||
|  - `comment_count`: Number of comments on the video | ||||
|  - `age_limit`: Age restriction for the video (years) | ||||
|  - `format`: A human-readable description of the format  | ||||
|  - `format_id`: Format code specified by `--format` | ||||
|  - `format_note`: Additional info about the format | ||||
|  - `width`: Width of the video | ||||
|  - `height`: Height of the video | ||||
|  - `resolution`: Textual description of width and height | ||||
|  - `tbr`: Average bitrate of audio and video in KBit/s | ||||
|  - `abr`: Average audio bitrate in KBit/s | ||||
|  - `acodec`: Name of the audio codec in use | ||||
|  - `asr`: Audio sampling rate in Hertz | ||||
|  - `vbr`: Average video bitrate in KBit/s | ||||
|  - `fps`: Frame rate | ||||
|  - `vcodec`: Name of the video codec in use | ||||
|  - `container`: Name of the container format | ||||
|  - `filesize`: The number of bytes, if known in advance | ||||
|  - `filesize_approx`: An estimate for the number of bytes | ||||
|  - `protocol`: The protocol that will be used for the actual download | ||||
|  - `extractor`: Name of the extractor | ||||
|  - `extractor_key`: Key name of the extractor | ||||
|  - `epoch`: Unix epoch when creating the file | ||||
|  - `autonumber`: Five-digit number that will be increased with each download, starting at zero | ||||
|  - `playlist`: Name or id of the playlist that contains the video | ||||
|  - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist | ||||
|  - `playlist_id`: Playlist identifier | ||||
|  - `playlist_title`: Playlist title | ||||
|  - `id` (string): Video identifier | ||||
|  - `title` (string): Video title | ||||
|  - `url` (string): Video URL | ||||
|  - `ext` (string): Video filename extension | ||||
|  - `alt_title` (string): A secondary title of the video | ||||
|  - `display_id` (string): An alternative identifier for the video | ||||
|  - `uploader` (string): Full name of the video uploader | ||||
|  - `license` (string): License name the video is licensed under | ||||
|  - `creator` (string): The creator of the video | ||||
|  - `release_date` (string): The date (YYYYMMDD) when the video was released | ||||
|  - `timestamp` (numeric): UNIX timestamp of the moment the video became available | ||||
|  - `upload_date` (string): Video upload date (YYYYMMDD) | ||||
|  - `uploader_id` (string): Nickname or id of the video uploader | ||||
|  - `location` (string): Physical location where the video was filmed | ||||
|  - `duration` (numeric): Length of the video in seconds | ||||
|  - `view_count` (numeric): How many users have watched the video on the platform | ||||
|  - `like_count` (numeric): Number of positive ratings of the video | ||||
|  - `dislike_count` (numeric): Number of negative ratings of the video | ||||
|  - `repost_count` (numeric): Number of reposts of the video | ||||
|  - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage | ||||
|  - `comment_count` (numeric): Number of comments on the video | ||||
|  - `age_limit` (numeric): Age restriction for the video (years) | ||||
|  - `format` (string): A human-readable description of the format  | ||||
|  - `format_id` (string): Format code specified by `--format` | ||||
|  - `format_note` (string): Additional info about the format | ||||
|  - `width` (numeric): Width of the video | ||||
|  - `height` (numeric): Height of the video | ||||
|  - `resolution` (string): Textual description of width and height | ||||
|  - `tbr` (numeric): Average bitrate of audio and video in KBit/s | ||||
|  - `abr` (numeric): Average audio bitrate in KBit/s | ||||
|  - `acodec` (string): Name of the audio codec in use | ||||
|  - `asr` (numeric): Audio sampling rate in Hertz | ||||
|  - `vbr` (numeric): Average video bitrate in KBit/s | ||||
|  - `fps` (numeric): Frame rate | ||||
|  - `vcodec` (string): Name of the video codec in use | ||||
|  - `container` (string): Name of the container format | ||||
|  - `filesize` (numeric): The number of bytes, if known in advance | ||||
|  - `filesize_approx` (numeric): An estimate for the number of bytes | ||||
|  - `protocol` (string): The protocol that will be used for the actual download | ||||
|  - `extractor` (string): Name of the extractor | ||||
|  - `extractor_key` (string): Key name of the extractor | ||||
|  - `epoch` (numeric): Unix epoch when creating the file | ||||
|  - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero | ||||
|  - `playlist` (string): Name or id of the playlist that contains the video | ||||
|  - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist | ||||
|  - `playlist_id` (string): Playlist identifier | ||||
|  - `playlist_title` (string): Playlist title | ||||
|  | ||||
|  | ||||
| Available for the video that belongs to some logical chapter or section: | ||||
|  - `chapter`: Name or title of the chapter the video belongs to | ||||
|  - `chapter_number`: Number of the chapter the video belongs to | ||||
|  - `chapter_id`: Id of the chapter the video belongs to | ||||
|  - `chapter` (string): Name or title of the chapter the video belongs to | ||||
|  - `chapter_number` (numeric): Number of the chapter the video belongs to | ||||
|  - `chapter_id` (string): Id of the chapter the video belongs to | ||||
|  | ||||
| Available for the video that is an episode of some series or programme: | ||||
|  - `series`: Title of the series or programme the video episode belongs to | ||||
|  - `season`: Title of the season the video episode belongs to | ||||
|  - `season_number`: Number of the season the video episode belongs to | ||||
|  - `season_id`: Id of the season the video episode belongs to | ||||
|  - `episode`: Title of the video episode | ||||
|  - `episode_number`: Number of the video episode within a season | ||||
|  - `episode_id`: Id of the video episode | ||||
|  - `series` (string): Title of the series or programme the video episode belongs to | ||||
|  - `season` (string): Title of the season the video episode belongs to | ||||
|  - `season_number` (numeric): Number of the season the video episode belongs to | ||||
|  - `season_id` (string): Id of the season the video episode belongs to | ||||
|  - `episode` (string): Title of the video episode | ||||
|  - `episode_number` (numeric): Number of the video episode within a season | ||||
|  - `episode_id` (string): Id of the video episode | ||||
|  | ||||
| Available for the media that is a track or a part of a music album: | ||||
|  - `track`: Title of the track | ||||
|  - `track_number`: Number of the track within an album or a disc | ||||
|  - `track_id`: Id of the track | ||||
|  - `artist`: Artist(s) of the track | ||||
|  - `genre`: Genre(s) of the track | ||||
|  - `album`: Title of the album the track belongs to | ||||
|  - `album_type`: Type of the album | ||||
|  - `album_artist`: List of all artists appeared on the album | ||||
|  - `disc_number`: Number of the disc or other physical medium the track belongs to | ||||
|  - `release_year`: Year (YYYY) when the album was released | ||||
|  - `track` (string): Title of the track | ||||
|  - `track_number` (numeric): Number of the track within an album or a disc | ||||
|  - `track_id` (string): Id of the track | ||||
|  - `artist` (string): Artist(s) of the track | ||||
|  - `genre` (string): Genre(s) of the track | ||||
|  - `album` (string): Title of the album the track belongs to | ||||
|  - `album_type` (string): Type of the album | ||||
|  - `album_artist` (string): List of all artists who appeared on the album | ||||
|  - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to | ||||
|  - `release_year` (numeric): Year (YYYY) when the album was released | ||||
|  | ||||
| Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`. | ||||
|  | ||||
| For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. | ||||
|  | ||||
| For numeric sequences you can use numeric-related formatting; for example, `%(view_count)05d` will result in a string with the view count padded with zeros up to 5 characters, as in `00042`. | ||||
|  | ||||
| Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'`, which will result in downloading each video into a directory corresponding to this path template. Any missing directory will be automatically created for you. | ||||
|  | ||||
| To use percent literals in an output template use `%%`. To output to stdout use `-o -`. | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import unicode_literals, print_function | ||||
|  | ||||
| from inspect import getsource | ||||
| import io | ||||
| import os | ||||
| from os.path import dirname as dirn | ||||
| import sys | ||||
| @@ -95,5 +96,5 @@ module_contents.append( | ||||
|  | ||||
| module_src = '\n'.join(module_contents) + '\n' | ||||
|  | ||||
| with open(lazy_extractors_filename, 'wt') as f: | ||||
| with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: | ||||
|     f.write(module_src) | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" | ||||
| DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists" | ||||
|  | ||||
| test_set="" | ||||
| multiprocess_args="" | ||||
|   | ||||
| @@ -28,6 +28,7 @@ | ||||
|  - **acast** | ||||
|  - **acast:channel** | ||||
|  - **AddAnime** | ||||
|  - **ADN**: Anime Digital Network | ||||
|  - **AdobeTV** | ||||
|  - **AdobeTVChannel** | ||||
|  - **AdobeTVShow** | ||||
| @@ -67,6 +68,7 @@ | ||||
|  - **arte.tv:playlist** | ||||
|  - **AtresPlayer** | ||||
|  - **ATTTechChannel** | ||||
|  - **ATVAt** | ||||
|  - **AudiMedia** | ||||
|  - **AudioBoom** | ||||
|  - **audiomack** | ||||
| @@ -78,6 +80,7 @@ | ||||
|  - **awaan:video** | ||||
|  - **AZMedien**: AZ Medien videos | ||||
|  - **AZMedienPlaylist**: AZ Medien playlists | ||||
|  - **AZMedienShowPlaylist**: AZ Medien show playlists | ||||
|  - **Azubu** | ||||
|  - **AzubuLive** | ||||
|  - **BaiduVideo**: 百度视频 | ||||
| @@ -107,6 +110,7 @@ | ||||
|  - **blinkx** | ||||
|  - **Bloomberg** | ||||
|  - **BokeCC** | ||||
|  - **BostonGlobe** | ||||
|  - **Bpb**: Bundeszentrale für politische Bildung | ||||
|  - **BR**: Bayerischer Rundfunk Mediathek | ||||
|  - **BravoTV** | ||||
| @@ -191,6 +195,8 @@ | ||||
|  - **dailymotion:playlist** | ||||
|  - **dailymotion:user** | ||||
|  - **DailymotionCloud** | ||||
|  - **Daisuki** | ||||
|  - **DaisukiPlaylist** | ||||
|  - **daum.net** | ||||
|  - **daum.net:clip** | ||||
|  - **daum.net:playlist** | ||||
| @@ -205,10 +211,14 @@ | ||||
|  - **Digiteka** | ||||
|  - **Discovery** | ||||
|  - **DiscoveryGo** | ||||
|  - **DiscoveryGoPlaylist** | ||||
|  - **DiscoveryNetworksDe** | ||||
|  - **DiscoveryVR** | ||||
|  - **Disney** | ||||
|  - **Dotsub** | ||||
|  - **DouyuTV**: 斗鱼 | ||||
|  - **DPlay** | ||||
|  - **DPlayIt** | ||||
|  - **dramafever** | ||||
|  - **dramafever:series** | ||||
|  - **DRBonanza** | ||||
| @@ -239,6 +249,7 @@ | ||||
|  - **ESPN** | ||||
|  - **ESPNArticle** | ||||
|  - **EsriVideo** | ||||
|  - **ETOnline** | ||||
|  - **Europa** | ||||
|  - **EveryonesMixtape** | ||||
|  - **ExpoTV** | ||||
| @@ -274,6 +285,7 @@ | ||||
|  - **francetvinfo.fr** | ||||
|  - **Freesound** | ||||
|  - **freespeech.org** | ||||
|  - **FreshLive** | ||||
|  - **Funimation** | ||||
|  - **FunnyOrDie** | ||||
|  - **Fusion** | ||||
| @@ -303,13 +315,14 @@ | ||||
|  - **GPUTechConf** | ||||
|  - **Groupon** | ||||
|  - **Hark** | ||||
|  - **HBO** | ||||
|  - **HBOEpisode** | ||||
|  - **hbo** | ||||
|  - **hbo:episode** | ||||
|  - **HearThisAt** | ||||
|  - **Heise** | ||||
|  - **HellPorno** | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **hetklokhuis** | ||||
|  - **hgtv.com:show** | ||||
|  - **HistoricFilms** | ||||
|  - **history:topic**: History.com Topic | ||||
| @@ -417,6 +430,7 @@ | ||||
|  - **MatchTV** | ||||
|  - **MDR**: MDR.DE and KiKA | ||||
|  - **media.ccc.de** | ||||
|  - **Medialaan** | ||||
|  - **Meipai**: 美拍 | ||||
|  - **MelonVOD** | ||||
|  - **META** | ||||
| @@ -511,6 +525,7 @@ | ||||
|  - **Nintendo** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
|  - **NJPWWorld**: 新日本プロレスワールド | ||||
|  - **NobelPrize** | ||||
|  - **Noco** | ||||
|  - **Normalboots** | ||||
| @@ -559,6 +574,8 @@ | ||||
|  - **orf:iptv**: iptv.ORF.at | ||||
|  - **orf:oe1**: Radio Österreich 1 | ||||
|  - **orf:tvthek**: ORF TVthek | ||||
|  - **PacktPub** | ||||
|  - **PacktPubCourse** | ||||
|  - **PandaTV**: 熊猫TV | ||||
|  - **pandora.tv**: 판도라TV | ||||
|  - **parliamentlive.tv**: UK parliament videos | ||||
| @@ -616,9 +633,10 @@ | ||||
|  - **radiofrance** | ||||
|  - **RadioJavan** | ||||
|  - **Rai** | ||||
|  - **RaiTV** | ||||
|  - **RaiPlay** | ||||
|  - **RBMARadio** | ||||
|  - **RDS**: RDS.ca | ||||
|  - **RedBullTV** | ||||
|  - **RedTube** | ||||
|  - **RegioTV** | ||||
|  - **RENTV** | ||||
| @@ -666,7 +684,6 @@ | ||||
|  - **savefrom.net** | ||||
|  - **SBS**: sbs.com.au | ||||
|  - **schooltv** | ||||
|  - **SciVee** | ||||
|  - **screen.yahoo:search**: Yahoo screen search | ||||
|  - **Screencast** | ||||
|  - **ScreencastOMatic** | ||||
| @@ -680,6 +697,7 @@ | ||||
|  - **Shared**: shared.sx | ||||
|  - **ShowRoomLive** | ||||
|  - **Sina** | ||||
|  - **SkylineWebcams** | ||||
|  - **skynewsarabia:article** | ||||
|  - **skynewsarabia:video** | ||||
|  - **SkySports** | ||||
| @@ -767,12 +785,12 @@ | ||||
|  - **ThisAV** | ||||
|  - **ThisOldHouse** | ||||
|  - **tinypic**: tinypic.com videos | ||||
|  - **tlc.de** | ||||
|  - **TMZ** | ||||
|  - **TMZArticle** | ||||
|  - **TNAFlix** | ||||
|  - **TNAFlixNetworkEmbed** | ||||
|  - **toggle** | ||||
|  - **ToonGoggles** | ||||
|  - **Tosh**: Tosh.0 | ||||
|  - **tou.tv** | ||||
|  - **Toypics**: Toypics user profile | ||||
| @@ -790,6 +808,7 @@ | ||||
|  - **tunein:program** | ||||
|  - **tunein:station** | ||||
|  - **tunein:topic** | ||||
|  - **TunePk** | ||||
|  - **Turbo** | ||||
|  - **Tutv** | ||||
|  - **tv.dfb.de** | ||||
| @@ -797,6 +816,7 @@ | ||||
|  - **TV2Article** | ||||
|  - **TV3** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **TV5MondePlus**: TV5MONDE+ | ||||
|  - **TVA** | ||||
|  - **TVANouvelles** | ||||
|  - **TVANouvellesArticle** | ||||
| @@ -909,7 +929,10 @@ | ||||
|  - **VoxMedia** | ||||
|  - **Vporn** | ||||
|  - **vpro**: npo.nl and ntr.nl | ||||
|  - **Vrak** | ||||
|  - **VRT** | ||||
|  - **vrv** | ||||
|  - **vrv:series** | ||||
|  - **vube**: Vube.com | ||||
|  - **VuClip** | ||||
|  - **VVVVID** | ||||
| @@ -937,7 +960,7 @@ | ||||
|  - **WSJ**: Wall Street Journal | ||||
|  - **XBef** | ||||
|  - **XboxClips** | ||||
|  - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE | ||||
|  - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC | ||||
|  - **XHamster** | ||||
|  - **XHamsterEmbed** | ||||
|  - **xiami:album**: 虾米音乐 - 专辑 | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from test.helper import FakeYDL, expect_dict | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError | ||||
| @@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertRaises(ExtractorError, self.ie._download_json, uri, None) | ||||
|         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) | ||||
|  | ||||
|     def test_extract_jwplayer_data_realworld(self): | ||||
|         # from http://www.suffolk.edu/sjc/ | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._extract_jwplayer_data(r''' | ||||
|                 <script type='text/javascript'> | ||||
|                     jwplayer('my-video').setup({ | ||||
|                         file: 'rtmp://192.138.214.154/live/sjclive', | ||||
|                         fallback: 'true', | ||||
|                         width: '95%', | ||||
|                       aspectratio: '16:9', | ||||
|                       primary: 'flash', | ||||
|                       mediaid:'XEgvuql4' | ||||
|                     }); | ||||
|                 </script> | ||||
|                 ''', None, require_title=False), | ||||
|             { | ||||
|                 'id': 'XEgvuql4', | ||||
|                 'formats': [{ | ||||
|                     'url': 'rtmp://192.138.214.154/live/sjclive', | ||||
|                     'ext': 'flv' | ||||
|                 }] | ||||
|             }) | ||||
|  | ||||
|         # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._extract_jwplayer_data(r''' | ||||
| <script type="text/javascript"> | ||||
|     jwplayer("mediaplayer").setup({ | ||||
|         'videoid': "7564", | ||||
|         'width': "100%", | ||||
|         'aspectratio': "16:9", | ||||
|         'stretching': "exactfit", | ||||
|         'autostart': 'false', | ||||
|         'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf", | ||||
|         'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv", | ||||
|         'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg", | ||||
|         'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4", | ||||
|         'logo.hide': true, | ||||
|         'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip", | ||||
|         'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf", | ||||
|         'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370", | ||||
|         'controlbar': 'bottom', | ||||
|         'modes': [ | ||||
|             {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'} | ||||
|         ], | ||||
|         'provider': 'http' | ||||
|     }); | ||||
|     //noinspection JSAnnotator | ||||
|     invideo.setup({ | ||||
|         adsUrl: "/banner-iframe/?zoneId=32", | ||||
|         adsUrl2: "", | ||||
|         autostart: false | ||||
|     }); | ||||
| </script> | ||||
|             ''', 'dummy', require_title=False), | ||||
|             { | ||||
|                 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', | ||||
|                 'formats': [{ | ||||
|                     'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', | ||||
|                     'ext': 'flv' | ||||
|                 }] | ||||
|             }) | ||||
|  | ||||
|         # from http://www.indiedb.com/games/king-machine/videos | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._extract_jwplayer_data(r''' | ||||
| <script> | ||||
| jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) { | ||||
|             videoAnalytics("play"); | ||||
| }).once("complete", function(event) { | ||||
|     videoAnalytics("completed"); | ||||
| }); | ||||
| </script> | ||||
|                 ''', 'dummy'), | ||||
|             { | ||||
|                 'title': 'king machine trailer 1', | ||||
|                 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', | ||||
|                 'formats': [{ | ||||
|                     'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', | ||||
|                     'height': 360, | ||||
|                     'ext': 'mp4' | ||||
|                 }, { | ||||
|                     'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', | ||||
|                     'height': 720, | ||||
|                     'ext': 'mp4' | ||||
|                 }] | ||||
|             }) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -526,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase): | ||||
|             'id': '1234', | ||||
|             'ext': 'mp4', | ||||
|             'width': None, | ||||
|             'height': 1080, | ||||
|         } | ||||
|  | ||||
|         def fname(templ): | ||||
| @@ -535,6 +536,19 @@ class TestYoutubeDL(unittest.TestCase): | ||||
|         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') | ||||
|         # Replace missing fields with 'NA' | ||||
|         self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') | ||||
|         self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') | ||||
|         self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4') | ||||
|         self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4') | ||||
|         self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') | ||||
|         self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') | ||||
|         self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4') | ||||
|         self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') | ||||
|         self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4') | ||||
|         self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4') | ||||
|         self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') | ||||
|         self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') | ||||
|         self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') | ||||
|         self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') | ||||
|  | ||||
|     def test_format_note(self): | ||||
|         ydl = YoutubeDL() | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text | ||||
| from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text | ||||
| from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes | ||||
| import base64 | ||||
|  | ||||
| @@ -34,6 +34,13 @@ class TestAES(unittest.TestCase): | ||||
|         decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) | ||||
|         self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) | ||||
|  | ||||
|     def test_cbc_encrypt(self): | ||||
|         data = bytes_to_intlist(self.secret_msg) | ||||
|         encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) | ||||
|         self.assertEqual( | ||||
|             encrypted, | ||||
|             b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd") | ||||
|  | ||||
|     def test_decrypt_text(self): | ||||
|         password = intlist_to_bytes(self.key).decode('utf-8') | ||||
|         encrypted = base64.b64encode( | ||||
|   | ||||
| @@ -27,11 +27,11 @@ from youtube_dl.compat import ( | ||||
| class TestCompat(unittest.TestCase): | ||||
|     def test_compat_getenv(self): | ||||
|         test_str = 'тест' | ||||
|         compat_setenv('YOUTUBE-DL-TEST', test_str) | ||||
|         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) | ||||
|         compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str) | ||||
|         self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str) | ||||
|  | ||||
|     def test_compat_setenv(self): | ||||
|         test_var = 'YOUTUBE-DL-TEST' | ||||
|         test_var = 'YOUTUBE_DL_COMPAT_SETENV' | ||||
|         test_str = 'тест' | ||||
|         compat_setenv(test_var, test_str) | ||||
|         compat_getenv(test_var) | ||||
|   | ||||
| @@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase): | ||||
|  | ||||
|     maxDiff = None | ||||
|  | ||||
|     def __str__(self): | ||||
|         """Identify each test with the `add_ie` attribute, if available.""" | ||||
|  | ||||
|         def strclass(cls): | ||||
|             """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" | ||||
|             return '%s.%s' % (cls.__module__, cls.__name__) | ||||
|  | ||||
|         add_ie = getattr(self, self._testMethodName).add_ie | ||||
|         return '%s (%s)%s:' % (self._testMethodName, | ||||
|                                strclass(self.__class__), | ||||
|                                ' [%s]' % add_ie if add_ie else '') | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.defs = defs | ||||
|  | ||||
| @@ -233,6 +245,8 @@ for n, test_case in enumerate(defs): | ||||
|         i += 1 | ||||
|     test_method = generator(test_case, tname) | ||||
|     test_method.__name__ = str(tname) | ||||
|     ie_list = test_case.get('add_ie') | ||||
|     test_method.add_ie = ie_list and ','.join(ie_list) | ||||
|     setattr(TestDownload, test_method.__name__, test_method) | ||||
|     del test_method | ||||
|  | ||||
|   | ||||
| @@ -52,9 +52,11 @@ from youtube_dl.utils import ( | ||||
|     parse_filesize, | ||||
|     parse_count, | ||||
|     parse_iso8601, | ||||
|     pkcs1pad, | ||||
|     read_batch_urls, | ||||
|     sanitize_filename, | ||||
|     sanitize_path, | ||||
|     expand_path, | ||||
|     prepend_extension, | ||||
|     replace_extension, | ||||
|     remove_start, | ||||
| @@ -94,6 +96,8 @@ from youtube_dl.utils import ( | ||||
| from youtube_dl.compat import ( | ||||
|     compat_chr, | ||||
|     compat_etree_fromstring, | ||||
|     compat_getenv, | ||||
|     compat_setenv, | ||||
|     compat_urlparse, | ||||
|     compat_parse_qs, | ||||
| ) | ||||
| @@ -213,6 +217,18 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(sanitize_path('./abc'), 'abc') | ||||
|         self.assertEqual(sanitize_path('./../abc'), '..\\abc') | ||||
|  | ||||
|     def test_expand_path(self): | ||||
|         def env(var): | ||||
|             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) | ||||
|  | ||||
|         compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded') | ||||
|         self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded') | ||||
|         self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) | ||||
|         self.assertEqual(expand_path('~'), compat_getenv('HOME')) | ||||
|         self.assertEqual( | ||||
|             expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), | ||||
|             '%s/expanded' % compat_getenv('HOME')) | ||||
|  | ||||
|     def test_prepend_extension(self): | ||||
|         self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') | ||||
|         self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') | ||||
| @@ -454,6 +470,9 @@ class TestUtil(unittest.TestCase): | ||||
|  | ||||
|     def test_urljoin(self): | ||||
|         self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
|         self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') | ||||
| @@ -1104,6 +1123,14 @@ The first line | ||||
|             ohdave_rsa_encrypt(b'aa111222', e, N), | ||||
|             '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') | ||||
|  | ||||
|     def test_pkcs1pad(self): | ||||
|         data = [1, 2, 3] | ||||
|         padded_data = pkcs1pad(data, 32) | ||||
|         self.assertEqual(padded_data[:2], [0, 2]) | ||||
|         self.assertEqual(padded_data[28:], [0, 1, 2, 3]) | ||||
|  | ||||
|         self.assertRaises(ValueError, pkcs1pad, data, 8) | ||||
|  | ||||
|     def test_encode_base_n(self): | ||||
|         self.assertEqual(encode_base_n(0, 30), '0') | ||||
|         self.assertEqual(encode_base_n(80, 30), '2k') | ||||
|   | ||||
| @@ -29,10 +29,10 @@ import random | ||||
| from .compat import ( | ||||
|     compat_basestring, | ||||
|     compat_cookiejar, | ||||
|     compat_expanduser, | ||||
|     compat_get_terminal_size, | ||||
|     compat_http_client, | ||||
|     compat_kwargs, | ||||
|     compat_numeric_types, | ||||
|     compat_os_name, | ||||
|     compat_str, | ||||
|     compat_tokenize_tokenize, | ||||
| @@ -53,6 +53,7 @@ from .utils import ( | ||||
|     encode_compat_str, | ||||
|     encodeFilename, | ||||
|     error_to_compat_str, | ||||
|     expand_path, | ||||
|     ExtractorError, | ||||
|     format_bytes, | ||||
|     formatSeconds, | ||||
| @@ -327,11 +328,21 @@ class YoutubeDL(object): | ||||
|         self.params.update(params) | ||||
|         self.cache = Cache(self) | ||||
|  | ||||
|         if self.params.get('cn_verification_proxy') is not None: | ||||
|             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.') | ||||
|         def check_deprecated(param, option, suggestion): | ||||
|             if self.params.get(param) is not None: | ||||
|                 self.report_warning( | ||||
|                     '%s is deprecated. Use %s instead.' % (option, suggestion)) | ||||
|                 return True | ||||
|             return False | ||||
|  | ||||
|         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): | ||||
|             if self.params.get('geo_verification_proxy') is None: | ||||
|                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] | ||||
|  | ||||
|         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') | ||||
|         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') | ||||
|         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') | ||||
|  | ||||
|         if params.get('bidi_workaround', False): | ||||
|             try: | ||||
|                 import pty | ||||
| @@ -593,10 +604,7 @@ class YoutubeDL(object): | ||||
|             autonumber_size = self.params.get('autonumber_size') | ||||
|             if autonumber_size is None: | ||||
|                 autonumber_size = 5 | ||||
|             autonumber_templ = '%0' + str(autonumber_size) + 'd' | ||||
|             template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads) | ||||
|             if template_dict.get('playlist_index') is not None: | ||||
|                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) | ||||
|             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads | ||||
|             if template_dict.get('resolution') is None: | ||||
|                 if template_dict.get('width') and template_dict.get('height'): | ||||
|                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) | ||||
| @@ -608,14 +616,63 @@ class YoutubeDL(object): | ||||
|             sanitize = lambda k, v: sanitize_filename( | ||||
|                 compat_str(v), | ||||
|                 restricted=self.params.get('restrictfilenames'), | ||||
|                 is_id=(k == 'id')) | ||||
|             template_dict = dict((k, sanitize(k, v)) | ||||
|                 is_id=(k == 'id' or k.endswith('_id'))) | ||||
|             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) | ||||
|                                  for k, v in template_dict.items() | ||||
|                                  if v is not None and not isinstance(v, (list, tuple, dict))) | ||||
|             template_dict = collections.defaultdict(lambda: 'NA', template_dict) | ||||
|  | ||||
|             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|             tmpl = compat_expanduser(outtmpl) | ||||
|  | ||||
|             # For fields playlist_index and autonumber convert all occurrences | ||||
|             # of %(field)s to %(field)0Nd for backward compatibility | ||||
|             field_size_compat_map = { | ||||
|                 'playlist_index': len(str(template_dict['n_entries'])), | ||||
|                 'autonumber': autonumber_size, | ||||
|             } | ||||
|             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s' | ||||
|             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) | ||||
|             if mobj: | ||||
|                 outtmpl = re.sub( | ||||
|                     FIELD_SIZE_COMPAT_RE, | ||||
|                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], | ||||
|                     outtmpl) | ||||
|  | ||||
|             NUMERIC_FIELDS = set(( | ||||
|                 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', | ||||
|                 'upload_year', 'upload_month', 'upload_day', | ||||
|                 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', | ||||
|                 'average_rating', 'comment_count', 'age_limit', | ||||
|                 'start_time', 'end_time', | ||||
|                 'chapter_number', 'season_number', 'episode_number', | ||||
|                 'track_number', 'disc_number', 'release_year', | ||||
|                 'playlist_index', | ||||
|             )) | ||||
|  | ||||
|             # Missing numeric fields used together with integer presentation types | ||||
|             # in format specification will break the argument substitution since | ||||
|             # string 'NA' is returned for missing fields. We will patch output | ||||
|             # template for missing fields to meet string presentation type. | ||||
|             for numeric_field in NUMERIC_FIELDS: | ||||
|                 if numeric_field not in template_dict: | ||||
|                     # As of [1] format syntax is: | ||||
|                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type | ||||
|                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting | ||||
|                     FORMAT_RE = r'''(?x) | ||||
|                         (?<!%) | ||||
|                         % | ||||
|                         \({0}\)  # mapping key | ||||
|                         (?:[#0\-+ ]+)?  # conversion flags (optional) | ||||
|                         (?:\d+)?  # minimum field width (optional) | ||||
|                         (?:\.\d+)?  # precision (optional) | ||||
|                         [hlL]?  # length modifier (optional) | ||||
|                         [diouxXeEfFgGcrs%]  # conversion type | ||||
|                     ''' | ||||
|                     outtmpl = re.sub( | ||||
|                         FORMAT_RE.format(numeric_field), | ||||
|                         r'%({0})s'.format(numeric_field), outtmpl) | ||||
|  | ||||
|             tmpl = expand_path(outtmpl) | ||||
|             filename = tmpl % template_dict | ||||
|             # Temporary fix for #4787 | ||||
|             # 'Treat' all problem characters by passing filename through preferredencoding | ||||
| @@ -780,6 +837,12 @@ class YoutubeDL(object): | ||||
|                 ie_result['url'], ie_key=ie_result.get('ie_key'), | ||||
|                 extra_info=extra_info, download=False, process=False) | ||||
|  | ||||
|             # extract_info may return None when ignoreerrors is enabled and | ||||
|             # extraction failed with an error, don't crash and return early | ||||
|             # in this case | ||||
|             if not info: | ||||
|                 return info | ||||
|  | ||||
|             force_properties = dict( | ||||
|                 (k, v) for k, v in ie_result.items() if v is not None) | ||||
|             for f in ('_type', 'url', 'ie_key'): | ||||
| @@ -1815,6 +1878,7 @@ class YoutubeDL(object): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 outtmpl != '-' and | ||||
|                 '%' not in outtmpl and | ||||
|                 self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(outtmpl) | ||||
| @@ -2112,7 +2176,7 @@ class YoutubeDL(object): | ||||
|         if opts_cookiefile is None: | ||||
|             self.cookiejar = compat_cookiejar.CookieJar() | ||||
|         else: | ||||
|             opts_cookiefile = compat_expanduser(opts_cookiefile) | ||||
|             opts_cookiefile = expand_path(opts_cookiefile) | ||||
|             self.cookiejar = compat_cookiejar.MozillaCookieJar( | ||||
|                 opts_cookiefile) | ||||
|             if os.access(opts_cookiefile, os.R_OK): | ||||
|   | ||||
| @@ -16,7 +16,6 @@ from .options import ( | ||||
|     parseOpts, | ||||
| ) | ||||
| from .compat import ( | ||||
|     compat_expanduser, | ||||
|     compat_getpass, | ||||
|     compat_shlex_split, | ||||
|     workaround_optparse_bug9161, | ||||
| @@ -26,6 +25,7 @@ from .utils import ( | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     expand_path, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
| @@ -88,7 +88,7 @@ def _real_main(argv=None): | ||||
|                 batchfd = sys.stdin | ||||
|             else: | ||||
|                 batchfd = io.open( | ||||
|                     compat_expanduser(opts.batchfile), | ||||
|                     expand_path(opts.batchfile), | ||||
|                     'r', encoding='utf-8', errors='ignore') | ||||
|             batch_urls = read_batch_urls(batchfd) | ||||
|             if opts.verbose: | ||||
| @@ -196,7 +196,7 @@ def _real_main(argv=None): | ||||
|     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: | ||||
|         raise ValueError('Playlist end must be greater than playlist start') | ||||
|     if opts.extractaudio: | ||||
|         if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: | ||||
|         if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: | ||||
|             parser.error('invalid audio format specified') | ||||
|     if opts.audioquality: | ||||
|         opts.audioquality = opts.audioquality.strip('k').strip('K') | ||||
| @@ -238,18 +238,15 @@ def _real_main(argv=None): | ||||
|  | ||||
|     any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json | ||||
|     any_printing = opts.print_json | ||||
|     download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive | ||||
|     download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive | ||||
|  | ||||
|     # PostProcessors | ||||
|     postprocessors = [] | ||||
|     # Add the metadata pp first, the other pps will copy it | ||||
|     if opts.metafromtitle: | ||||
|         postprocessors.append({ | ||||
|             'key': 'MetadataFromTitle', | ||||
|             'titleformat': opts.metafromtitle | ||||
|         }) | ||||
|     if opts.addmetadata: | ||||
|         postprocessors.append({'key': 'FFmpegMetadata'}) | ||||
|     if opts.extractaudio: | ||||
|         postprocessors.append({ | ||||
|             'key': 'FFmpegExtractAudio', | ||||
| @@ -262,6 +259,16 @@ def _real_main(argv=None): | ||||
|             'key': 'FFmpegVideoConvertor', | ||||
|             'preferedformat': opts.recodevideo, | ||||
|         }) | ||||
|     # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and | ||||
|     # FFmpegExtractAudioPP as containers before conversion may not support | ||||
|     # metadata (3gp, webm, etc.) | ||||
|     # And this post-processor should be placed before other metadata | ||||
|     # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of | ||||
|     # extra metadata. By default ffmpeg preserves metadata applicable for both | ||||
|     # source and target containers. From this point the container won't change, | ||||
|     # so metadata can be added here. | ||||
|     if opts.addmetadata: | ||||
|         postprocessors.append({'key': 'FFmpegMetadata'}) | ||||
|     if opts.convertsubtitles: | ||||
|         postprocessors.append({ | ||||
|             'key': 'FFmpegSubtitlesConvertor', | ||||
| @@ -416,6 +423,9 @@ def _real_main(argv=None): | ||||
|         'config_location': opts.config_location, | ||||
|         'geo_bypass': opts.geo_bypass, | ||||
|         'geo_bypass_country': opts.geo_bypass_country, | ||||
|         # just for deprecation check | ||||
|         'autonumber': opts.autonumber if opts.autonumber is True else None, | ||||
|         'usetitle': opts.usetitle if opts.usetitle is True else None, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
| @@ -439,7 +449,7 @@ def _real_main(argv=None): | ||||
|  | ||||
|         try: | ||||
|             if opts.load_info_filename is not None: | ||||
|                 retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename)) | ||||
|                 retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) | ||||
|             else: | ||||
|                 retcode = ydl.download(all_urls) | ||||
|         except MaxDownloadsReached: | ||||
|   | ||||
| @@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv): | ||||
|     return decrypted_data | ||||
|  | ||||
|  | ||||
| def aes_cbc_encrypt(data, key, iv): | ||||
|     """ | ||||
|     Encrypt with aes in CBC mode. Using PKCS#7 padding | ||||
|  | ||||
|     @param {int[]} data        cleartext | ||||
|     @param {int[]} key         16/24/32-Byte cipher key | ||||
|     @param {int[]} iv          16-Byte IV | ||||
|     @returns {int[]}           encrypted data | ||||
|     """ | ||||
|     expanded_key = key_expansion(key) | ||||
|     block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) | ||||
|  | ||||
|     encrypted_data = [] | ||||
|     previous_cipher_block = iv | ||||
|     for i in range(block_count): | ||||
|         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] | ||||
|         remaining_length = BLOCK_SIZE_BYTES - len(block) | ||||
|         block += [remaining_length] * remaining_length | ||||
|         mixed_block = xor(block, previous_cipher_block) | ||||
|  | ||||
|         encrypted_block = aes_encrypt(mixed_block, expanded_key) | ||||
|         encrypted_data += encrypted_block | ||||
|  | ||||
|         previous_cipher_block = encrypted_block | ||||
|  | ||||
|     return encrypted_data | ||||
|  | ||||
|  | ||||
| def key_expansion(data): | ||||
|     """ | ||||
|     Generate key schedule | ||||
|   | ||||
| @@ -8,8 +8,11 @@ import re | ||||
| import shutil | ||||
| import traceback | ||||
|  | ||||
| from .compat import compat_expanduser, compat_getenv | ||||
| from .utils import write_json_file | ||||
| from .compat import compat_getenv | ||||
| from .utils import ( | ||||
|     expand_path, | ||||
|     write_json_file, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Cache(object): | ||||
| @@ -21,7 +24,7 @@ class Cache(object): | ||||
|         if res is None: | ||||
|             cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') | ||||
|             res = os.path.join(cache_root, 'youtube-dl') | ||||
|         return compat_expanduser(res) | ||||
|         return expand_path(res) | ||||
|  | ||||
|     def _get_cache_fn(self, section, key, dtype): | ||||
|         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ | ||||
|   | ||||
| @@ -2760,6 +2760,12 @@ else: | ||||
|     compat_kwargs = lambda kwargs: kwargs | ||||
|  | ||||
|  | ||||
| try: | ||||
|     compat_numeric_types = (int, float, long, complex) | ||||
| except NameError:  # Python 3 | ||||
|     compat_numeric_types = (int, float, complex) | ||||
|  | ||||
|  | ||||
| if sys.version_info < (2, 7): | ||||
|     def compat_socket_create_connection(address, timeout, source_address=None): | ||||
|         host, port = address | ||||
| @@ -2895,6 +2901,7 @@ __all__ = [ | ||||
|     'compat_input', | ||||
|     'compat_itertools_count', | ||||
|     'compat_kwargs', | ||||
|     'compat_numeric_types', | ||||
|     'compat_ord', | ||||
|     'compat_os_name', | ||||
|     'compat_parse_qs', | ||||
|   | ||||
| @@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}): | ||||
|         if ed.can_download(info_dict): | ||||
|             return ed | ||||
|  | ||||
|     if protocol.startswith('m3u8') and info_dict.get('is_live'): | ||||
|         return FFmpegFD | ||||
|  | ||||
|     if protocol == 'm3u8' and params.get('hls_prefer_native') is True: | ||||
|         return HlsFD | ||||
|  | ||||
|   | ||||
| @@ -347,7 +347,10 @@ class FileDownloader(object): | ||||
|         if min_sleep_interval: | ||||
|             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) | ||||
|             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) | ||||
|             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) | ||||
|             self.to_screen( | ||||
|                 '[download] Sleeping %s seconds...' % ( | ||||
|                     int(sleep_interval) if sleep_interval.is_integer() | ||||
|                     else '%.2f' % sleep_interval)) | ||||
|             time.sleep(sleep_interval) | ||||
|  | ||||
|         return self.real_download(filename, info_dict) | ||||
|   | ||||
| @@ -6,7 +6,10 @@ import sys | ||||
| import re | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..compat import compat_setenv | ||||
| from ..compat import ( | ||||
|     compat_setenv, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS | ||||
| from ..utils import ( | ||||
|     cli_option, | ||||
| @@ -270,6 +273,10 @@ class FFmpegFD(ExternalFD): | ||||
|                 args += ['-rtmp_live', 'live'] | ||||
|  | ||||
|         args += ['-i', url, '-c', 'copy'] | ||||
|  | ||||
|         if self.params.get('test', False): | ||||
|             args += ['-fs', compat_str(self._TEST_FILE_SIZE)] | ||||
|  | ||||
|         if protocol in ('m3u8', 'm3u8_native'): | ||||
|             if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': | ||||
|                 args += ['-f', 'mpegts'] | ||||
|   | ||||
| @@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'description': 'One Piece 606', | ||||
|             'title': 'One Piece 606', | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'Video is gone', | ||||
|     }, { | ||||
|         'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', | ||||
|         'only_matching': True, | ||||
|   | ||||
							
								
								
									
										136
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import json | ||||
| import os | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..aes import aes_cbc_decrypt | ||||
| from ..compat import compat_ord | ||||
| from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     intlist_to_bytes, | ||||
|     srt_subtitles_timecode, | ||||
|     strip_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ADNIE(InfoExtractor): | ||||
|     IE_DESC = 'Anime Digital Network' | ||||
|     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', | ||||
|         'md5': 'e497370d847fd79d9d4c74be55575c7a', | ||||
|         'info_dict': { | ||||
|             'id': '7778', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', | ||||
|             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _get_subtitles(self, sub_path, video_id): | ||||
|         if not sub_path: | ||||
|             return None | ||||
|  | ||||
|         enc_subtitles = self._download_webpage( | ||||
|             'http://animedigitalnetwork.fr/' + sub_path, | ||||
|             video_id, fatal=False) | ||||
|         if not enc_subtitles: | ||||
|             return None | ||||
|  | ||||
|         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||
|         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( | ||||
|             bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), | ||||
|             bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8'), | ||||
|             bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) | ||||
|         )) | ||||
|         subtitles_json = self._parse_json( | ||||
|             dec_subtitles[:-compat_ord(dec_subtitles[-1])], | ||||
|             None, fatal=False) | ||||
|         if not subtitles_json: | ||||
|             return None | ||||
|  | ||||
|         subtitles = {} | ||||
|         for sub_lang, sub in subtitles_json.items(): | ||||
|             srt = '' | ||||
|             for num, current in enumerate(sub): | ||||
|                 start, end, text = ( | ||||
|                     float_or_none(current.get('startTime')), | ||||
|                     float_or_none(current.get('endTime')), | ||||
|                     current.get('text')) | ||||
|                 if start is None or end is None or text is None: | ||||
|                     continue | ||||
|                 srt += os.linesep.join( | ||||
|                     ( | ||||
|                         '%d' % num, | ||||
|                         '%s --> %s' % ( | ||||
|                             srt_subtitles_timecode(start), | ||||
|                             srt_subtitles_timecode(end)), | ||||
|                         text, | ||||
|                         os.linesep, | ||||
|                     )) | ||||
|  | ||||
|             if sub_lang == 'vostf': | ||||
|                 sub_lang = 'fr' | ||||
|             subtitles.setdefault(sub_lang, []).extend([{ | ||||
|                 'ext': 'json', | ||||
|                 'data': json.dumps(sub), | ||||
|             }, { | ||||
|                 'ext': 'srt', | ||||
|                 'data': srt, | ||||
|             }]) | ||||
|         return subtitles | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_config = self._parse_json(self._search_regex( | ||||
|             r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) | ||||
|  | ||||
|         video_info = {} | ||||
|         video_info_str = self._search_regex( | ||||
|             r'videoInfo\s*=\s*({.+});', webpage, | ||||
|             'video info', fatal=False) | ||||
|         if video_info_str: | ||||
|             video_info = self._parse_json( | ||||
|                 video_info_str, video_id, fatal=False) or {} | ||||
|  | ||||
|         options = player_config.get('options') or {} | ||||
|         metas = options.get('metas') or {} | ||||
|         title = metas.get('title') or video_info['title'] | ||||
|         links = player_config.get('links') or {} | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, qualities in links.items(): | ||||
|             for load_balancer_url in qualities.values(): | ||||
|                 load_balancer_data = self._download_json( | ||||
|                     load_balancer_url, video_id, fatal=False) or {} | ||||
|                 m3u8_url = load_balancer_data.get('location') | ||||
|                 if not m3u8_url: | ||||
|                     continue | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id=format_id, fatal=False) | ||||
|                 if format_id == 'vf': | ||||
|                     for f in m3u8_formats: | ||||
|                         f['language'] = 'fr' | ||||
|                 formats.extend(m3u8_formats) | ||||
|         error = options.get('error') | ||||
|         if not formats and error: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': strip_or_none(metas.get('summary') or video_info.get('resume')), | ||||
|             'thumbnail': video_info.get('image'), | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id), | ||||
|             'episode': metas.get('subtitle') or video_info.get('videoTitle'), | ||||
|             'series': video_info.get('playlistTitle'), | ||||
|         } | ||||
| @@ -36,6 +36,11 @@ MSO_INFO = { | ||||
|         'username_field': 'Ecom_User_ID', | ||||
|         'password_field': 'Ecom_Password', | ||||
|     }, | ||||
|     'Charter_Direct': { | ||||
|         'name': 'Charter Spectrum', | ||||
|         'username_field': 'IDToken1', | ||||
|         'password_field': 'IDToken2', | ||||
|     }, | ||||
|     'thr030': { | ||||
|         'name': '3 Rivers Communications' | ||||
|     }, | ||||
| @@ -1453,6 +1458,8 @@ class AdobePassIE(InfoExtractor): | ||||
|                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) | ||||
|                     count += 1 | ||||
|                     continue | ||||
|                 if '<error' in authorize: | ||||
|                     raise ExtractorError(xml_text(authorize, 'details'), expected=True) | ||||
|                 authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) | ||||
|                 requestor_info[guid] = authz_token | ||||
|                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) | ||||
|   | ||||
| @@ -4,15 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..compat import compat_xpath | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     update_url_query, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
| @@ -43,7 +38,8 @@ class AfreecaTVIE(InfoExtractor): | ||||
|             'uploader': 'dailyapril', | ||||
|             'uploader_id': 'dailyapril', | ||||
|             'upload_date': '20160503', | ||||
|         } | ||||
|         }, | ||||
|         'skip': 'Video is gone', | ||||
|     }, { | ||||
|         'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', | ||||
|         'info_dict': { | ||||
| @@ -71,6 +67,19 @@ class AfreecaTVIE(InfoExtractor): | ||||
|                 'upload_date': '20160502', | ||||
|             }, | ||||
|         }], | ||||
|         'skip': 'Video is gone', | ||||
|     }, { | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793', | ||||
|         'info_dict': { | ||||
|             'id': '18650793', | ||||
|             'ext': 'flv', | ||||
|             'uploader': '윈아디', | ||||
|             'uploader_id': 'badkids', | ||||
|             'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires rtmpdump | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', | ||||
|         'only_matching': True, | ||||
| @@ -90,40 +99,33 @@ class AfreecaTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         parsed_url = compat_urllib_parse_urlparse(url) | ||||
|         info_url = compat_urlparse.urlunparse(parsed_url._replace( | ||||
|             netloc='afbbs.afreecatv.com:8080', | ||||
|             path='/api/video/get_video_info.php')) | ||||
|  | ||||
|         video_xml = self._download_xml( | ||||
|             update_url_query(info_url, {'nTitleNo': video_id}), video_id) | ||||
|             'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', | ||||
|             video_id, query={'nTitleNo': video_id}) | ||||
|  | ||||
|         if xpath_element(video_xml, './track/video/file') is None: | ||||
|         video_element = video_xml.findall(compat_xpath('./track/video'))[1] | ||||
|         if video_element is None or video_element.text is None: | ||||
|             raise ExtractorError('Specified AfreecaTV video does not exist', | ||||
|                                  expected=True) | ||||
|  | ||||
|         title = xpath_text(video_xml, './track/title', 'title') | ||||
|         video_url_raw = video_element.text | ||||
|  | ||||
|         app, playpath = video_url_raw.split('mp4:') | ||||
|  | ||||
|         title = xpath_text(video_xml, './track/title', 'title', fatal=True) | ||||
|         uploader = xpath_text(video_xml, './track/nickname', 'uploader') | ||||
|         uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') | ||||
|         duration = int_or_none(xpath_text(video_xml, './track/duration', | ||||
|                                           'duration')) | ||||
|         thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') | ||||
|  | ||||
|         entries = [] | ||||
|         for i, video_file in enumerate(video_xml.findall('./track/video/file')): | ||||
|             video_key = self.parse_video_key(video_file.get('key', '')) | ||||
|             if not video_key: | ||||
|                 continue | ||||
|             entries.append({ | ||||
|                 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), | ||||
|                 'title': title, | ||||
|                 'upload_date': video_key.get('upload_date'), | ||||
|                 'duration': int_or_none(video_file.get('duration')), | ||||
|                 'url': video_file.text, | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': app, | ||||
|             'ext': 'flv', | ||||
|             'play_path': 'mp4:' + playpath, | ||||
|             'rtmp_live': True,  # downloading won't end without this | ||||
|             'title': title, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
| @@ -131,20 +133,6 @@ class AfreecaTVIE(InfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|         if len(entries) > 1: | ||||
|             info['_type'] = 'multi_video' | ||||
|             info['entries'] = entries | ||||
|         elif len(entries) == 1: | ||||
|             info['url'] = entries[0]['url'] | ||||
|             info['upload_date'] = entries[0].get('upload_date') | ||||
|         else: | ||||
|             raise ExtractorError( | ||||
|                 'No files found for the specified AfreecaTV video, either' | ||||
|                 ' the URL is incorrect or the video has been made private.', | ||||
|                 expected=True) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class AfreecaTVGlobalIE(AfreecaTVIE): | ||||
|     IE_NAME = 'afreecatv:global' | ||||
|   | ||||
| @@ -2,9 +2,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     remove_end, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     remove_end, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
| @@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor): | ||||
|             'title': 'Astérix - Le Domaine des Dieux Teaser VF', | ||||
|             'description': 'md5:4a754271d9c6f16c72629a8a993ee884', | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'duration': 39, | ||||
|             'timestamp': 1404273600, | ||||
|             'upload_date': '20140702', | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', | ||||
| @@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor): | ||||
|             'title': 'Planes 2 Bande-annonce VF', | ||||
|             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'duration': 69, | ||||
|             'timestamp': 1385659800, | ||||
|             'upload_date': '20131128', | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', | ||||
| @@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor): | ||||
|             'title': 'Dragons 2 - Bande annonce finale VF', | ||||
|             'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'duration': 144, | ||||
|             'timestamp': 1397589900, | ||||
|             'upload_date': '20140415', | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.allocine.fr/video/video-19550147/', | ||||
| @@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor): | ||||
|             r'data-model="([^"]+)"', webpage, 'data model', default=None) | ||||
|         if model: | ||||
|             model_data = self._parse_json(model, display_id) | ||||
|  | ||||
|             for video_url in model_data['sources'].values(): | ||||
|             video = model_data['videos'][0] | ||||
|             title = video['title'] | ||||
|             for video_url in video['sources'].values(): | ||||
|                 video_id, format_id = url_basename(video_url).split('_')[:2] | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                     'url': video_url, | ||||
|                 }) | ||||
|  | ||||
|             title = model_data['title'] | ||||
|             duration = int_or_none(video.get('duration')) | ||||
|             view_count = int_or_none(video.get('view_count')) | ||||
|             timestamp = unified_timestamp(try_get( | ||||
|                 video, lambda x: x['added_at']['date'], compat_str)) | ||||
|         else: | ||||
|             video_id = display_id | ||||
|             media_data = self._download_json( | ||||
|                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) | ||||
|             title = remove_end( | ||||
|                 self._html_search_regex( | ||||
|                     r'(?s)<title>(.+?)</title>', webpage, 'title').strip(), | ||||
|                 ' - AlloCiné') | ||||
|             for key, value in media_data['video'].items(): | ||||
|                 if not key.endswith('Path'): | ||||
|                     continue | ||||
|  | ||||
|                 format_id = key[:-len('Path')] | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                     'url': value, | ||||
|                 }) | ||||
|  | ||||
|             title = remove_end(self._html_search_regex( | ||||
|                 r'(?s)<title>(.+?)</title>', webpage, 'title' | ||||
|             ).strip(), ' - AlloCiné') | ||||
|             duration, view_count, timestamp = [None] * 3 | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AMCNetworksIE(ThePlatformIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', | ||||
|         'md5': '', | ||||
| @@ -44,6 +44,12 @@ class AMCNetworksIE(ThePlatformIE): | ||||
|     }, { | ||||
|         'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor): | ||||
|                 exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) | ||||
|                 if kind == 'm3u8' or 'm3u8' in exts: | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         f_url, video_id, 'mp4', | ||||
|                         entry_protocol='m3u8' if is_live else 'm3u8_native', | ||||
|                         f_url, video_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id=kind, fatal=False, live=is_live)) | ||||
|                 elif kind == 'flash' or 'f4m' in exts: | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|   | ||||
| @@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor): | ||||
|             request, None, 'Logging in as %s' % username) | ||||
|  | ||||
|         error = self._html_search_regex( | ||||
|             r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None) | ||||
|             r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>', | ||||
|             response, 'error', default=None) | ||||
|         if error: | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login: %s' % error, expected=True) | ||||
| @@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor): | ||||
|             if format_id == 'token' or not video_url.startswith('http'): | ||||
|                 continue | ||||
|             if 'geodeswowsmpra3player' in video_url: | ||||
|                 f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] | ||||
|                 f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) | ||||
|                 # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] | ||||
|                 # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) | ||||
|                 # these videos are protected by DRM, the f4m downloader doesn't support them | ||||
|                 continue | ||||
|             else: | ||||
|                 f4m_url = video_url[:-9] + '/manifest.f4m' | ||||
|             formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) | ||||
|             video_url_hd = video_url.replace('free_es', 'es') | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', | ||||
|                 fatal=False)) | ||||
|             formats.extend(self._extract_mpd_formats( | ||||
|                 video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', | ||||
|                 fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         path_data = player.get('pathData') | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ATVAtIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/', | ||||
|         'md5': 'c3b6b975fb3150fc628572939df205f2', | ||||
|         'info_dict': { | ||||
|             'id': '1698447', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'DI, 21.03.17 | 20:05 Uhr 1/1', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_data = self._parse_json(unescapeHTML(self._search_regex( | ||||
|             r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', | ||||
|             webpage, 'player data')), display_id)['config']['initial_video'] | ||||
|  | ||||
|         video_id = video_data['id'] | ||||
|         video_title = video_data['title'] | ||||
|  | ||||
|         parts = [] | ||||
|         for part in video_data.get('parts', []): | ||||
|             part_id = part['id'] | ||||
|             part_title = part['title'] | ||||
|  | ||||
|             formats = [] | ||||
|             for source in part.get('sources', []): | ||||
|                 source_url = source.get('src') | ||||
|                 if not source_url: | ||||
|                     continue | ||||
|                 ext = determine_ext(source_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         source_url, part_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls', fatal=False)) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'format_id': source.get('delivery'), | ||||
|                         'url': source_url, | ||||
|                     }) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             parts.append({ | ||||
|                 'id': part_id, | ||||
|                 'title': part_title, | ||||
|                 'thumbnail': part.get('preview_image_url'), | ||||
|                 'duration': int_or_none(part.get('duration')), | ||||
|                 'is_live': part.get('is_livestream'), | ||||
|                 'formats': formats, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'multi_video', | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'entries': parts, | ||||
|         } | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -5,6 +6,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
| from ..utils import ( | ||||
|     get_element_by_class, | ||||
|     get_element_by_id, | ||||
|     strip_or_none, | ||||
|     urljoin, | ||||
| @@ -170,3 +172,42 @@ class AZMedienPlaylistIE(AZMedienBaseIE): | ||||
|                 'video-title', webpage)), group='title') | ||||
|  | ||||
|         return self.playlist_result(entries, show_id, title) | ||||
|  | ||||
|  | ||||
| class AZMedienShowPlaylistIE(AZMedienBaseIE): | ||||
|     IE_DESC = 'AZ Medien show playlists' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:www\.)? | ||||
|                         (?: | ||||
|                             telezueri\.ch| | ||||
|                             telebaern\.tv| | ||||
|                             telem1\.ch | ||||
|                         )/ | ||||
|                         (?: | ||||
|                             all-episodes| | ||||
|                             alle-episoden | ||||
|                         )/ | ||||
|                         (?P<id>[^/?#&]+) | ||||
|                     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.telezueri.ch/all-episodes/astrotalk', | ||||
|         'info_dict': { | ||||
|             'id': 'astrotalk', | ||||
|             'title': 'TeleZüri: AstroTalk - alle episoden', | ||||
|             'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', | ||||
|         }, | ||||
|         'playlist_mincount': 13, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         episodes = get_element_by_class('search-mobile-box', webpage) | ||||
|         entries = [self.url_result( | ||||
|             urljoin(url, m.group('url'))) for m in re.finditer( | ||||
|                 r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)] | ||||
|         title = self._og_search_title(webpage, fatal=False) | ||||
|         description = self._og_search_description(webpage) | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|   | ||||
| @@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor): | ||||
|                 animalplanet| | ||||
|                 bravo| | ||||
|                 mtv| | ||||
|                 space | ||||
|                 space| | ||||
|                 etalk | ||||
|             )\.ca| | ||||
|             much\.com | ||||
|         )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' | ||||
|         )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctv.ca/video/player?vid=706966', | ||||
|         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', | ||||
| @@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.etalk.ca/video?videoid=663455', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _DOMAINS = { | ||||
|         'thecomedynetwork': 'comedy', | ||||
| @@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor): | ||||
|         'sciencechannel': 'discsci', | ||||
|         'investigationdiscovery': 'invdisc', | ||||
|         'animalplanet': 'aniplan', | ||||
|         'etalk': 'ctv', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										72
									
								
								youtube_dl/extractor/bostonglobe.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								youtube_dl/extractor/bostonglobe.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BostonGlobeIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', | ||||
|             'md5': '0a62181079c85c2d2b618c9a738aedaf', | ||||
|             'info_dict': { | ||||
|                 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', | ||||
|                 'id': '5320421710001', | ||||
|                 'ext': 'mp4', | ||||
|                 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', | ||||
|                 'timestamp': 1486877593, | ||||
|                 'upload_date': '20170212', | ||||
|                 'uploader_id': '245991542', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # Embedded youtube video; we hand it off to the Generic extractor. | ||||
|             'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', | ||||
|             'md5': '582b40327089d5c0c949b3c54b13c24b', | ||||
|             'info_dict': { | ||||
|                 'title': "Who Is Matt Damon's Favorite Batman?", | ||||
|                 'id': 'ZW1QCnlA6Qc', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20170217', | ||||
|                 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', | ||||
|                 'uploader': 'The Late Late Show with James Corden', | ||||
|                 'uploader_id': 'TheLateLateShow', | ||||
|             }, | ||||
|             'expected_warnings': ['404'], | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|  | ||||
|         page_title = self._og_search_title(webpage, default=None) | ||||
|  | ||||
|         # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject"> | ||||
|         entries = [] | ||||
|         for video in re.findall(r'(?i)(<video[^>]+>)', webpage): | ||||
|             attrs = extract_attributes(video) | ||||
|  | ||||
|             video_id = attrs.get('data-brightcove-video-id') | ||||
|             account_id = attrs.get('data-account') | ||||
|             player_id = attrs.get('data-player') | ||||
|             embed = attrs.get('data-embed') | ||||
|  | ||||
|             if video_id and account_id and player_id and embed: | ||||
|                 entries.append( | ||||
|                     'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' | ||||
|                     % (account_id, player_id, embed, video_id)) | ||||
|  | ||||
|         if len(entries) == 0: | ||||
|             return self.url_result(url, 'Generic') | ||||
|         elif len(entries) == 1: | ||||
|             return self.url_result(entries[0], 'BrightcoveNew') | ||||
|         else: | ||||
|             return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew') | ||||
| @@ -193,7 +193,13 @@ class BrightcoveLegacyIE(InfoExtractor): | ||||
|         if videoPlayer is not None: | ||||
|             if isinstance(videoPlayer, list): | ||||
|                 videoPlayer = videoPlayer[0] | ||||
|             if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): | ||||
|             videoPlayer = videoPlayer.strip() | ||||
|             # UUID is also possible for videoPlayer (e.g. | ||||
|             # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd | ||||
|             # or http://www8.hp.com/cn/zh/home.html) | ||||
|             if not (re.match( | ||||
|                     r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', | ||||
|                     videoPlayer) or videoPlayer.startswith('ref:')): | ||||
|                 return None | ||||
|             params['@videoPlayer'] = videoPlayer | ||||
|         linkBase = find_param('linkBaseURL') | ||||
| @@ -515,6 +521,9 @@ class BrightcoveNewIE(InfoExtractor): | ||||
|         return entries | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass(smuggled_data.get('geo_countries')) | ||||
|  | ||||
|         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
| @@ -544,8 +553,10 @@ class BrightcoveNewIE(InfoExtractor): | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0] | ||||
|                 raise ExtractorError( | ||||
|                     json_data.get('message') or json_data['error_code'], expected=True) | ||||
|                 message = json_data.get('message') or json_data['error_code'] | ||||
|                 if json_data.get('error_subcode') == 'CLIENT_GEO': | ||||
|                     self.raise_geo_restricted(msg=message) | ||||
|                 raise ExtractorError(message, expected=True) | ||||
|             raise | ||||
|  | ||||
|         title = json_data['name'].strip() | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import codecs | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -96,6 +97,10 @@ class CDAIE(InfoExtractor): | ||||
|             if not video or 'file' not in video: | ||||
|                 self.report_warning('Unable to extract %s version information' % version) | ||||
|                 return | ||||
|             if video['file'].startswith('uggc'): | ||||
|                 video['file'] = codecs.decode(video['file'], 'rot_13') | ||||
|                 if video['file'].endswith('adc.mp4'): | ||||
|                     video['file'] = video['file'].replace('adc.mp4', '.mp4') | ||||
|             f = { | ||||
|                 'url': video['file'], | ||||
|             } | ||||
|   | ||||
| @@ -160,8 +160,7 @@ class CeskaTelevizeIE(InfoExtractor): | ||||
|                 for format_id, stream_url in item.get('streamUrls', {}).items(): | ||||
|                     if 'playerType=flash' in stream_url: | ||||
|                         stream_formats = self._extract_m3u8_formats( | ||||
|                             stream_url, playlist_id, 'mp4', | ||||
|                             entry_protocol='m3u8' if is_live else 'm3u8_native', | ||||
|                             stream_url, playlist_id, 'mp4', 'm3u8_native', | ||||
|                             m3u8_id='hls-%s' % format_id, fatal=False) | ||||
|                     else: | ||||
|                         stream_formats = self._extract_mpd_formats( | ||||
|   | ||||
| @@ -4,62 +4,62 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     parse_filesize, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     qualities, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Channel9IE(InfoExtractor): | ||||
|     ''' | ||||
|     Common extractor for channel9.msdn.com. | ||||
|  | ||||
|     The type of provided URL (video or playlist) is determined according to | ||||
|     meta Search.PageType from web page HTML rather than URL itself, as it is | ||||
|     not always possible to do. | ||||
|     ''' | ||||
|     IE_DESC = 'Channel 9' | ||||
|     IE_NAME = 'channel9' | ||||
|     _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', | ||||
|         'md5': 'bbd75296ba47916b754e73c3a4bbdf10', | ||||
|         'md5': '32083d4eaf1946db6d454313f44510ca', | ||||
|         'info_dict': { | ||||
|             'id': 'Events/TechEd/Australia/2013/KOS002', | ||||
|             'ext': 'mp4', | ||||
|             'id': '6c413323-383a-49dc-88f9-a22800cab024', | ||||
|             'ext': 'wmv', | ||||
|             'title': 'Developer Kick-Off Session: Stuff We Love', | ||||
|             'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', | ||||
|             'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', | ||||
|             'duration': 4576, | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'timestamp': 1377717420, | ||||
|             'upload_date': '20130828', | ||||
|             'session_code': 'KOS002', | ||||
|             'session_day': 'Day 1', | ||||
|             'session_room': 'Arena 1A', | ||||
|             'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', | ||||
|                                  'Mads Kristensen'], | ||||
|             'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', | ||||
|         'md5': 'b43ee4529d111bc37ba7ee4f34813e68', | ||||
|         'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', | ||||
|         'info_dict': { | ||||
|             'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', | ||||
|             'ext': 'mp4', | ||||
|             'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', | ||||
|             'ext': 'wmv', | ||||
|             'title': 'Self-service BI with Power BI - nuclear testing', | ||||
|             'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', | ||||
|             'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', | ||||
|             'duration': 1540, | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'timestamp': 1386381991, | ||||
|             'upload_date': '20131207', | ||||
|             'authors': ['Mike Wilmot'], | ||||
|         }, | ||||
|     }, { | ||||
|         # low quality mp4 is best | ||||
|         'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', | ||||
|         'info_dict': { | ||||
|             'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', | ||||
|             'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ranges for the Standard Library', | ||||
|             'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', | ||||
|             'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', | ||||
|             'duration': 5646, | ||||
|             'thumbnail': r're:http://.*\.jpg', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'upload_date': '20150930', | ||||
|             'timestamp': 1443640735, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor): | ||||
|             'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', | ||||
|             'title': 'Channel 9', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|         'playlist_mincount': 100, | ||||
|     }, { | ||||
|         'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', | ||||
|         'only_matching': True, | ||||
| @@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor): | ||||
|  | ||||
|     _RSS_URL = 'http://channel9.msdn.com/%s/RSS' | ||||
|  | ||||
|     def _formats_from_html(self, html): | ||||
|         FORMAT_REGEX = r''' | ||||
|             (?x) | ||||
|             <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s* | ||||
|             <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s* | ||||
|             (?:<div\s+class="popup\s+rounded">\s* | ||||
|             <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s* | ||||
|             </div>)?                                                # File size part may be missing | ||||
|         ''' | ||||
|         quality = qualities(( | ||||
|             'MP3', 'MP4', | ||||
|             'Low Quality WMV', 'Low Quality MP4', | ||||
|             'Mid Quality WMV', 'Mid Quality MP4', | ||||
|             'High Quality WMV', 'High Quality MP4')) | ||||
|         formats = [{ | ||||
|             'url': x.group('url'), | ||||
|             'format_id': x.group('quality'), | ||||
|             'format_note': x.group('note'), | ||||
|             'format': '%s (%s)' % (x.group('quality'), x.group('note')), | ||||
|             'filesize_approx': parse_filesize(x.group('filesize')), | ||||
|             'quality': quality(x.group('quality')), | ||||
|             'vcodec': 'none' if x.group('note') == 'Audio only' else None, | ||||
|         } for x in list(re.finditer(FORMAT_REGEX, html))] | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return formats | ||||
|  | ||||
|     def _extract_title(self, html): | ||||
|         title = self._html_search_meta('title', html, 'title') | ||||
|         if title is None: | ||||
|             title = self._og_search_title(html) | ||||
|             TITLE_SUFFIX = ' (Channel 9)' | ||||
|             if title is not None and title.endswith(TITLE_SUFFIX): | ||||
|                 title = title[:-len(TITLE_SUFFIX)] | ||||
|         return title | ||||
|  | ||||
|     def _extract_description(self, html): | ||||
|         DESCRIPTION_REGEX = r'''(?sx) | ||||
|             <div\s+class="entry-content">\s* | ||||
|             <div\s+id="entry-body">\s* | ||||
|             (?P<description>.+?)\s* | ||||
|             </div>\s* | ||||
|             </div> | ||||
|         ''' | ||||
|         m = re.search(DESCRIPTION_REGEX, html) | ||||
|         if m is not None: | ||||
|             return m.group('description') | ||||
|         return self._html_search_meta('description', html, 'description') | ||||
|  | ||||
|     def _extract_duration(self, html): | ||||
|         m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) | ||||
|         return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None | ||||
|  | ||||
|     def _extract_slides(self, html): | ||||
|         m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html) | ||||
|         return m.group('slidesurl') if m is not None else None | ||||
|  | ||||
|     def _extract_zip(self, html): | ||||
|         m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html) | ||||
|         return m.group('zipurl') if m is not None else None | ||||
|  | ||||
|     def _extract_avg_rating(self, html): | ||||
|         m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html) | ||||
|         return float(m.group('avgrating')) if m is not None else 0 | ||||
|  | ||||
|     def _extract_rating_count(self, html): | ||||
|         m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html) | ||||
|         return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0 | ||||
|  | ||||
|     def _extract_view_count(self, html): | ||||
|         m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html) | ||||
|         return int(self._fix_count(m.group('viewcount'))) if m is not None else 0 | ||||
|  | ||||
|     def _extract_comment_count(self, html): | ||||
|         m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html) | ||||
|         return int(self._fix_count(m.group('commentcount'))) if m is not None else 0 | ||||
|  | ||||
|     def _fix_count(self, count): | ||||
|         return int(str(count).replace(',', '')) if count is not None else None | ||||
|  | ||||
|     def _extract_authors(self, html): | ||||
|         m = re.search(r'(?s)<li class="author">(.*?)</li>', html) | ||||
|         if m is None: | ||||
|             return None | ||||
|         return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1)) | ||||
|  | ||||
|     def _extract_session_code(self, html): | ||||
|         m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html) | ||||
|         return m.group('code') if m is not None else None | ||||
|  | ||||
|     def _extract_session_day(self, html): | ||||
|         m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) | ||||
|         return m.group('day').strip() if m is not None else None | ||||
|  | ||||
|     def _extract_session_room(self, html): | ||||
|         m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html) | ||||
|         return m.group('room') if m is not None else None | ||||
|  | ||||
|     def _extract_session_speakers(self, html): | ||||
|         return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html) | ||||
|  | ||||
|     def _extract_content(self, html, content_path): | ||||
|         # Look for downloadable content | ||||
|         formats = self._formats_from_html(html) | ||||
|         slides = self._extract_slides(html) | ||||
|         zip_ = self._extract_zip(html) | ||||
|  | ||||
|         # Nothing to download | ||||
|         if len(formats) == 0 and slides is None and zip_ is None: | ||||
|             self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) | ||||
|             return | ||||
|  | ||||
|         # Extract meta | ||||
|         title = self._extract_title(html) | ||||
|         description = self._extract_description(html) | ||||
|         thumbnail = self._og_search_thumbnail(html) | ||||
|         duration = self._extract_duration(html) | ||||
|         avg_rating = self._extract_avg_rating(html) | ||||
|         rating_count = self._extract_rating_count(html) | ||||
|         view_count = self._extract_view_count(html) | ||||
|         comment_count = self._extract_comment_count(html) | ||||
|  | ||||
|         common = { | ||||
|             '_type': 'video', | ||||
|             'id': content_path, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'avg_rating': avg_rating, | ||||
|             'rating_count': rating_count, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|         result = [] | ||||
|  | ||||
|         if slides is not None: | ||||
|             d = common.copy() | ||||
|             d.update({'title': title + '-Slides', 'url': slides}) | ||||
|             result.append(d) | ||||
|  | ||||
|         if zip_ is not None: | ||||
|             d = common.copy() | ||||
|             d.update({'title': title + '-Zip', 'url': zip_}) | ||||
|             result.append(d) | ||||
|  | ||||
|         if len(formats) > 0: | ||||
|             d = common.copy() | ||||
|             d.update({'title': title, 'formats': formats}) | ||||
|             result.append(d) | ||||
|  | ||||
|         return result | ||||
|  | ||||
|     def _extract_entry_item(self, html, content_path): | ||||
|         contents = self._extract_content(html, content_path) | ||||
|         if contents is None: | ||||
|             return contents | ||||
|  | ||||
|         if len(contents) > 1: | ||||
|             raise ExtractorError('Got more than one entry') | ||||
|         result = contents[0] | ||||
|         result['authors'] = self._extract_authors(html) | ||||
|  | ||||
|         return result | ||||
|  | ||||
|     def _extract_session(self, html, content_path): | ||||
|         contents = self._extract_content(html, content_path) | ||||
|         if contents is None: | ||||
|             return contents | ||||
|  | ||||
|         session_meta = { | ||||
|             'session_code': self._extract_session_code(html), | ||||
|             'session_day': self._extract_session_day(html), | ||||
|             'session_room': self._extract_session_room(html), | ||||
|             'session_speakers': self._extract_session_speakers(html), | ||||
|         } | ||||
|  | ||||
|         for content in contents: | ||||
|             content.update(session_meta) | ||||
|  | ||||
|         return self.playlist_result(contents) | ||||
|  | ||||
|     def _extract_list(self, video_id, rss_url=None): | ||||
|         if not rss_url: | ||||
|             rss_url = self._RSS_URL % video_id | ||||
| @@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor): | ||||
|         return self.playlist_result(entries, video_id, title_text) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         content_path = mobj.group('contentpath') | ||||
|         rss = mobj.group('rss') | ||||
|         content_path, rss = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         if rss: | ||||
|             return self._extract_list(content_path, url) | ||||
| @@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             url, content_path, 'Downloading web page') | ||||
|  | ||||
|         page_type = self._search_regex( | ||||
|             r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2', | ||||
|             webpage, 'page type', default=None, group='pagetype') | ||||
|         if page_type: | ||||
|             if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content | ||||
|                 return self._extract_entry_item(webpage, content_path) | ||||
|             elif page_type == 'Session':  # Event session page, may contain downloadable content | ||||
|                 return self._extract_session(webpage, content_path) | ||||
|             elif page_type == 'Event': | ||||
|                 return self._extract_list(content_path) | ||||
|         episode_data = self._search_regex( | ||||
|             r"data-episode='([^']+)'", webpage, 'episode data', default=None) | ||||
|         if episode_data: | ||||
|             episode_data = self._parse_json(unescapeHTML( | ||||
|                 episode_data), content_path) | ||||
|             content_id = episode_data['contentId'] | ||||
|             is_session = '/Sessions(' in episode_data['api'] | ||||
|             content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] | ||||
|             if is_session: | ||||
|                 content_url += '?$expand=Speakers' | ||||
|             else: | ||||
|                 raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) | ||||
|         else:  # Assuming list | ||||
|                 content_url += '?$expand=Authors' | ||||
|             content_data = self._download_json(content_url, content_id) | ||||
|             title = content_data['Title'] | ||||
|  | ||||
|             QUALITIES = ( | ||||
|                 'mp3', | ||||
|                 'wmv', 'mp4', | ||||
|                 'wmv-low', 'mp4-low', | ||||
|                 'wmv-mid', 'mp4-mid', | ||||
|                 'wmv-high', 'mp4-high', | ||||
|             ) | ||||
|  | ||||
|             quality_key = qualities(QUALITIES) | ||||
|  | ||||
|             def quality(quality_id, format_url): | ||||
|                 return (len(QUALITIES) if '_Source.' in format_url | ||||
|                         else quality_key(quality_id)) | ||||
|  | ||||
|             formats = [] | ||||
|             urls = set() | ||||
|  | ||||
|             SITE_QUALITIES = { | ||||
|                 'MP3': 'mp3', | ||||
|                 'MP4': 'mp4', | ||||
|                 'Low Quality WMV': 'wmv-low', | ||||
|                 'Low Quality MP4': 'mp4-low', | ||||
|                 'Mid Quality WMV': 'wmv-mid', | ||||
|                 'Mid Quality MP4': 'mp4-mid', | ||||
|                 'High Quality WMV': 'wmv-high', | ||||
|                 'High Quality MP4': 'mp4-high', | ||||
|             } | ||||
|  | ||||
|             formats_select = self._search_regex( | ||||
|                 r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage, | ||||
|                 'formats select', default=None) | ||||
|             if formats_select: | ||||
|                 for mobj in re.finditer( | ||||
|                         r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<', | ||||
|                         formats_select): | ||||
|                     format_url = mobj.group('url') | ||||
|                     if format_url in urls: | ||||
|                         continue | ||||
|                     urls.add(format_url) | ||||
|                     format_id = mobj.group('format') | ||||
|                     quality_id = SITE_QUALITIES.get(format_id, format_id) | ||||
|                     formats.append({ | ||||
|                         'url': format_url, | ||||
|                         'format_id': quality_id, | ||||
|                         'quality': quality(quality_id, format_url), | ||||
|                         'vcodec': 'none' if quality_id == 'mp3' else None, | ||||
|                     }) | ||||
|  | ||||
|             API_QUALITIES = { | ||||
|                 'VideoMP4Low': 'mp4-low', | ||||
|                 'VideoWMV': 'wmv-mid', | ||||
|                 'VideoMP4Medium': 'mp4-mid', | ||||
|                 'VideoMP4High': 'mp4-high', | ||||
|                 'VideoWMVHQ': 'wmv-hq', | ||||
|             } | ||||
|  | ||||
|             for format_id, q in API_QUALITIES.items(): | ||||
|                 q_url = content_data.get(format_id) | ||||
|                 if not q_url or q_url in urls: | ||||
|                     continue | ||||
|                 urls.add(q_url) | ||||
|                 formats.append({ | ||||
|                     'url': q_url, | ||||
|                     'format_id': q, | ||||
|                     'quality': quality(q, q_url), | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             slides = content_data.get('Slides') | ||||
|             zip_file = content_data.get('ZipFile') | ||||
|  | ||||
|             if not formats and not slides and not zip_file: | ||||
|                 raise ExtractorError( | ||||
|                     'None of recording, slides or zip are available for %s' % content_path) | ||||
|  | ||||
|             subtitles = {} | ||||
|             for caption in content_data.get('Captions', []): | ||||
|                 caption_url = caption.get('Url') | ||||
|                 if not caption_url: | ||||
|                     continue | ||||
|                 subtitles.setdefault(caption.get('Language', 'en'), []).append({ | ||||
|                     'url': caption_url, | ||||
|                     'ext': 'vtt', | ||||
|                 }) | ||||
|  | ||||
|             common = { | ||||
|                 'id': content_id, | ||||
|                 'title': title, | ||||
|                 'description': clean_html(content_data.get('Description') or content_data.get('Body')), | ||||
|                 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), | ||||
|                 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), | ||||
|                 'timestamp': parse_iso8601(content_data.get('PublishedDate')), | ||||
|                 'avg_rating': int_or_none(content_data.get('Rating')), | ||||
|                 'rating_count': int_or_none(content_data.get('RatingCount')), | ||||
|                 'view_count': int_or_none(content_data.get('Views')), | ||||
|                 'comment_count': int_or_none(content_data.get('CommentCount')), | ||||
|                 'subtitles': subtitles, | ||||
|             } | ||||
|             if is_session: | ||||
|                 speakers = [] | ||||
|                 for s in content_data.get('Speakers', []): | ||||
|                     speaker_name = s.get('FullName') | ||||
|                     if not speaker_name: | ||||
|                         continue | ||||
|                     speakers.append(speaker_name) | ||||
|  | ||||
|                 common.update({ | ||||
|                     'session_code': content_data.get('Code'), | ||||
|                     'session_room': content_data.get('Room'), | ||||
|                     'session_speakers': speakers, | ||||
|                 }) | ||||
|             else: | ||||
|                 authors = [] | ||||
|                 for a in content_data.get('Authors', []): | ||||
|                     author_name = a.get('DisplayName') | ||||
|                     if not author_name: | ||||
|                         continue | ||||
|                     authors.append(author_name) | ||||
|                 common['authors'] = authors | ||||
|  | ||||
|             contents = [] | ||||
|  | ||||
|             if slides: | ||||
|                 d = common.copy() | ||||
|                 d.update({'title': title + '-Slides', 'url': slides}) | ||||
|                 contents.append(d) | ||||
|  | ||||
|             if zip_file: | ||||
|                 d = common.copy() | ||||
|                 d.update({'title': title + '-Zip', 'url': zip_file}) | ||||
|                 contents.append(d) | ||||
|  | ||||
|             if formats: | ||||
|                 d = common.copy() | ||||
|                 d.update({'title': title, 'formats': formats}) | ||||
|                 contents.append(d) | ||||
|             return self.playlist_result(contents) | ||||
|         else: | ||||
|             return self._extract_list(content_path) | ||||
|   | ||||
| @@ -1,97 +1,56 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     HEADRequest, | ||||
|     remove_end, | ||||
|     str_to_int, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CloudyIE(InfoExtractor): | ||||
|     _IE_DESC = 'cloudy.ec' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)?cloudy\.ec/ | ||||
|         (?:v/|embed\.php\?id=) | ||||
|         (?P<id>[A-Za-z0-9]+) | ||||
|         ''' | ||||
|     _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s' | ||||
|     _API_URL = 'http://www.cloudy.ec/api/player.api.php' | ||||
|     _MAX_TRIES = 2 | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.cloudy.ec/v/af511e2527aac', | ||||
|         'md5': '5cb253ace826a42f35b4740539bedf07', | ||||
|         'md5': '29832b05028ead1b58be86bf319397ca', | ||||
|         'info_dict': { | ||||
|             'id': 'af511e2527aac', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Funny Cats and Animals Compilation june 2013', | ||||
|             'upload_date': '20130913', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _extract_video(self, video_id, file_key, error_url=None, try_num=0): | ||||
|  | ||||
|         if try_num > self._MAX_TRIES - 1: | ||||
|             raise ExtractorError('Unable to extract video URL', expected=True) | ||||
|  | ||||
|         form = { | ||||
|             'file': video_id, | ||||
|             'key': file_key, | ||||
|         } | ||||
|  | ||||
|         if error_url: | ||||
|             form.update({ | ||||
|                 'numOfErrors': try_num, | ||||
|                 'errorCode': '404', | ||||
|                 'errorUrl': error_url, | ||||
|             }) | ||||
|  | ||||
|         player_data = self._download_webpage( | ||||
|             self._API_URL, video_id, 'Downloading player data', query=form) | ||||
|         data = compat_parse_qs(player_data) | ||||
|  | ||||
|         try_num += 1 | ||||
|  | ||||
|         if 'error' in data: | ||||
|             raise ExtractorError( | ||||
|                 '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), | ||||
|                 expected=True) | ||||
|  | ||||
|         title = data.get('title', [None])[0] | ||||
|         if title: | ||||
|             title = remove_end(title, '&asdasdas').strip() | ||||
|  | ||||
|         video_url = data.get('url', [None])[0] | ||||
|  | ||||
|         if video_url: | ||||
|             try: | ||||
|                 self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') | ||||
|             except ExtractorError as e: | ||||
|                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: | ||||
|                     self.report_warning('Invalid video URL, requesting another', video_id) | ||||
|                     return self._extract_video(video_id, file_key, video_url, try_num) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         url = self._EMBED_URL % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_webpage( | ||||
|             'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id) | ||||
|  | ||||
|         file_key = self._search_regex( | ||||
|             [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], | ||||
|             webpage, 'file_key') | ||||
|         info = self._parse_html5_media_entries(url, webpage, video_id)[0] | ||||
|  | ||||
|         return self._extract_video(video_id, file_key) | ||||
|         webpage = self._download_webpage( | ||||
|             'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) | ||||
|  | ||||
|         if webpage: | ||||
|             info.update({ | ||||
|                 'title': self._search_regex( | ||||
|                     r'<h\d[^>]*>([^<]+)<', webpage, 'title'), | ||||
|                 'upload_date': unified_strdate(self._search_regex( | ||||
|                     r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, | ||||
|                     'upload date', fatal=False)), | ||||
|                 'view_count': str_to_int(self._search_regex( | ||||
|                     r'([\d,.]+) views<', webpage, 'view count', fatal=False)), | ||||
|             }) | ||||
|  | ||||
|         if not info.get('title'): | ||||
|             info['title'] = video_id | ||||
|  | ||||
|         info['id'] = video_id | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| @@ -36,34 +37,35 @@ from ..utils import ( | ||||
|     clean_html, | ||||
|     compiled_regex_type, | ||||
|     determine_ext, | ||||
|     determine_protocol, | ||||
|     error_to_compat_str, | ||||
|     ExtractorError, | ||||
|     extract_attributes, | ||||
|     fix_xml_ampersands, | ||||
|     float_or_none, | ||||
|     GeoRestrictedError, | ||||
|     GeoUtils, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     orderedSet, | ||||
|     parse_codecs, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     parse_m3u8_attributes, | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
|     sanitized_Request, | ||||
|     sanitize_filename, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     urljoin, | ||||
|     url_basename, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     determine_protocol, | ||||
|     parse_duration, | ||||
|     mimetype2ext, | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     parse_m3u8_attributes, | ||||
|     extract_attributes, | ||||
|     parse_codecs, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -546,6 +548,34 @@ class InfoExtractor(object): | ||||
|  | ||||
|         return encoding | ||||
|  | ||||
|     def __check_blocked(self, content): | ||||
|         first_block = content[:512] | ||||
|         if ('<title>Access to this site is blocked</title>' in content and | ||||
|                 'Websense' in first_block): | ||||
|             msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' | ||||
|             blocked_iframe = self._html_search_regex( | ||||
|                 r'<iframe src="([^"]+)"', content, | ||||
|                 'Websense information URL', default=None) | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in first_block: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and | ||||
|                 'blocklist.rkn.gov.ru' in content): | ||||
|             raise ExtractorError( | ||||
|                 'Access to this webpage has been blocked by decision of the Russian government. ' | ||||
|                 'Visit http://blocklist.rkn.gov.ru/ for a block reason.', | ||||
|                 expected=True) | ||||
|  | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
| @@ -587,25 +617,7 @@ class InfoExtractor(object): | ||||
|         except LookupError: | ||||
|             content = webpage_bytes.decode('utf-8', 'replace') | ||||
|  | ||||
|         if ('<title>Access to this site is blocked</title>' in content and | ||||
|                 'Websense' in content[:512]): | ||||
|             msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' | ||||
|             blocked_iframe = self._html_search_regex( | ||||
|                 r'<iframe src="([^"]+)"', content, | ||||
|                 'Websense information URL', default=None) | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in content[:512]: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         self.__check_blocked(content) | ||||
|  | ||||
|         return content | ||||
|  | ||||
| @@ -714,6 +726,13 @@ class InfoExtractor(object): | ||||
|             video_info['title'] = video_title | ||||
|         return video_info | ||||
|  | ||||
|     def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None): | ||||
|         urlrs = orderedSet( | ||||
|             self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) | ||||
|             for m in matches) | ||||
|         return self.playlist_result( | ||||
|             urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
|     @staticmethod | ||||
|     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): | ||||
|         """Returns a playlist""" | ||||
| @@ -2010,7 +2029,7 @@ class InfoExtractor(object): | ||||
|                 }) | ||||
|         return formats | ||||
|  | ||||
|     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None): | ||||
|     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): | ||||
|         def absolute_url(video_url): | ||||
|             return compat_urlparse.urljoin(base_url, video_url) | ||||
|  | ||||
| @@ -2032,7 +2051,8 @@ class InfoExtractor(object): | ||||
|                 is_plain_url = False | ||||
|                 formats = self._extract_m3u8_formats( | ||||
|                     full_url, video_id, ext='mp4', | ||||
|                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) | ||||
|                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, | ||||
|                     preference=preference) | ||||
|             elif ext == 'mpd': | ||||
|                 is_plain_url = False | ||||
|                 formats = self._extract_mpd_formats( | ||||
| @@ -2160,18 +2180,24 @@ class InfoExtractor(object): | ||||
|                     }) | ||||
|         return formats | ||||
|  | ||||
|     @staticmethod | ||||
|     def _find_jwplayer_data(webpage): | ||||
|     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): | ||||
|         mobj = re.search( | ||||
|             r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('options') | ||||
|             try: | ||||
|                 jwplayer_data = self._parse_json(mobj.group('options'), | ||||
|                                                  video_id=video_id, | ||||
|                                                  transform_source=transform_source) | ||||
|             except ExtractorError: | ||||
|                 pass | ||||
|             else: | ||||
|                 if isinstance(jwplayer_data, dict): | ||||
|                     return jwplayer_data | ||||
|  | ||||
|     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): | ||||
|         jwplayer_data = self._parse_json( | ||||
|             self._find_jwplayer_data(webpage), video_id, | ||||
|             transform_source=js_to_json) | ||||
|         jwplayer_data = self._find_jwplayer_data( | ||||
|             webpage, video_id, transform_source=js_to_json) | ||||
|         return self._parse_jwplayer_data( | ||||
|             jwplayer_data, video_id, *args, **kwargs) | ||||
|  | ||||
| @@ -2197,56 +2223,9 @@ class InfoExtractor(object): | ||||
|  | ||||
|             this_video_id = video_id or video_data['mediaid'] | ||||
|  | ||||
|             formats = [] | ||||
|             for source in video_data['sources']: | ||||
|                 source_url = self._proto_relative_url(source['file']) | ||||
|                 if base_url: | ||||
|                     source_url = compat_urlparse.urljoin(base_url, source_url) | ||||
|                 source_type = source.get('type') or '' | ||||
|                 ext = mimetype2ext(source_type) or determine_ext(source_url) | ||||
|                 if source_type == 'hls' or ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) | ||||
|                 elif ext == 'mpd': | ||||
|                     formats.extend(self._extract_mpd_formats( | ||||
|                         source_url, this_video_id, mpd_id=mpd_id, fatal=False)) | ||||
|                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 | ||||
|                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): | ||||
|                     formats.append({ | ||||
|                         'url': source_url, | ||||
|                         'vcodec': 'none', | ||||
|                         'ext': ext, | ||||
|                     }) | ||||
|                 else: | ||||
|                     height = int_or_none(source.get('height')) | ||||
|                     if height is None: | ||||
|                         # Often no height is provided but there is a label in | ||||
|                         # format like 1080p. | ||||
|                         height = int_or_none(self._search_regex( | ||||
|                             r'^(\d{3,})[pP]$', source.get('label') or '', | ||||
|                             'height', default=None)) | ||||
|                     a_format = { | ||||
|                         'url': source_url, | ||||
|                         'width': int_or_none(source.get('width')), | ||||
|                         'height': height, | ||||
|                         'ext': ext, | ||||
|                     } | ||||
|                     if source_url.startswith('rtmp'): | ||||
|                         a_format['ext'] = 'flv' | ||||
|  | ||||
|                         # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as | ||||
|                         # of jwplayer.flash.swf | ||||
|                         rtmp_url_parts = re.split( | ||||
|                             r'((?:mp4|mp3|flv):)', source_url, 1) | ||||
|                         if len(rtmp_url_parts) == 3: | ||||
|                             rtmp_url, prefix, play_path = rtmp_url_parts | ||||
|                             a_format.update({ | ||||
|                                 'url': rtmp_url, | ||||
|                                 'play_path': prefix + play_path, | ||||
|                             }) | ||||
|                         if rtmp_params: | ||||
|                             a_format.update(rtmp_params) | ||||
|                     formats.append(a_format) | ||||
|             formats = self._parse_jwplayer_formats( | ||||
|                 video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id, | ||||
|                 mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             subtitles = {} | ||||
| @@ -2277,6 +2256,65 @@ class InfoExtractor(object): | ||||
|         else: | ||||
|             return self.playlist_result(entries) | ||||
|  | ||||
|     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, | ||||
|                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): | ||||
|         formats = [] | ||||
|         for source in jwplayer_sources_data: | ||||
|             source_url = self._proto_relative_url(source['file']) | ||||
|             if base_url: | ||||
|                 source_url = compat_urlparse.urljoin(base_url, source_url) | ||||
|             source_type = source.get('type') or '' | ||||
|             ext = mimetype2ext(source_type) or determine_ext(source_url) | ||||
|             if source_type == 'hls' or ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     source_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id=m3u8_id, fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     source_url, video_id, mpd_id=mpd_id, fatal=False)) | ||||
|             elif ext == 'smil': | ||||
|                 formats.extend(self._extract_smil_formats( | ||||
|                     source_url, video_id, fatal=False)) | ||||
|             # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 | ||||
|             elif source_type.startswith('audio') or ext in ( | ||||
|                     'oga', 'aac', 'mp3', 'mpeg', 'vorbis'): | ||||
|                 formats.append({ | ||||
|                     'url': source_url, | ||||
|                     'vcodec': 'none', | ||||
|                     'ext': ext, | ||||
|                 }) | ||||
|             else: | ||||
|                 height = int_or_none(source.get('height')) | ||||
|                 if height is None: | ||||
|                     # Often no height is provided but there is a label in | ||||
|                     # format like "1080p", "720p SD", or 1080. | ||||
|                     height = int_or_none(self._search_regex( | ||||
|                         r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''), | ||||
|                         'height', default=None)) | ||||
|                 a_format = { | ||||
|                     'url': source_url, | ||||
|                     'width': int_or_none(source.get('width')), | ||||
|                     'height': height, | ||||
|                     'tbr': int_or_none(source.get('bitrate')), | ||||
|                     'ext': ext, | ||||
|                 } | ||||
|                 if source_url.startswith('rtmp'): | ||||
|                     a_format['ext'] = 'flv' | ||||
|                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as | ||||
|                     # of jwplayer.flash.swf | ||||
|                     rtmp_url_parts = re.split( | ||||
|                         r'((?:mp4|mp3|flv):)', source_url, 1) | ||||
|                     if len(rtmp_url_parts) == 3: | ||||
|                         rtmp_url, prefix, play_path = rtmp_url_parts | ||||
|                         a_format.update({ | ||||
|                             'url': rtmp_url, | ||||
|                             'play_path': prefix + play_path, | ||||
|                         }) | ||||
|                     if rtmp_params: | ||||
|                         a_format.update(rtmp_params) | ||||
|                 formats.append(a_format) | ||||
|         return formats | ||||
|  | ||||
|     def _live_title(self, name): | ||||
|         """ Generate the title for a live video """ | ||||
|         now = datetime.datetime.now() | ||||
|   | ||||
| @@ -9,13 +9,14 @@ from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     orderedSet, | ||||
|     remove_end, | ||||
|     extract_attributes, | ||||
|     mimetype2ext, | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     orderedSet, | ||||
|     parse_iso8601, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor): | ||||
|             'upload_date': '20130314', | ||||
|             'timestamp': 1363219200, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series', | ||||
|         'info_dict': { | ||||
|             'id': '58d1865bfd2e6126e2000015', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The Only True Surprise? Trump’s an Idiot', | ||||
|             'uploader': 'gq', | ||||
|             'upload_date': '20170321', | ||||
|             'timestamp': 1490126427, | ||||
|         }, | ||||
|     }, { | ||||
|         # JS embed | ||||
|         'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', | ||||
| @@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor): | ||||
|             }) | ||||
|         video_id = query['videoId'] | ||||
|         video_info = None | ||||
|         info_page = self._download_webpage( | ||||
|         info_page = self._download_json( | ||||
|             'http://player.cnevids.com/player/video.js', | ||||
|             video_id, 'Downloading video info', query=query, fatal=False) | ||||
|             video_id, 'Downloading video info', fatal=False, query=query) | ||||
|         if info_page: | ||||
|             video_info = self._parse_json(self._search_regex( | ||||
|                 r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] | ||||
|         else: | ||||
|             video_info = info_page.get('video') | ||||
|         if not video_info: | ||||
|             info_page = self._download_webpage( | ||||
|                 'http://player.cnevids.com/player/loader.js', | ||||
|                 video_id, 'Downloading loader info', query=query) | ||||
|             video_info = self._parse_json(self._search_regex( | ||||
|                 r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id) | ||||
|             video_info = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'), | ||||
|                 video_id, transform_source=js_to_json)['video'] | ||||
|  | ||||
|         title = video_info['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for fdata in video_info.get('sources', [{}])[0]: | ||||
|         for fdata in video_info['sources']: | ||||
|             src = fdata.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = mimetype2ext(fdata.get('type')) or determine_ext(src) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|                 continue | ||||
|             quality = fdata.get('quality') | ||||
|             formats.append({ | ||||
|                 'format_id': ext + ('-%s' % quality if quality else ''), | ||||
| @@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor): | ||||
|                 path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/'))) | ||||
|             url_type = 'embed' | ||||
|  | ||||
|         self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) | ||||
|         webpage = self._download_webpage(url, item_id) | ||||
|  | ||||
|         if url_type == 'series': | ||||
|   | ||||
| @@ -177,6 +177,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|             'uploader': 'Kadokawa Pictures Inc.', | ||||
|             'upload_date': '20170118', | ||||
|             'series': "KONOSUBA -God's blessing on this wonderful world!", | ||||
|             'season': "KONOSUBA -God's blessing on this wonderful world! 2", | ||||
|             'season_number': 2, | ||||
|             'episode': 'Give Me Deliverance from this Judicial Injustice!', | ||||
|             'episode_number': 1, | ||||
| @@ -207,6 +208,38 @@ class CrunchyrollIE(CrunchyrollBaseIE): | ||||
|             # Just test metadata extraction | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # make sure we can extract an uploader name that's not a link | ||||
|         'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899', | ||||
|         'info_dict': { | ||||
|             'id': '606899', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors', | ||||
|             'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"', | ||||
|             'uploader': 'Geneon Entertainment', | ||||
|             'upload_date': '20120717', | ||||
|         }, | ||||
|         'params': { | ||||
|             # just test metadata extraction | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # A video with a vastly different season name compared to the series name | ||||
|         'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', | ||||
|         'info_dict': { | ||||
|             'id': '590532', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test', | ||||
|             'description': 'Mahiro and Nyaruko talk about official certification.', | ||||
|             'uploader': 'TV TOKYO', | ||||
|             'upload_date': '20120305', | ||||
|             'series': 'Nyarko-san: Another Crawling Chaos', | ||||
|             'season': 'Haiyoru! Nyaruani (ONA)', | ||||
|         }, | ||||
|         'params': { | ||||
|             # Just test metadata extraction | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _FORMAT_IDS = { | ||||
| @@ -357,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         else: | ||||
|             webpage_url = 'http://www.' + mobj.group('url') | ||||
|  | ||||
|         webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage') | ||||
|         webpage = self._download_webpage( | ||||
|             self._add_skip_wall(webpage_url), video_id, | ||||
|             headers=self.geo_verification_headers()) | ||||
|         note_m = self._html_search_regex( | ||||
|             r'<div class="showmedia-trailer-notice">(.+?)</div>', | ||||
|             webpage, 'trailer-notice', default='') | ||||
| @@ -388,8 +423,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         if video_upload_date: | ||||
|             video_upload_date = unified_strdate(video_upload_date) | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, | ||||
|             'video_uploader', fatal=False) | ||||
|             # try looking for both an uploader that's a link and one that's not | ||||
|             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], | ||||
|             webpage, 'video_uploader', fatal=False) | ||||
|  | ||||
|         available_fmts = [] | ||||
|         for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): | ||||
| @@ -475,7 +511,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|         # webpage provide more accurate data than series_title from XML | ||||
|         series = self._html_search_regex( | ||||
|             r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)', | ||||
|             webpage, 'series', default=xpath_text(metadata, 'series_title')) | ||||
|             webpage, 'series', fatal=False) | ||||
|         season = xpath_text(metadata, 'series_title') | ||||
|  | ||||
|         episode = xpath_text(metadata, 'episode_title') | ||||
|         episode_number = int_or_none(xpath_text(metadata, 'episode_number')) | ||||
| @@ -492,6 +529,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'series': series, | ||||
|             'season': season, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
| @@ -529,16 +567,18 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         show_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(self._add_skip_wall(url), show_id) | ||||
|         webpage = self._download_webpage( | ||||
|             self._add_skip_wall(url), show_id, | ||||
|             headers=self.geo_verification_headers()) | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>', | ||||
|             webpage, 'title') | ||||
|         episode_paths = re.findall( | ||||
|             r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"', | ||||
|             r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', | ||||
|             webpage) | ||||
|         entries = [ | ||||
|             self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll') | ||||
|             for ep in episode_paths | ||||
|             self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) | ||||
|             for ep_id, ep in episode_paths | ||||
|         ] | ||||
|         entries.reverse() | ||||
|  | ||||
|   | ||||
| @@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor): | ||||
|                             'url': quality_url, | ||||
|                             'tbr': tbr, | ||||
|                         }) | ||||
|         video_metadata = video_data['assetFields'] | ||||
|         ism_url = video_metadata.get('smoothStreamingUrl') | ||||
|         if ism_url: | ||||
|             formats.extend(self._extract_ism_formats( | ||||
|                 ism_url, video_id, ism_id='mss', fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [{ | ||||
| @@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor): | ||||
|             'height': image.get('height'), | ||||
|         } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None | ||||
|  | ||||
|         video_metadata = video_data['assetFields'] | ||||
|  | ||||
|         subtitles = { | ||||
|             'en': [{ | ||||
|                 'url': video_metadata['UnicornCcUrl'], | ||||
|   | ||||
| @@ -282,9 +282,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): | ||||
|         } | ||||
|  | ||||
|     def _check_error(self, info): | ||||
|         error = info.get('error') | ||||
|         if info.get('error') is not None: | ||||
|             title = error['title'] | ||||
|             # See https://developer.dailymotion.com/api#access-error | ||||
|             if error.get('code') == 'DM007': | ||||
|                 self.raise_geo_restricted(msg=title) | ||||
|             raise ExtractorError( | ||||
|                 '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True) | ||||
|                 '%s said: %s' % (self.IE_NAME, title), expected=True) | ||||
|  | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|   | ||||
							
								
								
									
										159
									
								
								youtube_dl/extractor/daisuki.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								youtube_dl/extractor/daisuki.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,159 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import json | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
|     aes_cbc_encrypt, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     bytes_to_long, | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     intlist_to_bytes, | ||||
|     get_element_by_id, | ||||
|     js_to_json, | ||||
|     int_or_none, | ||||
|     long_to_bytes, | ||||
|     pkcs1pad, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DaisukiIE(InfoExtractor): | ||||
|     # Extractor for a single daisuki.net "watch" page. The trailing numeric | ||||
|     # component of the URL is captured as the video id. | ||||
|     _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', | ||||
|         'info_dict': { | ||||
|             'id': '11213', | ||||
|             'ext': 'mp4', | ||||
|             'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', | ||||
|             'subtitles': { | ||||
|                 'mul': [{ | ||||
|                     'ext': 'ttml', | ||||
|                 }], | ||||
|             }, | ||||
|             'creator': 'BANDAI NAMCO Entertainment', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # AES-encrypted HLS stream | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     # The public key in PEM format can be found in clientlibs_anime_watch.min.js | ||||
|     # It is stored here as a (modulus, public exponent) pair and unpacked as | ||||
|     # `n, e = self._RSA_KEY` in _real_extract. | ||||
|     _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # Overall flow: read `flashvars` from the watch page, send an encrypted | ||||
|         # request to /bin/bgn/init, decrypt the `rtn` field of the reply and | ||||
|         # build HLS formats (plus an optional TTML subtitle track) from it. | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # `flashvars` is a JavaScript object literal in the page, so it is | ||||
|         # passed through js_to_json before being parsed. | ||||
|         flashvars = self._parse_json(self._search_regex( | ||||
|             r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|  | ||||
|         # All-zero 16-element IV, shared by the request encryption and the | ||||
|         # response decryption below. | ||||
|         iv = [0] * 16 | ||||
|  | ||||
|         # Request payload: selected flashvars fields, '' when a field is absent. | ||||
|         data = {} | ||||
|         for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): | ||||
|             data[key] = flashvars.get(key, '') | ||||
|  | ||||
|         encrypted_rtn = None | ||||
|  | ||||
|         # Some AES keys are rejected. Try it with different AES keys | ||||
|         # Each of the (at most 5) attempts generates a fresh 32-element key. | ||||
|         for idx in range(5): | ||||
|             # NOTE(review): randint(0, 254) is inclusive on both ends, so the | ||||
|             # value 255 is never produced - confirm whether that is intentional. | ||||
|             aes_key = [random.randint(0, 254) for _ in range(32)] | ||||
|             # Pad the key to 128 elements with pkcs1pad, then RSA-encrypt it | ||||
|             # as pow(m, e, n) using the public key above. | ||||
|             padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128)) | ||||
|  | ||||
|             n, e = self._RSA_KEY | ||||
|             encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) | ||||
|             init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={ | ||||
|                 's': flashvars.get('s', ''), | ||||
|                 'c': flashvars.get('ss3_prm', ''), | ||||
|                 'e': url, | ||||
|                 # 'd': base64 of the AES-CBC-encrypted JSON dump of `data` | ||||
|                 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( | ||||
|                     bytes_to_intlist(json.dumps(data)), | ||||
|                     aes_key, iv))).decode('ascii'), | ||||
|                 # 'a': base64 of the RSA-encrypted AES key | ||||
|                 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), | ||||
|             }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) | ||||
|  | ||||
|             # An 'rtn' field in the response marks an accepted key; aes_key | ||||
|             # keeps the accepted value for the decryption after the loop. | ||||
|             if 'rtn' in init_data: | ||||
|                 encrypted_rtn = init_data['rtn'] | ||||
|                 break | ||||
|  | ||||
|             # Wait before the next attempt (also reached after the last failed one). | ||||
|             self._sleep(5, video_id) | ||||
|  | ||||
|         if encrypted_rtn is None: | ||||
|             raise ExtractorError('Failed to fetch init data') | ||||
|  | ||||
|         # Decrypt the base64-encoded payload with the accepted key and strip | ||||
|         # trailing NUL characters before parsing it as JSON. | ||||
|         rtn = self._parse_json( | ||||
|             intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( | ||||
|                 base64.b64decode(encrypted_rtn)), | ||||
|                 aes_key, iv)).decode('utf-8').rstrip('\0'), | ||||
|             video_id) | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') | ||||
|  | ||||
|         # The og:title carries a ' - DAISUKI' suffix that is dropped here. | ||||
|         title = remove_end(self._og_search_title(webpage), ' - DAISUKI') | ||||
|  | ||||
|         creator = self._html_search_regex( | ||||
|             r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) | ||||
|  | ||||
|         # Subtitles are optional: the dict stays empty without a caption_url. | ||||
|         subtitles = {} | ||||
|         caption_url = rtn.get('caption_url') | ||||
|         if caption_url: | ||||
|             # mul: multiple languages | ||||
|             subtitles['mul'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'creator': creator, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DaisukiPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', | ||||
|         'info_dict': { | ||||
|             'id': 'TheIdolMasterCG', | ||||
|             'title': 'THE IDOLM@STER CINDERELLA GIRLS', | ||||
|             'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', | ||||
|         }, | ||||
|         'playlist_count': 26, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         episode_pattern = r'''(?sx) | ||||
|             <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+? | ||||
|             <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)''' | ||||
|         entries = [{ | ||||
|             '_type': 'url_transparent', | ||||
|             'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'), | ||||
|             'episode_id': episode_id, | ||||
|             'episode_number': int_or_none(episode_id), | ||||
|         } for movie_id, episode_id in re.findall(episode_pattern, webpage)] | ||||
|  | ||||
|         playlist_title = remove_end( | ||||
|             self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI') | ||||
|         playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage)) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | ||||
| @@ -1,17 +1,21 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     ExtractorError, | ||||
|     remove_end, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DiscoveryGoIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?: | ||||
| class DiscoveryGoBaseIE(InfoExtractor): | ||||
|     _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: | ||||
|             discovery| | ||||
|             investigationdiscovery| | ||||
|             discoverylife| | ||||
| @@ -21,18 +25,23 @@ class DiscoveryGoIE(InfoExtractor): | ||||
|             sciencechannel| | ||||
|             tlc| | ||||
|             velocitychannel | ||||
|         )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)''' | ||||
|         )go\.com/%s(?P<id>[^/?#&]+)''' | ||||
|  | ||||
|  | ||||
| class DiscoveryGoIE(DiscoveryGoBaseIE): | ||||
|     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TEST = { | ||||
|         'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', | ||||
|         'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', | ||||
|         'info_dict': { | ||||
|             'id': '57a33c536b66d1cd0345eeb1', | ||||
|             'id': '58c167d86b66d12f2addeb01', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kiss First, Ask Questions Later!', | ||||
|             'description': 'md5:fe923ba34050eae468bffae10831cb22', | ||||
|             'duration': 2579, | ||||
|             'series': 'Love at First Kiss', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 1, | ||||
|             'title': 'Reaper Madness', | ||||
|             'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', | ||||
|             'duration': 2519, | ||||
|             'series': 'Bering Sea Gold', | ||||
|             'season_number': 8, | ||||
|             'episode_number': 6, | ||||
|             'age_limit': 14, | ||||
|         }, | ||||
|     } | ||||
| @@ -113,3 +122,46 @@ class DiscoveryGoIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): | ||||
|     _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.discoverygo.com/bering-sea-gold/', | ||||
|         'info_dict': { | ||||
|             'id': 'bering-sea-gold', | ||||
|             'title': 'Bering Sea Gold', | ||||
|             'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if DiscoveryGoIE.suitable(url) else super( | ||||
|             DiscoveryGoPlaylistIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         entries = [] | ||||
|         for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage): | ||||
|             data = self._parse_json( | ||||
|                 mobj.group('json'), display_id, | ||||
|                 transform_source=unescapeHTML, fatal=False) | ||||
|             if not isinstance(data, dict) or data.get('type') != 'episode': | ||||
|                 continue | ||||
|             episode_url = data.get('socialUrl') | ||||
|             if not episode_url: | ||||
|                 continue | ||||
|             entries.append(self.url_result( | ||||
|                 episode_url, ie=DiscoveryGoIE.ie_key(), | ||||
|                 video_id=data.get('id'))) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, display_id, | ||||
|             remove_end(self._og_search_title( | ||||
|                 webpage, fatal=False), ' | Discovery GO'), | ||||
|             self._og_search_description(webpage)) | ||||
|   | ||||
| @@ -9,13 +9,13 @@ from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import smuggle_url | ||||
| 
 | ||||
| 
 | ||||
| class TlcDeIE(InfoExtractor): | ||||
|     IE_NAME = 'tlc.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' | ||||
| class DiscoveryNetworksDeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))' | ||||
| 
 | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', | ||||
|         'info_dict': { | ||||
|             'id': '3235167922001', | ||||
| @@ -29,7 +29,13 @@ class TlcDeIE(InfoExtractor): | ||||
|             'upload_date': '20140404', | ||||
|             'uploader_id': '1659832546', | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.discovery.de/#5332316765001', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s' | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
| @@ -39,5 +45,8 @@ class TlcDeIE(InfoExtractor): | ||||
|             title = mobj.group('title') | ||||
|             webpage = self._download_webpage(url, title) | ||||
|             brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) | ||||
|             brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] | ||||
|         return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) | ||||
|             brightcove_id = compat_parse_qs(compat_urlparse.urlparse( | ||||
|                 brightcove_legacy_url).query)['@videoPlayer'][0] | ||||
|         return self.url_result(smuggle_url( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}), | ||||
|             'BrightcoveNew', brightcove_id) | ||||
							
								
								
									
										59
									
								
								youtube_dl/extractor/discoveryvr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								youtube_dl/extractor/discoveryvr.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DiscoveryVRIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction', | ||||
|         'md5': '32b1929798c464a54356378b7912eca4', | ||||
|         'info_dict': { | ||||
|             'id': 'discovery-vr-an-introduction', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Discovery VR - An Introduction', | ||||
|             'description': 'md5:80d418a10efb8899d9403e61d8790f06', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         bootstrap_data = self._search_regex( | ||||
|             r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";', | ||||
|             webpage, 'bootstrap data') | ||||
|         bootstrap_data = self._parse_json( | ||||
|             bootstrap_data.encode('utf-8').decode('unicode_escape'), | ||||
|             display_id) | ||||
|         videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos'] | ||||
|         video_data = next(video for video in videos if video.get('slug') == display_id) | ||||
|  | ||||
|         series = video_data.get('showTitle') | ||||
|         title = episode = video_data.get('title') or series | ||||
|         if series and series != title: | ||||
|             title = '%s - %s' % (series, title) | ||||
|  | ||||
|         formats = [] | ||||
|         for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')): | ||||
|             f_url = video_data.get(f) | ||||
|             if not f_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': f_url, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': display_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': video_data.get('description'), | ||||
|             'thumbnail': video_data.get('thumbnail'), | ||||
|             'duration': parse_duration(video_data.get('runTime')), | ||||
|             'formats': formats, | ||||
|             'episode': episode, | ||||
|             'series': series, | ||||
|         } | ||||
| @@ -1,15 +1,10 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import time | ||||
| import uuid | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| @@ -25,7 +20,7 @@ class DouyuTVIE(InfoExtractor): | ||||
|             'id': '17732', | ||||
|             'display_id': 'iseven', | ||||
|             'ext': 'flv', | ||||
|             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': r're:.*m7show@163\.com.*', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': '7师傅', | ||||
| @@ -56,7 +51,7 @@ class DouyuTVIE(InfoExtractor): | ||||
|             'id': '17732', | ||||
|             'display_id': '17732', | ||||
|             'ext': 'flv', | ||||
|             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': r're:.*m7show@163\.com.*', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': '7师傅', | ||||
| @@ -74,10 +69,6 @@ class DouyuTVIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf | ||||
|     # is encrypted originally, but ffdec can dump memory to get the decrypted one. | ||||
|     _API_KEY = 'A12Svb&%1UUmf@hC' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
| @@ -88,6 +79,7 @@ class DouyuTVIE(InfoExtractor): | ||||
|             room_id = self._html_search_regex( | ||||
|                 r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') | ||||
|  | ||||
|         # Grab metadata from mobile API | ||||
|         room = self._download_json( | ||||
|             'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, | ||||
|             note='Downloading room info')['data'] | ||||
| @@ -96,38 +88,22 @@ class DouyuTVIE(InfoExtractor): | ||||
|         if room.get('show_status') == '2': | ||||
|             raise ExtractorError('Live stream is offline', expected=True) | ||||
|  | ||||
|         tt = compat_str(int(time.time() / 60)) | ||||
|         did = uuid.uuid4().hex.upper() | ||||
|  | ||||
|         sign_content = ''.join((room_id, did, self._API_KEY, tt)) | ||||
|         sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() | ||||
|  | ||||
|         flv_data = compat_urllib_parse_urlencode({ | ||||
|             'cdn': 'ws', | ||||
|             'rate': '0', | ||||
|             'tt': tt, | ||||
|             'did': did, | ||||
|             'sign': sign, | ||||
|         }) | ||||
|  | ||||
|         video_info = self._download_json( | ||||
|             'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, | ||||
|             data=flv_data, note='Downloading video info', | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         error_code = video_info.get('error', 0) | ||||
|         if error_code is not 0: | ||||
|             raise ExtractorError( | ||||
|                 '%s reported error %i' % (self.IE_NAME, error_code), | ||||
|                 expected=True) | ||||
|  | ||||
|         base_url = video_info['data']['rtmp_url'] | ||||
|         live_path = video_info['data']['rtmp_live'] | ||||
|  | ||||
|         video_url = '%s/%s' % (base_url, live_path) | ||||
|         # Grab the URL from PC client API | ||||
|         # The m3u8 url from mobile API requires re-authentication every 5 minutes | ||||
|         tt = int(time.time()) | ||||
|         signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt) | ||||
|         sign = hashlib.md5(signContent.encode('ascii')).hexdigest() | ||||
|         video_url = self._download_json( | ||||
|             'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id, | ||||
|             video_id, note='Downloading video URL info', | ||||
|             query={'rate': 0}, headers={ | ||||
|                 'auth': sign, | ||||
|                 'time': str(tt), | ||||
|                 'aid': 'pcclient' | ||||
|             })['data']['live_url'] | ||||
|  | ||||
|         title = self._live_title(unescapeHTML(room['room_name'])) | ||||
|         description = room.get('notice') | ||||
|         description = room.get('show_details') | ||||
|         thumbnail = room.get('room_src') | ||||
|         uploader = room.get('nickname') | ||||
|  | ||||
|   | ||||
| @@ -6,37 +6,24 @@ import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     USER_AGENTS, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
|     remove_end, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # geo restricted, via direct unsigned hls URL | ||||
|         'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', | ||||
|         'info_dict': { | ||||
|             'id': '1255600', | ||||
|             'display_id': 'stagione-1-episodio-25', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Episodio 25', | ||||
|             'description': 'md5:cae5f40ad988811b197d2d27a53227eb', | ||||
|             'duration': 2761, | ||||
|             'timestamp': 1454701800, | ||||
|             'upload_date': '20160205', | ||||
|             'creator': 'RTIT', | ||||
|             'series': 'Take me out', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 25, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     }, { | ||||
|         # non geo restricted, via secure api, unsigned download hls URL | ||||
|         'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', | ||||
|         'info_dict': { | ||||
| @@ -168,3 +155,90 @@ class DPlayIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class DPlayItIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)' | ||||
|     _GEO_COUNTRIES = ['IT'] | ||||
|     _TEST = { | ||||
|         'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', | ||||
|         'md5': '2b808ffb00fc47b884a172ca5d13053c', | ||||
|         'info_dict': { | ||||
|             'id': '6918', | ||||
|             'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', | ||||
|             'description': 'md5:3c7a4303aef85868f867a26f5cc14813', | ||||
|             'thumbnail': r're:^https?://.*\.jpe?g', | ||||
|             'upload_date': '20160524', | ||||
|             'series': 'Biografie imbarazzanti', | ||||
|             'season_number': 1, | ||||
|             'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', | ||||
|             'episode_number': 1, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         info_url = self._search_regex( | ||||
|             r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         title = remove_end(self._og_search_title(webpage), ' | Dplay') | ||||
|  | ||||
|         try: | ||||
|             info = self._download_json( | ||||
|                 info_url, display_id, headers={ | ||||
|                     'Authorization': 'Bearer %s' % self._get_cookies(url).get( | ||||
|                         'dplayit_token').value, | ||||
|                     'Referer': url, | ||||
|                 }) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): | ||||
|                 info = self._parse_json(e.cause.read().decode('utf-8'), display_id) | ||||
|                 error = info['errors'][0] | ||||
|                 if error.get('code') == 'access.denied.geoblocked': | ||||
|                     self.raise_geo_restricted( | ||||
|                         msg=error.get('detail'), countries=self._GEO_COUNTRIES) | ||||
|                 raise ExtractorError(info['errors'][0]['detail'], expected=True) | ||||
|             raise | ||||
|  | ||||
|         hls_url = info['data']['attributes']['streaming']['hls']['url'] | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|             m3u8_id='hls') | ||||
|  | ||||
|         series = self._html_search_regex( | ||||
|             r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>', | ||||
|             webpage, 'series', fatal=False) | ||||
|         episode = self._search_regex( | ||||
|             r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)', | ||||
|             webpage, 'episode', fatal=False) | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             season_number = int(mobj.group('season_number')) | ||||
|             episode_number = int(mobj.group('episode_number')) | ||||
|             upload_date = unified_strdate(mobj.group('upload_date')) | ||||
|         else: | ||||
|             season_number = episode_number = upload_date = None | ||||
|  | ||||
|         return { | ||||
|             'id': info_url.rpartition('/')[-1], | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -15,6 +15,8 @@ from ..utils import ( | ||||
|  | ||||
| class DRTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|     _GEO_BYPASS = False | ||||
|     _GEO_COUNTRIES = ['DK'] | ||||
|     IE_NAME = 'drtv' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', | ||||
| @@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor): | ||||
|         if not formats and restricted_to_denmark: | ||||
|             self.raise_geo_restricted( | ||||
|                 'Unfortunately, DR is not allowed to show this program outside Denmark.', | ||||
|                 expected=True) | ||||
|                 countries=self._GEO_COUNTRIES) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -156,6 +158,7 @@ class DRTVIE(InfoExtractor): | ||||
| class DRTVLiveIE(InfoExtractor): | ||||
|     IE_NAME = 'drtv:live' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)' | ||||
|     _GEO_COUNTRIES = ['DK'] | ||||
|     _TEST = { | ||||
|         'url': 'https://www.dr.dk/tv/live/dr1', | ||||
|         'info_dict': { | ||||
|   | ||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/etonline.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/etonline.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class ETOnlineIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/', | ||||
|         'info_dict': { | ||||
|             'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale', | ||||
|             'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6', | ||||
|             'description': 'md5:8b94484063f463cca709617c79618ccd', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }, { | ||||
|         'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id) | ||||
|             for video_id in re.findall( | ||||
|                 r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)] | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_id, | ||||
|             self._og_search_title(webpage, fatal=False), | ||||
|             self._og_search_description(webpage)) | ||||
| @@ -19,6 +19,7 @@ from .acast import ( | ||||
|     ACastChannelIE, | ||||
| ) | ||||
| from .addanime import AddAnimeIE | ||||
| from .adn import ADNIE | ||||
| from .adobetv import ( | ||||
|     AdobeTVIE, | ||||
|     AdobeTVShowIE, | ||||
| @@ -71,6 +72,7 @@ from .arte import ( | ||||
| ) | ||||
| from .atresplayer import AtresPlayerIE | ||||
| from .atttechchannel import ATTTechChannelIE | ||||
| from .atvat import ATVAtIE | ||||
| from .audimedia import AudiMediaIE | ||||
| from .audioboom import AudioBoomIE | ||||
| from .audiomack import AudiomackIE, AudiomackAlbumIE | ||||
| @@ -83,6 +85,7 @@ from .awaan import ( | ||||
| from .azmedien import ( | ||||
|     AZMedienIE, | ||||
|     AZMedienPlaylistIE, | ||||
|     AZMedienShowPlaylistIE, | ||||
| ) | ||||
| from .azubu import AzubuIE, AzubuLiveIE | ||||
| from .baidu import BaiduVideoIE | ||||
| @@ -116,6 +119,7 @@ from .bleacherreport import ( | ||||
| from .blinkx import BlinkxIE | ||||
| from .bloomberg import BloombergIE | ||||
| from .bokecc import BokeCCIE | ||||
| from .bostonglobe import BostonGlobeIE | ||||
| from .bpb import BpbIE | ||||
| from .br import BRIE | ||||
| from .bravotv import BravoTVIE | ||||
| @@ -227,6 +231,10 @@ from .dailymotion import ( | ||||
|     DailymotionUserIE, | ||||
|     DailymotionCloudIE, | ||||
| ) | ||||
| from .daisuki import ( | ||||
|     DaisukiIE, | ||||
|     DaisukiPlaylistIE, | ||||
| ) | ||||
| from .daum import ( | ||||
|     DaumIE, | ||||
|     DaumClipIE, | ||||
| @@ -241,7 +249,10 @@ from .dfb import DFBIE | ||||
| from .dhm import DHMIE | ||||
| from .dotsub import DotsubIE | ||||
| from .douyutv import DouyuTVIE | ||||
| from .dplay import DPlayIE | ||||
| from .dplay import ( | ||||
|     DPlayIE, | ||||
|     DPlayItIE, | ||||
| ) | ||||
| from .dramafever import ( | ||||
|     DramaFeverIE, | ||||
|     DramaFeverSeriesIE, | ||||
| @@ -257,7 +268,12 @@ from .dvtv import DVTVIE | ||||
| from .dumpert import DumpertIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .discovery import DiscoveryIE | ||||
| from .discoverygo import DiscoveryGoIE | ||||
| from .discoverygo import ( | ||||
|     DiscoveryGoIE, | ||||
|     DiscoveryGoPlaylistIE, | ||||
| ) | ||||
| from .discoverynetworks import DiscoveryNetworksDeIE | ||||
| from .discoveryvr import DiscoveryVRIE | ||||
| from .disney import DisneyIE | ||||
| from .dispeak import DigitallySpeakingIE | ||||
| from .dropbox import DropboxIE | ||||
| @@ -288,6 +304,7 @@ from .espn import ( | ||||
|     ESPNArticleIE, | ||||
| ) | ||||
| from .esri import EsriVideoIE | ||||
| from .etonline import ETOnlineIE | ||||
| from .europa import EuropaIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .expotv import ExpoTVIE | ||||
| @@ -338,6 +355,7 @@ from .francetv import ( | ||||
| ) | ||||
| from .freesound import FreesoundIE | ||||
| from .freespeech import FreespeechIE | ||||
| from .freshlive import FreshLiveIE | ||||
| from .funimation import FunimationIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .fusion import FusionIE | ||||
| @@ -637,6 +655,7 @@ from .ninecninemedia import ( | ||||
| from .ninegag import NineGagIE | ||||
| from .ninenow import NineNowIE | ||||
| from .nintendo import NintendoIE | ||||
| from .njpwworld import NJPWWorldIE | ||||
| from .nobelprize import NobelPrizeIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| @@ -666,6 +685,7 @@ from .npo import ( | ||||
|     NPORadioIE, | ||||
|     NPORadioFragmentIE, | ||||
|     SchoolTVIE, | ||||
|     HetKlokhuisIE, | ||||
|     VPROIE, | ||||
|     WNLIE, | ||||
| ) | ||||
| @@ -710,6 +730,10 @@ from .orf import ( | ||||
|     ORFFM4IE, | ||||
|     ORFIPTVIE, | ||||
| ) | ||||
| from .packtpub import ( | ||||
|     PacktPubIE, | ||||
|     PacktPubCourseIE, | ||||
| ) | ||||
| from .pandatv import PandaTVIE | ||||
| from .pandoratv import PandoraTVIE | ||||
| from .parliamentliveuk import ParliamentLiveUKIE | ||||
| @@ -779,11 +803,12 @@ from .radiojavan import RadioJavanIE | ||||
| from .radiobremen import RadioBremenIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| from .rai import ( | ||||
|     RaiTVIE, | ||||
|     RaiPlayIE, | ||||
|     RaiIE, | ||||
| ) | ||||
| from .rbmaradio import RBMARadioIE | ||||
| from .rds import RDSIE | ||||
| from .redbulltv import RedBullTVIE | ||||
| from .redtube import RedTubeIE | ||||
| from .regiotv import RegioTVIE | ||||
| from .rentv import ( | ||||
| @@ -835,7 +860,6 @@ from .safari import ( | ||||
| from .sapo import SapoIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .sbs import SBSIE | ||||
| from .scivee import SciVeeIE | ||||
| from .screencast import ScreencastIE | ||||
| from .screencastomatic import ScreencastOMaticIE | ||||
| from .scrippsnetworks import ScrippsNetworksWatchIE | ||||
| @@ -852,6 +876,7 @@ from .shared import ( | ||||
| from .showroomlive import ShowRoomLiveIE | ||||
| from .sina import SinaIE | ||||
| from .sixplay import SixPlayIE | ||||
| from .skylinewebcams import SkylineWebcamsIE | ||||
| from .skynewsarabia import ( | ||||
|     SkyNewsArabiaIE, | ||||
|     SkyNewsArabiaArticleIE, | ||||
| @@ -957,7 +982,6 @@ from .thisav import ThisAVIE | ||||
| from .thisoldhouse import ThisOldHouseIE | ||||
| from .threeqsdn import ThreeQSDNIE | ||||
| from .tinypic import TinyPicIE | ||||
| from .tlc import TlcDeIE | ||||
| from .tmz import ( | ||||
|     TMZIE, | ||||
|     TMZArticleIE, | ||||
| @@ -970,6 +994,7 @@ from .tnaflix import ( | ||||
| ) | ||||
| from .toggle import ToggleIE | ||||
| from .tonline import TOnlineIE | ||||
| from .toongoggles import ToonGogglesIE | ||||
| from .toutv import TouTvIE | ||||
| from .toypics import ToypicsUserIE, ToypicsIE | ||||
| from .traileraddict import TrailerAddictIE | ||||
| @@ -990,6 +1015,7 @@ from .tunein import ( | ||||
|     TuneInTopicIE, | ||||
|     TuneInShortenerIE, | ||||
| ) | ||||
| from .tunepk import TunePkIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tv2 import ( | ||||
| @@ -998,6 +1024,7 @@ from .tv2 import ( | ||||
| ) | ||||
| from .tv3 import TV3IE | ||||
| from .tv4 import TV4IE | ||||
| from .tv5mondeplus import TV5MondePlusIE | ||||
| from .tva import TVAIE | ||||
| from .tvanouvelles import ( | ||||
|     TVANouvellesIE, | ||||
| @@ -1156,6 +1183,12 @@ from .voicerepublic import VoiceRepublicIE | ||||
| from .voxmedia import VoxMediaIE | ||||
| from .vporn import VpornIE | ||||
| from .vrt import VRTIE | ||||
| from .vrak import VrakIE | ||||
| from .vrv import ( | ||||
|     VRVIE, | ||||
|     VRVSeriesIE, | ||||
| ) | ||||
| from .medialaan import MedialaanIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vvvvid import VVVVIDIE | ||||
|   | ||||
| @@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native'), | ||||
|             'description': xpath_text(video_data, _add_ns('Description')), | ||||
|             'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), | ||||
|             'uploader': xpath_text(video_data, _add_ns('Createur')), | ||||
|   | ||||
| @@ -196,6 +196,10 @@ class FacebookIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # no title | ||||
|         'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -303,7 +307,7 @@ class FacebookIE(InfoExtractor): | ||||
|         if not video_data: | ||||
|             server_js_data = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', | ||||
|                     r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', | ||||
|                     webpage, 'js data', default='{}'), | ||||
|                 video_id, transform_source=js_to_json, fatal=False) | ||||
|             if server_js_data: | ||||
| @@ -353,15 +357,15 @@ class FacebookIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title', | ||||
|             default=None) | ||||
|             r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, | ||||
|             'title', default=None) | ||||
|         if not video_title: | ||||
|             video_title = self._html_search_regex( | ||||
|                 r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', | ||||
|                 webpage, 'alternative title', default=None) | ||||
|         if not video_title: | ||||
|             video_title = self._html_search_meta( | ||||
|                 'description', webpage, 'title') | ||||
|                 'description', webpage, 'title', default=None) | ||||
|         if video_title: | ||||
|             video_title = limit_length(video_title, 80) | ||||
|         else: | ||||
|   | ||||
| @@ -47,9 +47,12 @@ class FOXIE(AdobePassIE): | ||||
|             resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) | ||||
|             query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) | ||||
|  | ||||
|         return { | ||||
|         info = self._search_json_ld(webpage, video_id, fatal=False) | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'ThePlatform', | ||||
|             'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), | ||||
|             'id': video_id, | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -4,7 +4,8 @@ from __future__ import unicode_literals | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     unified_strdate, | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor): | ||||
|             'title': 'Rendez-vous au pays des geeks', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'upload_date': '20140301', | ||||
|             'timestamp': 1393642916, | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|     } | ||||
| @@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"', | ||||
|             webpage, 'video path') | ||||
|         video_data = extract_attributes(self._search_regex( | ||||
|             r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)', | ||||
|             webpage, 'video data')) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         video_url = video_data['data-asset-source'] | ||||
|         title = video_data.get('data-asset-title') or self._og_search_title(webpage) | ||||
|  | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<', | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>', | ||||
|             webpage, 'description', default=None) | ||||
|         thumbnail = self._search_regex( | ||||
|             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"', | ||||
|             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"', | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', | ||||
|             r'(?s)<span class="author">(.*?)</span>', | ||||
|             webpage, 'uploader', default=None) | ||||
|         vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None | ||||
|         ext = determine_ext(video_url.lower()) | ||||
|  | ||||
|         return { | ||||
|             'id': display_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'vcodec': vcodec, | ||||
|             'ext': ext, | ||||
|             'vcodec': 'none' if ext == 'mp3' else None, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'timestamp': int_or_none(video_data.get('data-asset-created-date')), | ||||
|             'duration': int_or_none(video_data.get('data-duration')), | ||||
|         } | ||||
|   | ||||
							
								
								
									
										83
									
								
								youtube_dl/extractor/freshlive.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								youtube_dl/extractor/freshlive.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FreshLiveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://freshlive.tv/satotv/74712', | ||||
|         'md5': '9f0cf5516979c4454ce982df3d97f352', | ||||
|         'info_dict': { | ||||
|             'id': '74712', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'テスト', | ||||
|             'description': 'テスト', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 1511, | ||||
|             'timestamp': 1483619655, | ||||
|             'upload_date': '20170105', | ||||
|             'uploader': 'サトTV', | ||||
|             'uploader_id': 'satotv', | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|             'is_live': False, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         options = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>', | ||||
|                 webpage, 'initial context'), | ||||
|             video_id) | ||||
|  | ||||
|         info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id] | ||||
|  | ||||
|         title = info['title'] | ||||
|  | ||||
|         if info.get('status') == 'upcoming': | ||||
|             raise ExtractorError('Stream %s is upcoming' % video_id, expected=True) | ||||
|  | ||||
|         stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl'] | ||||
|  | ||||
|         is_live = info.get('liveStreamUrl') is not None | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             stream_url, video_id, 'mp4', | ||||
|             'm3u8_native', m3u8_id='hls') | ||||
|  | ||||
|         if is_live: | ||||
|             title = self._live_title(title) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': title, | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info.get('thumbnailUrl'), | ||||
|             'duration': int_or_none(info.get('airTime')), | ||||
|             'timestamp': unified_timestamp(info.get('createdAt')), | ||||
|             'uploader': try_get( | ||||
|                 info, lambda x: x['channel']['title'], compat_str), | ||||
|             'uploader_id': try_get( | ||||
|                 info, lambda x: x['channel']['code'], compat_str), | ||||
|             'uploader_url': try_get( | ||||
|                 info, lambda x: x['channel']['permalink'], compat_str), | ||||
|             'view_count': int_or_none(info.get('viewCount')), | ||||
|             'comment_count': int_or_none(info.get('commentCount')), | ||||
|             'tags': info.get('tags', []), | ||||
|             'is_live': is_live, | ||||
|         } | ||||
| @@ -7,9 +7,9 @@ from ..compat import ( | ||||
|     compat_urllib_parse_unquote_plus, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     sanitized_Request, | ||||
|     ExtractorError, | ||||
|     urlencode_postdata | ||||
| @@ -17,34 +17,26 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class FunimationIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _NETRC_MACHINE = 'funimation' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.funimation.com/shows/air/videos/official/breeze', | ||||
|         'url': 'https://www.funimation.com/shows/hacksign/role-play/', | ||||
|         'info_dict': { | ||||
|             'id': '658', | ||||
|             'display_id': 'breeze', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Air - 1 - Breeze', | ||||
|             'description': 'md5:1769f43cd5fc130ace8fd87232207892', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|         }, | ||||
|         'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed', | ||||
|     }, { | ||||
|         'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play', | ||||
|         'info_dict': { | ||||
|             'id': '31128', | ||||
|             'id': '91144', | ||||
|             'display_id': 'role-play', | ||||
|             'ext': 'mp4', | ||||
|             'title': '.hack//SIGN - 1 - Role Play', | ||||
|             'title': '.hack//SIGN - Role Play', | ||||
|             'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|         }, | ||||
|         'skip': 'Access without user interaction is forbidden by CloudFlare', | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview', | ||||
|         'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', | ||||
|         'info_dict': { | ||||
|             'id': '9635', | ||||
|             'display_id': 'broadcast-dub-preview', | ||||
| @@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor): | ||||
|             'thumbnail': r're:https?://.*\.(?:jpg|png)', | ||||
|         }, | ||||
|         'skip': 'Access without user interaction is forbidden by CloudFlare', | ||||
|     }, { | ||||
|         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _LOGIN_URL = 'http://www.funimation.com/login' | ||||
|  | ||||
|     def _download_webpage(self, *args, **kwargs): | ||||
|         try: | ||||
|             return super(FunimationIE, self)._download_webpage(*args, **kwargs) | ||||
|         except ExtractorError as ee: | ||||
|             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||
|                 response = ee.cause.read() | ||||
|                 if b'>Please complete the security check to access<' in response: | ||||
|                     raise ExtractorError( | ||||
|                         'Access to funimation.com is blocked by CloudFlare. ' | ||||
|                         'Please browse to http://www.funimation.com/, solve ' | ||||
|                         'the reCAPTCHA, export browser cookies to a text file,' | ||||
|                         ' and then try again with --cookies YOUR_COOKIE_FILE.', | ||||
|                         expected=True) | ||||
|             raise | ||||
|  | ||||
|     def _extract_cloudflare_session_ua(self, url): | ||||
|         ci_session_cookie = self._get_cookies(url).get('ci_session') | ||||
|         if ci_session_cookie: | ||||
| @@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         def _search_kane(name): | ||||
|             return self._search_regex( | ||||
|                 r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name, | ||||
|                 webpage, name, default=None) | ||||
|  | ||||
|         title_data = self._parse_json(self._search_regex( | ||||
|             r'TITLE_DATA\s*=\s*({[^}]+})', | ||||
|             webpage, 'title data', default=''), | ||||
|             display_id, js_to_json, fatal=False) or {} | ||||
|  | ||||
|         video_id = title_data.get('id') or self._search_regex([ | ||||
|             r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", | ||||
|             r'<iframe[^>]+src="/player/(\d+)"', | ||||
|         ], webpage, 'video_id', default=None) | ||||
|         if not video_id: | ||||
|             player_url = self._html_search_meta([ | ||||
|                 'al:web:url', | ||||
|                 'og:video:url', | ||||
|                 'og:video:secure_url', | ||||
|             ], webpage, fatal=True) | ||||
|             video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id') | ||||
|  | ||||
|         title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage) | ||||
|         series = _search_kane('showName') | ||||
|         if series: | ||||
|             title = '%s - %s' % (series, title) | ||||
|         description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) | ||||
|  | ||||
|         try: | ||||
|             sources = self._download_json( | ||||
|                 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id, | ||||
|                 video_id)['items'] | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 error = self._parse_json(e.cause.read(), video_id)['errors'][0] | ||||
|                 raise ExtractorError('%s said: %s' % ( | ||||
|                     self.IE_NAME, error.get('detail') or error.get('title')), expected=True) | ||||
|             raise | ||||
|  | ||||
|         errors = [] | ||||
|         formats = [] | ||||
|  | ||||
|         ERRORS_MAP = { | ||||
|             'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn', | ||||
|             'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut', | ||||
|             'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut', | ||||
|             'ERROR_VIDEO_EXPIRED': 'videoExpired', | ||||
|             'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable', | ||||
|             'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription', | ||||
|             'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription', | ||||
|             'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding', | ||||
|             'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN', | ||||
|             'ERROR_STREAM_NOT_FOUND': 'streamNotFound', | ||||
|         } | ||||
|  | ||||
|         USER_AGENTS = ( | ||||
|             # PC UA is served with m3u8 that provides some bonus lower quality formats | ||||
|             ('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'), | ||||
|             # Mobile UA allows to extract direct links and also does not fail when | ||||
|             # PC UA fails with hulu error (e.g. | ||||
|             # http://www.funimation.com/shows/hacksign/videos/official/role-play) | ||||
|             ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'), | ||||
|         ) | ||||
|  | ||||
|         user_agent = self._extract_cloudflare_session_ua(url) | ||||
|         if user_agent: | ||||
|             USER_AGENTS = ((None, user_agent),) | ||||
|  | ||||
|         for kind, user_agent in USER_AGENTS: | ||||
|             request = sanitized_Request(url) | ||||
|             request.add_header('User-Agent', user_agent) | ||||
|             webpage = self._download_webpage( | ||||
|                 request, display_id, | ||||
|                 'Downloading %s webpage' % kind if kind else 'Downloading webpage') | ||||
|  | ||||
|             playlist = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'var\s+playersData\s*=\s*(\[.+?\]);\n', | ||||
|                     webpage, 'players data'), | ||||
|                 display_id)[0]['playlist'] | ||||
|  | ||||
|             items = next(item['items'] for item in playlist if item.get('items')) | ||||
|             item = next(item for item in items if item.get('itemAK') == display_id) | ||||
|  | ||||
|             error_messages = {} | ||||
|             video_error_messages = self._search_regex( | ||||
|                 r'var\s+videoErrorMessages\s*=\s*({.+?});\n', | ||||
|                 webpage, 'error messages', default=None) | ||||
|             if video_error_messages: | ||||
|                 error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False) | ||||
|                 if error_messages_json: | ||||
|                     for _, error in error_messages_json.items(): | ||||
|                         type_ = error.get('type') | ||||
|                         description = error.get('description') | ||||
|                         content = error.get('content') | ||||
|                         if type_ == 'text' and description and content: | ||||
|                             error_message = ERRORS_MAP.get(description) | ||||
|                             if error_message: | ||||
|                                 error_messages[error_message] = content | ||||
|  | ||||
|             for video in item.get('videoSet', []): | ||||
|                 auth_token = video.get('authToken') | ||||
|                 if not auth_token: | ||||
|                     continue | ||||
|                 funimation_id = video.get('FUNImationID') or video.get('videoId') | ||||
|                 preference = 1 if video.get('languageMode') == 'dub' else 0 | ||||
|                 if not auth_token.startswith('?'): | ||||
|                     auth_token = '?%s' % auth_token | ||||
|                 for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)): | ||||
|                     format_url = video.get('%sUrl' % quality) | ||||
|                     if not format_url: | ||||
|                         continue | ||||
|                     if not format_url.startswith(('http', '//')): | ||||
|                         errors.append(format_url) | ||||
|                         continue | ||||
|                     if determine_ext(format_url) == 'm3u8': | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                             preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False)) | ||||
|                     else: | ||||
|                         tbr = int_or_none(self._search_regex( | ||||
|                             r'-(\d+)[Kk]', format_url, 'tbr', default=None)) | ||||
|                         formats.append({ | ||||
|                             'url': format_url + auth_token, | ||||
|                             'format_id': '%s-http-%dp' % (funimation_id, height), | ||||
|                             'height': height, | ||||
|                             'tbr': tbr, | ||||
|                             'preference': preference, | ||||
|                         }) | ||||
|  | ||||
|         if not formats and errors: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' | ||||
|                 % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))), | ||||
|                 expected=True) | ||||
|  | ||||
|         for source in sources: | ||||
|             source_url = source.get('src') | ||||
|             if not source_url: | ||||
|                 continue | ||||
|             source_type = source.get('videoType') or determine_ext(source_url) | ||||
|             if source_type == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     source_url, video_id, 'mp4', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'format_id': source_type, | ||||
|                     'url': source_url, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = item['title'] | ||||
|         artist = item.get('artist') | ||||
|         if artist: | ||||
|             title = '%s - %s' % (artist, title) | ||||
|         description = self._og_search_description(webpage) or item.get('description') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl') | ||||
|         video_id = item.get('itemId') or display_id | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'series': series, | ||||
|             'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), | ||||
|             'episode_number': int_or_none(title_data.get('episodeNum')), | ||||
|             'episode': episode, | ||||
|             'season_id': title_data.get('seriesId'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE | ||||
| from .ustream import UstreamIE | ||||
| from .openload import OpenloadIE | ||||
| from .videopress import VideoPressIE | ||||
| from .rutube import RutubeIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -448,6 +449,23 @@ class GenericIE(InfoExtractor): | ||||
|                 }, | ||||
|             }], | ||||
|         }, | ||||
|         { | ||||
|             # Brightcove with UUID in videoPlayer | ||||
|             'url': 'http://www8.hp.com/cn/zh/home.html', | ||||
|             'info_dict': { | ||||
|                 'id': '5255815316001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sprocket Video - China', | ||||
|                 'description': 'Sprocket Video - China', | ||||
|                 'uploader': 'HP-Video Gallery', | ||||
|                 'timestamp': 1482263210, | ||||
|                 'upload_date': '20161220', | ||||
|                 'uploader_id': '1107601872001', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # m3u8 download | ||||
|             }, | ||||
|         }, | ||||
|         # ooyala video | ||||
|         { | ||||
|             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', | ||||
| @@ -884,12 +902,13 @@ class GenericIE(InfoExtractor): | ||||
|         }, | ||||
|         # LazyYT | ||||
|         { | ||||
|             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', | ||||
|             'url': 'https://skiplagged.com/', | ||||
|             'info_dict': { | ||||
|                 'id': '1986', | ||||
|                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', | ||||
|                 'id': 'skiplagged', | ||||
|                 'title': 'Skiplagged: The smart way to find cheap flights', | ||||
|             }, | ||||
|             'playlist_mincount': 2, | ||||
|             'playlist_mincount': 1, | ||||
|             'add_ie': ['Youtube'], | ||||
|         }, | ||||
|         # Cinchcast embed | ||||
|         { | ||||
| @@ -972,6 +991,20 @@ class GenericIE(InfoExtractor): | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # JWPlayer config passed as variable | ||||
|             'url': 'http://www.txxx.com/videos/3326530/ariele/', | ||||
|             'info_dict': { | ||||
|                 'id': '3326530_hq', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'ARIELE | Tube Cup', | ||||
|                 'uploader': 'www.txxx.com', | ||||
|                 'age_limit': 18, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         # rtl.nl embed | ||||
|         { | ||||
|             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', | ||||
| @@ -1502,11 +1535,39 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': [VideoPressIE.ie_key()], | ||||
|         }, | ||||
|         { | ||||
|             # Rutube embed | ||||
|             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2', | ||||
|             'info_dict': { | ||||
|                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Магаззино: Казань 2', | ||||
|                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a', | ||||
|                 'uploader': 'Магаззино', | ||||
|                 'upload_date': '20170228', | ||||
|                 'uploader_id': '996642', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'add_ie': [RutubeIE.ie_key()], | ||||
|         }, | ||||
|         { | ||||
|             # ThePlatform embedded with whitespaces in URLs | ||||
|             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # Senate ISVP iframe https | ||||
|             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security', | ||||
|             'md5': 'fb8c70b0b515e5037981a2492099aab8', | ||||
|             'info_dict': { | ||||
|                 'id': 'govtaff020316', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Integrated Senate Video Player', | ||||
|             }, | ||||
|             'add_ie': [SenateISVPIE.ie_key()], | ||||
|         }, | ||||
|         # { | ||||
|         #     # TODO: find another test | ||||
|         #     # http://schema.org/VideoObject | ||||
| @@ -1806,14 +1867,6 @@ class GenericIE(InfoExtractor): | ||||
|         video_description = self._og_search_description(webpage, default=None) | ||||
|         video_thumbnail = self._og_search_thumbnail(webpage, default=None) | ||||
|  | ||||
|         # Helper method | ||||
|         def _playlist_from_matches(matches, getter=None, ie=None): | ||||
|             urlrs = orderedSet( | ||||
|                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) | ||||
|                 for m in matches) | ||||
|             return self.playlist_result( | ||||
|                 urlrs, playlist_id=video_id, playlist_title=video_title) | ||||
|  | ||||
|         # Look for Brightcove Legacy Studio embeds | ||||
|         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) | ||||
|         if bc_urls: | ||||
| @@ -1834,28 +1887,28 @@ class GenericIE(InfoExtractor): | ||||
|         # Look for Brightcove New Studio embeds | ||||
|         bc_urls = BrightcoveNewIE._extract_urls(webpage) | ||||
|         if bc_urls: | ||||
|             return _playlist_from_matches(bc_urls, ie='BrightcoveNew') | ||||
|             return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') | ||||
|  | ||||
|         # Look for ThePlatform embeds | ||||
|         tp_urls = ThePlatformIE._extract_urls(webpage) | ||||
|         if tp_urls: | ||||
|             return _playlist_from_matches(tp_urls, ie='ThePlatform') | ||||
|             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') | ||||
|  | ||||
|         # Look for Vessel embeds | ||||
|         vessel_urls = VesselIE._extract_urls(webpage) | ||||
|         if vessel_urls: | ||||
|             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key()) | ||||
|             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded rtl.nl player | ||||
|         matches = re.findall( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', | ||||
|             webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, ie='RtlNl') | ||||
|             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl') | ||||
|  | ||||
|         vimeo_urls = VimeoIE._extract_urls(url, webpage) | ||||
|         if vimeo_urls: | ||||
|             return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) | ||||
|             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) | ||||
|  | ||||
|         vid_me_embed_url = self._search_regex( | ||||
|             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', | ||||
| @@ -1877,25 +1930,25 @@ class GenericIE(InfoExtractor): | ||||
|                 (?:embed|v|p)/.+?) | ||||
|             \1''', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches( | ||||
|                 matches, lambda m: unescapeHTML(m[1])) | ||||
|             return self.playlist_from_matches( | ||||
|                 matches, video_id, video_title, lambda m: unescapeHTML(m[1])) | ||||
|  | ||||
|         # Look for lazyYT YouTube embed | ||||
|         matches = re.findall( | ||||
|             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, lambda m: unescapeHTML(m)) | ||||
|             return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) | ||||
|  | ||||
|         # Look for Wordpress "YouTube Video Importer" plugin | ||||
|         matches = re.findall(r'''(?x)<div[^>]+ | ||||
|             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ | ||||
|             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, lambda m: m[-1]) | ||||
|             return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) | ||||
|  | ||||
|         matches = DailymotionIE._extract_urls(webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches) | ||||
|             return self.playlist_from_matches(matches, video_id, video_title) | ||||
|  | ||||
|         # Look for embedded Dailymotion playlist player (#3822) | ||||
|         m = re.search( | ||||
| @@ -1904,8 +1957,8 @@ class GenericIE(InfoExtractor): | ||||
|             playlists = re.findall( | ||||
|                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url'))) | ||||
|             if playlists: | ||||
|                 return _playlist_from_matches( | ||||
|                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p) | ||||
|                 return self.playlist_from_matches( | ||||
|                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) | ||||
|  | ||||
|         # Look for embedded Wistia player | ||||
|         match = re.search( | ||||
| @@ -2012,8 +2065,9 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             embeds = self._parse_json(mobj.group(1), video_id, fatal=False) | ||||
|             if embeds: | ||||
|                 return _playlist_from_matches( | ||||
|                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') | ||||
|                 return self.playlist_from_matches( | ||||
|                     embeds, video_id, video_title, | ||||
|                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') | ||||
|  | ||||
|         # Look for Aparat videos | ||||
|         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) | ||||
| @@ -2075,13 +2129,13 @@ class GenericIE(InfoExtractor): | ||||
|         # Look for funnyordie embed | ||||
|         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches( | ||||
|                 matches, getter=unescapeHTML, ie='FunnyOrDie') | ||||
|             return self.playlist_from_matches( | ||||
|                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie') | ||||
|  | ||||
|         # Look for BBC iPlayer embed | ||||
|         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches(matches, ie='BBCCoUk') | ||||
|             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk') | ||||
|  | ||||
|         # Look for embedded RUTV player | ||||
|         rutv_url = RUTVIE._extract_url(webpage) | ||||
| @@ -2096,32 +2150,32 @@ class GenericIE(InfoExtractor): | ||||
|         # Look for embedded SportBox player | ||||
|         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) | ||||
|         if sportbox_urls: | ||||
|             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') | ||||
|             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') | ||||
|  | ||||
|         # Look for embedded XHamster player | ||||
|         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) | ||||
|         if xhamster_urls: | ||||
|             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed') | ||||
|             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed') | ||||
|  | ||||
|         # Look for embedded TNAFlixNetwork player | ||||
|         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage) | ||||
|         if tnaflix_urls: | ||||
|             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) | ||||
|             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded PornHub player | ||||
|         pornhub_urls = PornHubIE._extract_urls(webpage) | ||||
|         if pornhub_urls: | ||||
|             return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) | ||||
|             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded DrTuber player | ||||
|         drtuber_urls = DrTuberIE._extract_urls(webpage) | ||||
|         if drtuber_urls: | ||||
|             return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) | ||||
|             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded RedTube player | ||||
|         redtube_urls = RedTubeIE._extract_urls(webpage) | ||||
|         if redtube_urls: | ||||
|             return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key()) | ||||
|             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded Tvigle player | ||||
|         mobj = re.search( | ||||
| @@ -2167,12 +2221,12 @@ class GenericIE(InfoExtractor): | ||||
|         # Look for embedded soundcloud player | ||||
|         soundcloud_urls = SoundcloudIE._extract_urls(webpage) | ||||
|         if soundcloud_urls: | ||||
|             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) | ||||
|             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) | ||||
|  | ||||
|         # Look for tunein player | ||||
|         tunein_urls = TuneInBaseIE._extract_urls(webpage) | ||||
|         if tunein_urls: | ||||
|             return _playlist_from_matches(tunein_urls) | ||||
|             return self.playlist_from_matches(tunein_urls, video_id, video_title) | ||||
|  | ||||
|         # Look for embedded mtvservices player | ||||
|         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) | ||||
| @@ -2455,30 +2509,36 @@ class GenericIE(InfoExtractor): | ||||
|         # Look for DBTV embeds | ||||
|         dbtv_urls = DBTVIE._extract_urls(webpage) | ||||
|         if dbtv_urls: | ||||
|             return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) | ||||
|             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key()) | ||||
|  | ||||
|         # Look for Videa embeds | ||||
|         videa_urls = VideaIE._extract_urls(webpage) | ||||
|         if videa_urls: | ||||
|             return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) | ||||
|             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key()) | ||||
|  | ||||
|         # Look for 20 minuten embeds | ||||
|         twentymin_urls = TwentyMinutenIE._extract_urls(webpage) | ||||
|         if twentymin_urls: | ||||
|             return _playlist_from_matches( | ||||
|                 twentymin_urls, ie=TwentyMinutenIE.ie_key()) | ||||
|             return self.playlist_from_matches( | ||||
|                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) | ||||
|  | ||||
|         # Look for Openload embeds | ||||
|         openload_urls = OpenloadIE._extract_urls(webpage) | ||||
|         if openload_urls: | ||||
|             return _playlist_from_matches( | ||||
|                 openload_urls, ie=OpenloadIE.ie_key()) | ||||
|             return self.playlist_from_matches( | ||||
|                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) | ||||
|  | ||||
|         # Look for VideoPress embeds | ||||
|         videopress_urls = VideoPressIE._extract_urls(webpage) | ||||
|         if videopress_urls: | ||||
|             return _playlist_from_matches( | ||||
|                 videopress_urls, ie=VideoPressIE.ie_key()) | ||||
|             return self.playlist_from_matches( | ||||
|                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) | ||||
|  | ||||
|         # Look for Rutube embeds | ||||
|         rutube_urls = RutubeIE._extract_urls(webpage) | ||||
|         if rutube_urls: | ||||
|             return self.playlist_from_matches( | ||||
|                 rutube_urls, ie=RutubeIE.ie_key()) | ||||
|  | ||||
|         # Looking for http://schema.org/VideoObject | ||||
|         json_ld = self._search_json_ld( | ||||
| @@ -2504,14 +2564,14 @@ class GenericIE(InfoExtractor): | ||||
|                 self._sort_formats(entry['formats']) | ||||
|             return self.playlist_result(entries) | ||||
|  | ||||
|         jwplayer_data_str = self._find_jwplayer_data(webpage) | ||||
|         if jwplayer_data_str: | ||||
|             try: | ||||
|                 jwplayer_data = self._parse_json( | ||||
|                     jwplayer_data_str, video_id, transform_source=js_to_json) | ||||
|                 return self._parse_jwplayer_data(jwplayer_data, video_id) | ||||
|             except ExtractorError: | ||||
|                 pass | ||||
|         jwplayer_data = self._find_jwplayer_data( | ||||
|             webpage, video_id, transform_source=js_to_json) | ||||
|         if jwplayer_data: | ||||
|             info = self._parse_jwplayer_data( | ||||
|                 jwplayer_data, video_id, require_title=False, base_url=url) | ||||
|             if not info.get('title'): | ||||
|                 info['title'] = video_title | ||||
|             return info | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
| @@ -2586,11 +2646,14 @@ class GenericIE(InfoExtractor): | ||||
|                     found = re.search(REDIRECT_REGEX, refresh_header) | ||||
|             if found: | ||||
|                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) | ||||
|                 self.report_following_redirect(new_url) | ||||
|                 return { | ||||
|                     '_type': 'url', | ||||
|                     'url': new_url, | ||||
|                 } | ||||
|                 if new_url != url: | ||||
|                     self.report_following_redirect(new_url) | ||||
|                     return { | ||||
|                         '_type': 'url', | ||||
|                         'url': new_url, | ||||
|                     } | ||||
|                 else: | ||||
|                     found = None | ||||
|  | ||||
|         if not found: | ||||
|             # twitter:player is a https URL to iframe player that may or may not | ||||
|   | ||||
| @@ -36,8 +36,7 @@ class GoIE(AdobePassIE): | ||||
|             'requestor_id': 'DisneyXD', | ||||
|         } | ||||
|     } | ||||
|     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', | ||||
|         'info_dict': { | ||||
| @@ -53,6 +52,12 @@ class GoIE(AdobePassIE): | ||||
|     }, { | ||||
|         'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -79,44 +84,60 @@ class GoIE(AdobePassIE): | ||||
|             ext = determine_ext(asset_url) | ||||
|             if ext == 'm3u8': | ||||
|                 video_type = video_data.get('type') | ||||
|                 if video_type == 'lf': | ||||
|                     data = { | ||||
|                         'video_id': video_data['id'], | ||||
|                         'video_type': video_type, | ||||
|                         'brand': brand, | ||||
|                         'device': '001', | ||||
|                     } | ||||
|                     if video_data.get('accesslevel') == '1': | ||||
|                         requestor_id = site_info['requestor_id'] | ||||
|                         resource = self._get_mvpd_resource( | ||||
|                             requestor_id, title, video_id, None) | ||||
|                         auth = self._extract_mvpd_auth( | ||||
|                             url, video_id, requestor_id, resource) | ||||
|                         data.update({ | ||||
|                             'token': auth, | ||||
|                             'token_type': 'ap', | ||||
|                             'adobe_requestor_id': requestor_id, | ||||
|                         }) | ||||
|                     entitlement = self._download_json( | ||||
|                         'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', | ||||
|                         video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) | ||||
|                     errors = entitlement.get('errors', {}).get('errors', []) | ||||
|                     if errors: | ||||
|                         for error in errors: | ||||
|                             if error.get('code') == 1002: | ||||
|                                 self.raise_geo_restricted( | ||||
|                                     error['message'], countries=self._GEO_COUNTRIES) | ||||
|                         error_message = ', '.join([error['message'] for error in errors]) | ||||
|                         raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) | ||||
|                     asset_url += '?' + entitlement['uplynkData']['sessionKey'] | ||||
|                 data = { | ||||
|                     'video_id': video_data['id'], | ||||
|                     'video_type': video_type, | ||||
|                     'brand': brand, | ||||
|                     'device': '001', | ||||
|                 } | ||||
|                 if video_data.get('accesslevel') == '1': | ||||
|                     requestor_id = site_info['requestor_id'] | ||||
|                     resource = self._get_mvpd_resource( | ||||
|                         requestor_id, title, video_id, None) | ||||
|                     auth = self._extract_mvpd_auth( | ||||
|                         url, video_id, requestor_id, resource) | ||||
|                     data.update({ | ||||
|                         'token': auth, | ||||
|                         'token_type': 'ap', | ||||
|                         'adobe_requestor_id': requestor_id, | ||||
|                     }) | ||||
|                 else: | ||||
|                     self._initialize_geo_bypass(['US']) | ||||
|                 entitlement = self._download_json( | ||||
|                     'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', | ||||
|                     video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) | ||||
|                 errors = entitlement.get('errors', {}).get('errors', []) | ||||
|                 if errors: | ||||
|                     for error in errors: | ||||
|                         if error.get('code') == 1002: | ||||
|                             self.raise_geo_restricted( | ||||
|                                 error['message'], countries=['US']) | ||||
|                     error_message = ', '.join([error['message'] for error in errors]) | ||||
|                     raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) | ||||
|                 asset_url += '?' + entitlement['uplynkData']['sessionKey'] | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                 f = { | ||||
|                     'format_id': format_id, | ||||
|                     'url': asset_url, | ||||
|                     'ext': ext, | ||||
|                 }) | ||||
|                 } | ||||
|                 if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url): | ||||
|                     f.update({ | ||||
|                         'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE', | ||||
|                         'preference': 1, | ||||
|                     }) | ||||
|                 else: | ||||
|                     mobj = re.search(r'/(\d+)x(\d+)/', asset_url) | ||||
|                     if mobj: | ||||
|                         height = int(mobj.group(2)) | ||||
|                         f.update({ | ||||
|                             'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height, | ||||
|                             'width': int(mobj.group(1)), | ||||
|                             'height': height, | ||||
|                         }) | ||||
|                 formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_element, | ||||
| @@ -14,14 +15,26 @@ from ..utils import ( | ||||
|  | ||||
| class HBOBaseIE(InfoExtractor): | ||||
|     _FORMATS_INFO = { | ||||
|         'pro7': { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         '1920': { | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         }, | ||||
|         'pro6': { | ||||
|             'width': 768, | ||||
|             'height': 432, | ||||
|         }, | ||||
|         '640': { | ||||
|             'width': 768, | ||||
|             'height': 432, | ||||
|         }, | ||||
|         'pro5': { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
|         }, | ||||
|         'highwifi': { | ||||
|             'width': 640, | ||||
|             'height': 360, | ||||
| @@ -78,6 +91,17 @@ class HBOBaseIE(InfoExtractor): | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         video_url.replace('.tar', '/base_index_w8.m3u8'), | ||||
|                         video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|                 elif source.tag == 'hls': | ||||
|                     # #EXT-X-BYTERANGE is not supported by native hls downloader | ||||
|                     # and ffmpeg (#10955) | ||||
|                     # formats.extend(self._extract_m3u8_formats( | ||||
|                     #     video_url.replace('.tar', '/base_index.m3u8'), | ||||
|                     #     video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|                     continue | ||||
|                 elif source.tag == 'dash': | ||||
|                     formats.extend(self._extract_mpd_formats( | ||||
|                         video_url.replace('.tar', '/manifest.mpd'), | ||||
|                         video_id, mpd_id='dash', fatal=False)) | ||||
|                 else: | ||||
|                     format_info = self._FORMATS_INFO.get(source.tag, {}) | ||||
|                     formats.append({ | ||||
| @@ -112,10 +136,11 @@ class HBOBaseIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class HBOIE(HBOBaseIE): | ||||
|     IE_NAME = 'hbo' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', | ||||
|         'md5': '1c33253f0c7782142c993c0ba62a8753', | ||||
|         'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', | ||||
|         'info_dict': { | ||||
|             'id': '1437839', | ||||
|             'ext': 'mp4', | ||||
| @@ -131,11 +156,12 @@ class HBOIE(HBOBaseIE): | ||||
|  | ||||
|  | ||||
| class HBOEpisodeIE(HBOBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html' | ||||
|     IE_NAME = 'hbo:episode' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', | ||||
|         'md5': '689132b253cc0ab7434237fc3a293210', | ||||
|         'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', | ||||
|         'info_dict': { | ||||
|             'id': '1439518', | ||||
|             'display_id': 'ep-52-inside-the-episode', | ||||
| @@ -147,16 +173,19 @@ class HBOEpisodeIE(HBOBaseIE): | ||||
|     }, { | ||||
|         'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         path, display_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         content = self._download_json( | ||||
|             'http://www.hbo.com/api/content/' + path, display_id)['content'] | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)', | ||||
|             webpage, 'video ID', group='video_id') | ||||
|         video_id = compat_str((content.get('parsed', {}).get( | ||||
|             'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) | ||||
|  | ||||
|         info_dict = self._extract_from_id(video_id) | ||||
|         info_dict['display_id'] = display_id | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     int_or_none, | ||||
| @@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # multi video post | ||||
|         'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/', | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'BQ0dSaohpPW', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Video 1', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'BQ0dTpOhuHT', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Video 2', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'BQ0dT7RBFeF', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Video 3', | ||||
|             }, | ||||
|         }], | ||||
|         'info_dict': { | ||||
|             'id': 'BQ0eAlwhDrw', | ||||
|             'title': 'Post by instagram', | ||||
|             'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://instagram.com/p/-Cmh1cukG2/', | ||||
|         'only_matching': True, | ||||
| @@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor): | ||||
|                     'timestamp': int_or_none(comment.get('created_at')), | ||||
|                 } for comment in media.get( | ||||
|                     'comments', {}).get('nodes', []) if comment.get('text')] | ||||
|                 if not video_url: | ||||
|                     edges = try_get( | ||||
|                         media, lambda x: x['edge_sidecar_to_children']['edges'], | ||||
|                         list) or [] | ||||
|                     if edges: | ||||
|                         entries = [] | ||||
|                         for edge_num, edge in enumerate(edges, start=1): | ||||
|                             node = try_get(edge, lambda x: x['node'], dict) | ||||
|                             if not node: | ||||
|                                 continue | ||||
|                             node_video_url = try_get(node, lambda x: x['video_url'], compat_str) | ||||
|                             if not node_video_url: | ||||
|                                 continue | ||||
|                             entries.append({ | ||||
|                                 'id': node.get('shortcode') or node['id'], | ||||
|                                 'title': 'Video %d' % edge_num, | ||||
|                                 'url': node_video_url, | ||||
|                                 'thumbnail': node.get('display_url'), | ||||
|                                 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), | ||||
|                                 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), | ||||
|                                 'view_count': int_or_none(node.get('video_view_count')), | ||||
|                             }) | ||||
|                         return self.playlist_result( | ||||
|                             entries, video_id, | ||||
|                             'Post by %s' % uploader_id if uploader_id else None, | ||||
|                             description) | ||||
|  | ||||
|         if not video_url: | ||||
|             video_url = self._og_search_video_url(webpage, secure=False) | ||||
|   | ||||
| @@ -16,6 +16,8 @@ class IviIE(InfoExtractor): | ||||
|     IE_DESC = 'ivi.ru' | ||||
|     IE_NAME = 'ivi' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' | ||||
|     _GEO_BYPASS = False | ||||
|     _GEO_COUNTRIES = ['RU'] | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # Single movie | ||||
| @@ -91,7 +93,11 @@ class IviIE(InfoExtractor): | ||||
|  | ||||
|         if 'error' in video_json: | ||||
|             error = video_json['error'] | ||||
|             if error['origin'] == 'NoRedisValidData': | ||||
|             origin = error['origin'] | ||||
|             if origin == 'NotAllowedForLocation': | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=error['message'], countries=self._GEO_COUNTRIES) | ||||
|             elif origin == 'NoRedisValidData': | ||||
|                 raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|             raise ExtractorError( | ||||
|                 'Unable to download video %s: %s' % (video_id, error['message']), | ||||
|   | ||||
| @@ -30,7 +30,7 @@ from ..utils import ( | ||||
| class LeIE(InfoExtractor): | ||||
|     IE_DESC = '乐视网' | ||||
|     _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html' | ||||
|  | ||||
|     _GEO_COUNTRIES = ['CN'] | ||||
|     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -126,10 +126,9 @@ class LeIE(InfoExtractor): | ||||
|         if playstatus['status'] == 0: | ||||
|             flag = playstatus['flag'] | ||||
|             if flag == 1: | ||||
|                 msg = 'Country %s auth error' % playstatus['country'] | ||||
|                 self.raise_geo_restricted() | ||||
|             else: | ||||
|                 msg = 'Generic error. flag = %d' % flag | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|                 raise ExtractorError('Generic error. flag = %d' % flag, expected=True) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         media_id = self._match_id(url) | ||||
|   | ||||
| @@ -62,13 +62,21 @@ class LimelightBaseIE(InfoExtractor): | ||||
|                 fmt = { | ||||
|                     'url': stream_url, | ||||
|                     'abr': float_or_none(stream.get('audioBitRate')), | ||||
|                     'vbr': float_or_none(stream.get('videoBitRate')), | ||||
|                     'fps': float_or_none(stream.get('videoFrameRate')), | ||||
|                     'width': int_or_none(stream.get('videoWidthInPixels')), | ||||
|                     'height': int_or_none(stream.get('videoHeightInPixels')), | ||||
|                     'ext': ext, | ||||
|                 } | ||||
|                 rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url) | ||||
|                 width = int_or_none(stream.get('videoWidthInPixels')) | ||||
|                 height = int_or_none(stream.get('videoHeightInPixels')) | ||||
|                 vbr = float_or_none(stream.get('videoBitRate')) | ||||
|                 if width or height or vbr: | ||||
|                     fmt.update({ | ||||
|                         'width': width, | ||||
|                         'height': height, | ||||
|                         'vbr': vbr, | ||||
|                     }) | ||||
|                 else: | ||||
|                     fmt['vcodec'] = 'none' | ||||
|                 rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url) | ||||
|                 if rtmp: | ||||
|                     format_id = 'rtmp' | ||||
|                     if stream.get('videoBitRate'): | ||||
|   | ||||
| @@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor): | ||||
|         m3u8_url = video_data.get('m3u8_url') | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         f4m_url = video_data.get('f4m_url') | ||||
|         if f4m_url: | ||||
| @@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor): | ||||
|         if smil_url: | ||||
|             formats.extend(self._extract_smil_formats(smil_url, broadcast_id)) | ||||
|  | ||||
|         entry_protocol = 'm3u8' if is_live else 'm3u8_native' | ||||
|         m3u8_url = stream_info.get('m3u8_url') | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) | ||||
|                 m3u8_url, broadcast_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         rtsp_url = stream_info.get('rtsp_url') | ||||
|         if rtsp_url: | ||||
| @@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|     def _extract_video_formats(self, video_data, video_id, entry_protocol): | ||||
|     def _extract_video_formats(self, video_data, video_id): | ||||
|         formats = [] | ||||
|  | ||||
|         progressive_url = video_data.get('progressiveUrl') | ||||
| @@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|         m3u8_url = video_data.get('httpUrl') | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         rtsp_url = video_data.get('rtspUrl') | ||||
|         if rtsp_url: | ||||
| @@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|                 } | ||||
|             video_data = self._download_json(stream_url, content_id) | ||||
|             is_live = video_data.get('isLive') | ||||
|             entry_protocol = 'm3u8' if is_live else 'm3u8_native' | ||||
|             info.update({ | ||||
|                 'id': content_id, | ||||
|                 'title': self._live_title(info['title']) if is_live else info['title'], | ||||
|                 'formats': self._extract_video_formats(video_data, content_id, entry_protocol), | ||||
|                 'formats': self._extract_video_formats(video_data, content_id), | ||||
|                 'is_live': is_live, | ||||
|             }) | ||||
|             return info | ||||
|   | ||||
| @@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE): | ||||
|         course_path = mobj.group('coursepath') | ||||
|         course_id = mobj.group('courseid') | ||||
|  | ||||
|         item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path | ||||
|  | ||||
|         course = self._download_json( | ||||
|             'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, | ||||
|             course_id, 'Downloading course JSON') | ||||
|             course_id, 'Downloading course JSON', fatal=False) | ||||
|  | ||||
|         if not course: | ||||
|             webpage = self._download_webpage(url, course_id) | ||||
|             entries = [ | ||||
|                 self.url_result( | ||||
|                     item_template % video_id, ie=LyndaIE.ie_key(), | ||||
|                     video_id=video_id) | ||||
|                 for video_id in re.findall( | ||||
|                     r'data-video-id=["\'](\d+)', webpage)] | ||||
|             return self.playlist_result( | ||||
|                 entries, course_id, | ||||
|                 self._og_search_title(webpage, fatal=False), | ||||
|                 self._og_search_description(webpage)) | ||||
|  | ||||
|         if course.get('Status') == 'NotFound': | ||||
|             raise ExtractorError( | ||||
| @@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE): | ||||
|                 if video_id: | ||||
|                     entries.append({ | ||||
|                         '_type': 'url_transparent', | ||||
|                         'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id), | ||||
|                         'url': item_template % video_id, | ||||
|                         'ie_key': LyndaIE.ie_key(), | ||||
|                         'chapter': chapter.get('Title'), | ||||
|                         'chapter_number': int_or_none(chapter.get('ChapterIndex')), | ||||
|   | ||||
| @@ -14,7 +14,7 @@ from ..utils import ( | ||||
|  | ||||
| class MDRIE(InfoExtractor): | ||||
|     IE_DESC = 'MDR.DE and KiKA' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # MDR regularly deletes its videos | ||||
| @@ -31,6 +31,7 @@ class MDRIE(InfoExtractor): | ||||
|             'duration': 250, | ||||
|             'uploader': 'MITTELDEUTSCHER RUNDFUNK', | ||||
|         }, | ||||
|         'skip': '404 not found', | ||||
|     }, { | ||||
|         'url': 'http://www.kika.de/baumhaus/videos/video19636.html', | ||||
|         'md5': '4930515e36b06c111213e80d1e4aad0e', | ||||
| @@ -41,6 +42,7 @@ class MDRIE(InfoExtractor): | ||||
|             'duration': 134, | ||||
|             'uploader': 'KIKA', | ||||
|         }, | ||||
|         'skip': '404 not found', | ||||
|     }, { | ||||
|         'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', | ||||
|         'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', | ||||
| @@ -49,11 +51,21 @@ class MDRIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Beutolomäus und der geheime Weihnachtswunsch', | ||||
|             'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', | ||||
|             'timestamp': 1450950000, | ||||
|             'upload_date': '20151224', | ||||
|             'timestamp': 1482541200, | ||||
|             'upload_date': '20161224', | ||||
|             'duration': 4628, | ||||
|             'uploader': 'KIKA', | ||||
|         }, | ||||
|     }, { | ||||
|         # audio with alternative playerURL pattern | ||||
|         'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', | ||||
|         'info_dict': { | ||||
|             'id': '100', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Feature: Operation Mindfuck - Robert Anton Wilson', | ||||
|             'duration': 3239, | ||||
|             'uploader': 'MITTELDEUTSCHER RUNDFUNK', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', | ||||
|         'only_matching': True, | ||||
| @@ -71,7 +83,7 @@ class MDRIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data_url = self._search_regex( | ||||
|             r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', | ||||
|             r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1', | ||||
|             webpage, 'data url', group='url').replace(r'\/', '/') | ||||
|  | ||||
|         doc = self._download_xml( | ||||
|   | ||||
							
								
								
									
										259
									
								
								youtube_dl/extractor/medialaan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								youtube_dl/extractor/medialaan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,259 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MedialaanIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:www\.)? | ||||
|                         (?: | ||||
|                             (?P<site_id>vtm|q2|vtmkzoom)\.be/ | ||||
|                             (?: | ||||
|                                 video(?:/[^/]+/id/|/?\?.*?\baid=)| | ||||
|                                 (?:[^/]+/)* | ||||
|                             ) | ||||
|                         ) | ||||
|                         (?P<id>[^/?#&]+) | ||||
|                     ''' | ||||
|     _NETRC_MACHINE = 'medialaan' | ||||
|     _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-' | ||||
|     _SITE_TO_APP_ID = { | ||||
|         'vtm': 'vtm_watch', | ||||
|         'q2': 'q2', | ||||
|         'vtmkzoom': 'vtmkzoom', | ||||
|     } | ||||
|     _TESTS = [{ | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch', | ||||
|         'info_dict': { | ||||
|             'id': 'vtm_20170219_VM0678361_vtmwatch', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Allemaal Chris afl. 6', | ||||
|             'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', | ||||
|             'timestamp': 1487533280, | ||||
|             'upload_date': '20170219', | ||||
|             'duration': 2562, | ||||
|             'series': 'Allemaal Chris', | ||||
|             'season': 'Allemaal Chris', | ||||
|             'season_number': 1, | ||||
|             'season_id': '256936078124527', | ||||
|             'episode': 'Allemaal Chris afl. 6', | ||||
|             'episode_number': 6, | ||||
|             'episode_id': '256936078591527', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://vtm.be/video?aid=168332', | ||||
|         'info_dict': { | ||||
|             'id': '168332', | ||||
|             'ext': 'mp4', | ||||
|             'title': '"Veronique liegt!"', | ||||
|             'description': 'md5:1385e2b743923afe54ba4adc38476155', | ||||
|             'timestamp': 1489002029, | ||||
|             'upload_date': '20170308', | ||||
|             'duration': 96, | ||||
|         }, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video?aid=163157', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._logged_in = False | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
|             self.raise_login_required() | ||||
|  | ||||
|         auth_data = { | ||||
|             'APIKey': self._APIKEY, | ||||
|             'sdk': 'js_6.1', | ||||
|             'format': 'json', | ||||
|             'loginID': username, | ||||
|             'password': password, | ||||
|         } | ||||
|  | ||||
|         auth_info = self._download_json( | ||||
|             'https://accounts.eu1.gigya.com/accounts.login', None, | ||||
|             note='Logging in', errnote='Unable to log in', | ||||
|             data=urlencode_postdata(auth_data)) | ||||
|  | ||||
|         error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') | ||||
|         if error_message: | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login: %s' % error_message, expected=True) | ||||
|  | ||||
|         self._uid = auth_info['UID'] | ||||
|         self._uid_signature = auth_info['UIDSignature'] | ||||
|         self._signature_timestamp = auth_info['signatureTimestamp'] | ||||
|  | ||||
|         self._logged_in = True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Extract a clip (no login) or a vod item (login required) from a page. | ||||
|  | ||||
|         The page is treated as vod when a vod id can be found in the embedded | ||||
|         player config or in the markup; otherwise it is handled as a clip. | ||||
|         Returns the info dict built for the matching branch. | ||||
|         """ | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, site_id = mobj.group('id', 'site_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # The player config is a JSON string literal handed to JSON.parse(); | ||||
|         # undo the JavaScript string escaping before parsing. A missing config | ||||
|         # parses as an empty dict. | ||||
|         config = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);', | ||||
|                 webpage, 'config', default='{}'), video_id, | ||||
|             transform_source=lambda s: s.replace( | ||||
|                 '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'")) | ||||
|  | ||||
|         # Fall back to an escaped "vodId" anywhere in the page or to an | ||||
|         # element whose id looks like vod-<digits>. | ||||
|         vod_id = config.get('vodId') or self._search_regex( | ||||
|             (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"', | ||||
|              r'<[^>]+id=["\']vod-(\d+)'), | ||||
|             webpage, 'video_id', default=None) | ||||
|  | ||||
|         # clip, no authentication required | ||||
|         if not vod_id: | ||||
|             # The vmmaplayer(...) argument list is wrapped in [] so that it | ||||
|             # parses as a JSON array; the last argument is used as the video. | ||||
|             player = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'vmmaplayer\(({.+?})\);', webpage, 'vmma player', | ||||
|                     default=''), | ||||
|                 video_id, transform_source=lambda s: '[%s]' % s, fatal=False) | ||||
|             if player: | ||||
|                 video = player[-1] | ||||
|                 info = { | ||||
|                     'id': video_id, | ||||
|                     'url': video['videoUrl'], | ||||
|                     'title': video['title'], | ||||
|                     'thumbnail': video.get('imageUrl'), | ||||
|                     'timestamp': int_or_none(video.get('createdDate')), | ||||
|                     'duration': int_or_none(video.get('duration')), | ||||
|                 } | ||||
|             else: | ||||
|                 # No usable vmmaplayer call: take the first HTML5 media entry | ||||
|                 # and fill title/duration from the page's meta tags. | ||||
|                 info = self._parse_html5_media_entries( | ||||
|                     url, webpage, video_id, m3u8_id='hls')[0] | ||||
|                 info.update({ | ||||
|                     'id': video_id, | ||||
|                     'title': self._html_search_meta('description', webpage), | ||||
|                     'duration': parse_duration(self._html_search_meta('duration', webpage)), | ||||
|                 }) | ||||
|         # vod, authentication required | ||||
|         else: | ||||
|             if not self._logged_in: | ||||
|                 self._login() | ||||
|  | ||||
|             settings = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', | ||||
|                     webpage, 'drupal settings', default='{}'), | ||||
|                 video_id) | ||||
|  | ||||
|             # Read a string from settings[container][item]; if it is absent, | ||||
|             # scan the raw page for a "<item>": "<value>" pair instead. | ||||
|             def get(container, item): | ||||
|                 return try_get( | ||||
|                     settings, lambda x: x[container][item], | ||||
|                     compat_str) or self._search_regex( | ||||
|                     r'"%s"\s*:\s*"([^"]+)' % item, webpage, item, | ||||
|                     default=None) | ||||
|  | ||||
|             app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch') | ||||
|             sso = get('vod', 'gigyaDatabase') or 'vtm-sso' | ||||
|  | ||||
|             # The identity values stored by _login() authorise the stream request | ||||
|             data = self._download_json( | ||||
|                 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id, | ||||
|                 video_id, query={ | ||||
|                     'app_id': app_id, | ||||
|                     'user_network': sso, | ||||
|                     'UID': self._uid, | ||||
|                     'UIDSignature': self._uid_signature, | ||||
|                     'signatureTimestamp': self._signature_timestamp, | ||||
|                 }) | ||||
|  | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 data['response']['uri'], video_id, entry_protocol='m3u8_native', | ||||
|                 ext='mp4', m3u8_id='hls') | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|             info = { | ||||
|                 'id': vod_id, | ||||
|                 'formats': formats, | ||||
|             } | ||||
|  | ||||
|             # Both lookups must happen here: get() is redefined further down. | ||||
|             api_key = get('vod', 'apiKey') | ||||
|             channel = get('medialaanGigya', 'channel') | ||||
|  | ||||
|             # Metadata is optional: the request is non-fatal and only made | ||||
|             # when an API key was found. | ||||
|             if api_key: | ||||
|                 videos = self._download_json( | ||||
|                     'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False, | ||||
|                     query={ | ||||
|                         'channels': channel, | ||||
|                         'ids': vod_id, | ||||
|                         'limit': 1, | ||||
|                         'apikey': api_key, | ||||
|                     }) | ||||
|                 if videos: | ||||
|                     video = try_get( | ||||
|                         videos, lambda x: x['response']['videos'][0], dict) | ||||
|                     if video: | ||||
|                         # NOTE: intentionally shadows the settings-based get() | ||||
|                         # above; from here on lookups go against the API video. | ||||
|                         def get(container, item, expected_type=None): | ||||
|                             return try_get( | ||||
|                                 video, lambda x: x[container][item], expected_type) | ||||
|  | ||||
|                         def get_string(container, item): | ||||
|                             return get(container, item, compat_str) | ||||
|  | ||||
|                         info.update({ | ||||
|                             'series': get_string('program', 'title'), | ||||
|                             'season': get_string('season', 'title'), | ||||
|                             'season_number': int_or_none(get('season', 'number')), | ||||
|                             'season_id': get_string('season', 'id'), | ||||
|                             'episode': get_string('episode', 'title'), | ||||
|                             'episode_number': int_or_none(get('episode', 'number')), | ||||
|                             'episode_id': get_string('episode', 'id'), | ||||
|                             # 'duration' wins; otherwise durationMillis is | ||||
|                             # passed through int_or_none with scale=1000 | ||||
|                             # (presumably milliseconds to seconds). | ||||
|                             'duration': int_or_none( | ||||
|                                 video.get('duration')) or int_or_none( | ||||
|                                 video.get('durationMillis'), scale=1000), | ||||
|                             'title': get_string('episode', 'title'), | ||||
|                             'description': get_string('episode', 'text'), | ||||
|                             'timestamp': unified_timestamp(get_string( | ||||
|                                 'publication', 'begin')), | ||||
|                         }) | ||||
|  | ||||
|             # Title fallbacks, in order: player config, escaped "title" in the | ||||
|             # page, then the Open Graph title. | ||||
|             if not info.get('title'): | ||||
|                 info['title'] = try_get( | ||||
|                     config, lambda x: x['videoConfig']['title'], | ||||
|                     compat_str) or self._html_search_regex( | ||||
|                     r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title', | ||||
|                     default=None) or self._og_search_title(webpage) | ||||
|  | ||||
|         # Applies to clips and vod alike: take the description from the page | ||||
|         # body when nothing better was found. | ||||
|         if not info.get('description'): | ||||
|             info['description'] = self._html_search_regex( | ||||
|                 r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>', | ||||
|                 webpage, 'description', default=None) | ||||
|  | ||||
|         return info | ||||
| @@ -2,16 +2,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class MGTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' | ||||
|     IE_DESC = '芒果TV' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', | ||||
|         'md5': '1bdadcf760a0b90946ca68ee9a2db41a', | ||||
|         'md5': 'b1ffc0fc163152acf6beaa81832c9ee7', | ||||
|         'info_dict': { | ||||
|             'id': '3116640', | ||||
|             'ext': 'mp4', | ||||
| @@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor): | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         # no tbr extracted from stream_url | ||||
|         'url': 'http://www.mgtv.com/v/1/1/f/3324755.html', | ||||
|         'url': 'http://www.mgtv.com/b/301817/3826653.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         api_data = self._download_json( | ||||
|             'http://v.api.mgtv.com/player/video', video_id, | ||||
|             'http://pcweb.api.mgtv.com/player/video', video_id, | ||||
|             query={'video_id': video_id}, | ||||
|             headers=self.geo_verification_headers())['data'] | ||||
|         info = api_data['info'] | ||||
|         title = info['title'].strip() | ||||
|         stream_domain = api_data['stream_domain'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         for idx, stream in enumerate(api_data['stream']): | ||||
|             stream_url = stream.get('url') | ||||
|             if not stream_url: | ||||
|             stream_path = stream.get('url') | ||||
|             if not stream_path: | ||||
|                 continue | ||||
|             format_data = self._download_json( | ||||
|                 stream_domain + stream_path, video_id, | ||||
|                 note='Download video info for format #%d' % idx) | ||||
|             format_url = format_data.get('info') | ||||
|             if not format_url: | ||||
|                 continue | ||||
|             tbr = int_or_none(self._search_regex( | ||||
|                 r'(\d+)\.mp4', stream_url, 'tbr', default=None)) | ||||
|  | ||||
|             def extract_format(stream_url, format_id, idx, query={}): | ||||
|                 format_info = self._download_json( | ||||
|                     stream_url, video_id, | ||||
|                     note='Download video info for format %s' % (format_id or '#%d' % idx), | ||||
|                     query=query) | ||||
|                 return { | ||||
|                     'format_id': format_id, | ||||
|                     'url': format_info['info'], | ||||
|                     'ext': 'mp4', | ||||
|                     'tbr': tbr, | ||||
|                 } | ||||
|  | ||||
|             formats.append(extract_format( | ||||
|                 stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) | ||||
|             formats.append(extract_format(stream_url.replace( | ||||
|                 '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) | ||||
|                 r'_(\d+)_mp4/', format_url, 'tbr', default=None)) | ||||
|             formats.append({ | ||||
|                 'format_id': compat_str(tbr or idx), | ||||
|                 'url': format_url, | ||||
|                 'ext': 'mp4', | ||||
|                 'tbr': tbr, | ||||
|                 'protocol': 'm3u8_native', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'].strip(), | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'description': info.get('desc'), | ||||
|             'duration': int_or_none(info.get('duration')), | ||||
|   | ||||
| @@ -51,6 +51,7 @@ class MioMioIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', | ||||
|         }, | ||||
|         'skip': 'Unable to load videos', | ||||
|     }] | ||||
|  | ||||
|     def _extract_mioplayer(self, webpage, video_id, title, http_headers): | ||||
| @@ -94,9 +95,18 @@ class MioMioIE(InfoExtractor): | ||||
|  | ||||
|         return entries | ||||
|  | ||||
|     def _download_chinese_webpage(self, *args, **kwargs): | ||||
|         # Requests with English locales return garbage | ||||
|         headers = { | ||||
|             'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3', | ||||
|         } | ||||
|         kwargs.setdefault('headers', {}).update(headers) | ||||
|         return self._download_webpage(*args, **kwargs) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage = self._download_chinese_webpage( | ||||
|             url, video_id) | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'description', webpage, 'title', fatal=True) | ||||
| @@ -106,7 +116,7 @@ class MioMioIE(InfoExtractor): | ||||
|  | ||||
|         if '_h5' in mioplayer_path: | ||||
|             player_url = compat_urlparse.urljoin(url, mioplayer_path) | ||||
|             player_webpage = self._download_webpage( | ||||
|             player_webpage = self._download_chinese_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note='Downloading player webpage', headers={'Referer': url}) | ||||
|             entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
| @@ -24,6 +25,9 @@ class MiTeleBaseIE(InfoExtractor): | ||||
|             r'(?s)(<ms-video-player.+?</ms-video-player>)', | ||||
|             webpage, 'ms video player')) | ||||
|         video_id = player_data['data-media-id'] | ||||
|         if player_data.get('data-cms-id') == 'ooyala': | ||||
|             return self.url_result( | ||||
|                 'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id) | ||||
|         config_url = compat_urlparse.urljoin(url, player_data['data-config']) | ||||
|         config = self._download_json( | ||||
|             config_url, video_id, 'Downloading config JSON') | ||||
|   | ||||
| @@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE): | ||||
|         formats.extend(self._extract_f4m_formats( | ||||
|             stack_base_url + 'f4m', stack_id, | ||||
|             f4m_id='hds', fatal=False)) | ||||
|         mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False) | ||||
|         if mp4_url: | ||||
|             formats.append({ | ||||
|                 'url': mp4_url, | ||||
|                 'format_id': 'mp4', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
							
								
								
									
										83
									
								
								youtube_dl/extractor/njpwworld.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								youtube_dl/extractor/njpwworld.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     get_element_by_class, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NJPWWorldIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)' | ||||
|     IE_DESC = '新日本プロレスワールド' | ||||
|     _NETRC_MACHINE = 'njpwworld' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://njpwworld.com/p/s_series_00155_1_9/', | ||||
|         'info_dict': { | ||||
|             'id': 's_series_00155_1_9', | ||||
|             'ext': 'mp4', | ||||
|             'title': '第9試合 ランディ・サベージ vs リック・スタイナー', | ||||
|             'tags': list, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # AES-encrypted m3u8 | ||||
|         }, | ||||
|         'skip': 'Requires login', | ||||
|     } | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         # No authentication to be performed | ||||
|         if not username: | ||||
|             return True | ||||
|  | ||||
|         webpage, urlh = self._download_webpage_handle( | ||||
|             'https://njpwworld.com/auth/login', None, | ||||
|             note='Logging in', errnote='Unable to login', | ||||
|             data=urlencode_postdata({'login_id': username, 'pw': password})) | ||||
|         # /auth/login will return 302 for successful logins | ||||
|         if urlh.geturl() == 'https://njpwworld.com/auth/login': | ||||
|             self.report_warning('unable to login') | ||||
|             return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage): | ||||
|             player_url = compat_urlparse.urljoin(url, player_url) | ||||
|  | ||||
|             player_page = self._download_webpage( | ||||
|                 player_url, video_id, note='Downloading player page') | ||||
|  | ||||
|             entries = self._parse_html5_media_entries( | ||||
|                 player_url, player_page, video_id, m3u8_id='hls-%s' % kind, | ||||
|                 m3u8_entry_protocol='m3u8_native', | ||||
|                 preference=2 if 'hq' in kind else 1) | ||||
|             formats.extend(entries[0]['formats']) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         post_content = get_element_by_class('post-content', webpage) | ||||
|         tags = re.findall( | ||||
|             r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content | ||||
|         ) if post_content else None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'formats': formats, | ||||
|             'tags': tags, | ||||
|         } | ||||
| @@ -23,7 +23,7 @@ from ..utils import ( | ||||
|  | ||||
| class NocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'http://noco.tv/do.php' | ||||
|     _LOGIN_URL = 'https://noco.tv/do.php' | ||||
|     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' | ||||
|     _SUB_LANG_TEMPLATE = '&sub_lang=%s' | ||||
|     _NETRC_MACHINE = 'noco' | ||||
| @@ -69,16 +69,17 @@ class NocoIE(InfoExtractor): | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_form = { | ||||
|             'a': 'login', | ||||
|             'cookie': '1', | ||||
|             'username': username, | ||||
|             'password': password, | ||||
|         } | ||||
|         request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') | ||||
|  | ||||
|         login = self._download_json(request, None, 'Logging in as %s' % username) | ||||
|         login = self._download_json( | ||||
|             self._LOGIN_URL, None, 'Logging in as %s' % username, | ||||
|             data=urlencode_postdata({ | ||||
|                 'a': 'login', | ||||
|                 'cookie': '1', | ||||
|                 'username': username, | ||||
|                 'password': password, | ||||
|             }), | ||||
|             headers={ | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', | ||||
|             }) | ||||
|  | ||||
|         if 'erreur' in login: | ||||
|             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) | ||||
|   | ||||
| @@ -3,41 +3,27 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     fix_xml_ampersands, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     strip_jsonp, | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NPOBaseIE(InfoExtractor): | ||||
|     def _get_token(self, video_id): | ||||
|         token_page = self._download_webpage( | ||||
|             'http://ida.omroep.nl/npoplayer/i.js', | ||||
|             video_id, note='Downloading token') | ||||
|         token = self._search_regex( | ||||
|             r'npoplayer\.token = "(.+?)"', token_page, 'token') | ||||
|         # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js | ||||
|         token_l = list(token) | ||||
|         first = second = None | ||||
|         for i in range(5, len(token_l) - 4): | ||||
|             if token_l[i].isdigit(): | ||||
|                 if first is None: | ||||
|                     first = i | ||||
|                 elif second is None: | ||||
|                     second = i | ||||
|         if first is None or second is None: | ||||
|             first = 12 | ||||
|             second = 13 | ||||
|  | ||||
|         token_l[first], token_l[second] = token_l[second], token_l[first] | ||||
|  | ||||
|         return ''.join(token_l) | ||||
|         return self._download_json( | ||||
|             'http://ida.omroep.nl/app.php/auth', video_id, | ||||
|             note='Downloading token')['token'] | ||||
|  | ||||
|  | ||||
| class NPOIE(NPOBaseIE): | ||||
| @@ -51,97 +37,120 @@ class NPOIE(NPOBaseIE): | ||||
|                             (?: | ||||
|                                 npo\.nl/(?!live|radio)(?:[^/]+/){2}| | ||||
|                                 ntr\.nl/(?:[^/]+/){2,}| | ||||
|                                 omroepwnl\.nl/video/fragment/[^/]+__ | ||||
|                                 omroepwnl\.nl/video/fragment/[^/]+__| | ||||
|                                 zapp\.nl/[^/]+/[^/]+/ | ||||
|                             ) | ||||
|                         ) | ||||
|                         (?P<id>[^/?#]+) | ||||
|                 ''' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', | ||||
|             'md5': '4b3f9c429157ec4775f2c9cb7b911016', | ||||
|             'info_dict': { | ||||
|                 'id': 'VPWON_1220719', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'Nieuwsuur', | ||||
|                 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', | ||||
|                 'upload_date': '20140622', | ||||
|             }, | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', | ||||
|         'md5': '4b3f9c429157ec4775f2c9cb7b911016', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1220719', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Nieuwsuur', | ||||
|             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', | ||||
|             'upload_date': '20140622', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', | ||||
|             'md5': 'da50a5787dbfc1603c4ad80f31c5120b', | ||||
|             'info_dict': { | ||||
|                 'id': 'VARA_101191800', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'De Mega Mike & Mega Thomas show: The best of.', | ||||
|                 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', | ||||
|                 'upload_date': '20090227', | ||||
|                 'duration': 2400, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', | ||||
|         'md5': 'da50a5787dbfc1603c4ad80f31c5120b', | ||||
|         'info_dict': { | ||||
|             'id': 'VARA_101191800', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'De Mega Mike & Mega Thomas show: The best of.', | ||||
|             'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', | ||||
|             'upload_date': '20090227', | ||||
|             'duration': 2400, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', | ||||
|             'md5': 'f8065e4e5a7824068ed3c7e783178f2c', | ||||
|             'info_dict': { | ||||
|                 'id': 'VPWON_1169289', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'Tegenlicht: De toekomst komt uit Afrika', | ||||
|                 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', | ||||
|                 'upload_date': '20130225', | ||||
|                 'duration': 3000, | ||||
|             }, | ||||
|     }, { | ||||
|         'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', | ||||
|         'md5': 'f8065e4e5a7824068ed3c7e783178f2c', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1169289', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika', | ||||
|             'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', | ||||
|             'upload_date': '20130225', | ||||
|             'duration': 3000, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', | ||||
|             'info_dict': { | ||||
|                 'id': 'WO_VPRO_043706', | ||||
|                 'ext': 'wmv', | ||||
|                 'title': 'De nieuwe mens - Deel 1', | ||||
|                 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', | ||||
|                 'duration': 4680, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # mplayer mms download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|     }, { | ||||
|         'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', | ||||
|         'info_dict': { | ||||
|             'id': 'WO_VPRO_043706', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'De nieuwe mens - Deel 1', | ||||
|             'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', | ||||
|             'duration': 4680, | ||||
|         }, | ||||
|         # non asf in streams | ||||
|         { | ||||
|             'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', | ||||
|             'md5': 'b3da13de374cbe2d5332a7e910bef97f', | ||||
|             'info_dict': { | ||||
|                 'id': 'WO_NOS_762771', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Hoe gaat Europa verder na Parijs?', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', | ||||
|             'md5': '01c6a2841675995da1f0cf776f03a9c3', | ||||
|             'info_dict': { | ||||
|                 'id': 'VPWON_1233944', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': 'Aap, poot, pies', | ||||
|                 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', | ||||
|                 'upload_date': '20150508', | ||||
|                 'duration': 599, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', | ||||
|             'md5': 'd30cd8417b8b9bca1fdff27428860d08', | ||||
|             'info_dict': { | ||||
|                 'id': 'POW_00996502', | ||||
|                 'ext': 'm4v', | ||||
|                 'title': '''"Dit is wel een 'landslide'..."''', | ||||
|                 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', | ||||
|                 'upload_date': '20150508', | ||||
|                 'duration': 462, | ||||
|             }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     ] | ||||
|     }, { | ||||
|         # non asf in streams | ||||
|         'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', | ||||
|         'info_dict': { | ||||
|             'id': 'WO_NOS_762771', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hoe gaat Europa verder na Parijs?', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1233944', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Aap, poot, pies', | ||||
|             'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', | ||||
|             'upload_date': '20150508', | ||||
|             'duration': 599, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', | ||||
|         'info_dict': { | ||||
|             'id': 'POW_00996502', | ||||
|             'ext': 'm4v', | ||||
|             'title': '''"Dit is wel een 'landslide'..."''', | ||||
|             'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', | ||||
|             'upload_date': '20150508', | ||||
|             'duration': 462, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         # audio | ||||
|         'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437', | ||||
|         'info_dict': { | ||||
|             'id': 'RBX_FUNX_6683215', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Jouw Stad Rotterdam', | ||||
|             'description': 'md5:db251505244f097717ec59fabc372d9f', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # live stream | ||||
|         'url': 'npo:LI_NL1_4188102', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -170,70 +179,115 @@ class NPOIE(NPOBaseIE): | ||||
|         token = self._get_token(video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         urls = set() | ||||
|  | ||||
|         pubopties = metadata.get('pubopties') | ||||
|         if pubopties: | ||||
|             quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) | ||||
|             for format_id in pubopties: | ||||
|                 format_info = self._download_json( | ||||
|                     'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' | ||||
|                     % (video_id, format_id, token), | ||||
|                     video_id, 'Downloading %s JSON' % format_id) | ||||
|                 if format_info.get('error_code', 0) or format_info.get('errorcode', 0): | ||||
|         quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) | ||||
|         items = self._download_json( | ||||
|             'http://ida.omroep.nl/app.php/%s' % video_id, video_id, | ||||
|             'Downloading formats JSON', query={ | ||||
|                 'adaptive': 'yes', | ||||
|                 'token': token, | ||||
|             })['items'][0] | ||||
|         for num, item in enumerate(items): | ||||
|             item_url = item.get('url') | ||||
|             if not item_url or item_url in urls: | ||||
|                 continue | ||||
|             urls.add(item_url) | ||||
|             format_id = self._search_regex( | ||||
|                 r'video/ida/([^/]+)', item_url, 'format id', | ||||
|                 default=None) | ||||
|  | ||||
|             def add_format_url(format_url): | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                 }) | ||||
|  | ||||
|             # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 | ||||
|             if item.get('contentType') in ('url', 'audio'): | ||||
|                 add_format_url(item_url) | ||||
|                 continue | ||||
|  | ||||
|             try: | ||||
|                 stream_info = self._download_json( | ||||
|                     item_url + '&type=json', video_id, | ||||
|                     'Downloading %s stream JSON' | ||||
|                     % item.get('label') or item.get('format') or format_id or num) | ||||
|             except ExtractorError as ee: | ||||
|                 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: | ||||
|                     error = (self._parse_json( | ||||
|                         ee.cause.read().decode(), video_id, | ||||
|                         fatal=False) or {}).get('errorstring') | ||||
|                     if error: | ||||
|                         raise ExtractorError(error, expected=True) | ||||
|                 raise | ||||
|             # Stream URL instead of JSON, example: npo:LI_NL1_4188102 | ||||
|             if isinstance(stream_info, compat_str): | ||||
|                 if not stream_info.startswith('http'): | ||||
|                     continue | ||||
|                 streams = format_info.get('streams') | ||||
|                 if streams: | ||||
|                     try: | ||||
|                         video_info = self._download_json( | ||||
|                             streams[0] + '&type=json', | ||||
|                             video_id, 'Downloading %s stream JSON' % format_id) | ||||
|                     except ExtractorError as ee: | ||||
|                         if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: | ||||
|                             error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') | ||||
|                             if error: | ||||
|                                 raise ExtractorError(error, expected=True) | ||||
|                         raise | ||||
|                 else: | ||||
|                     video_info = format_info | ||||
|                 video_url = video_info.get('url') | ||||
|                 if not video_url: | ||||
|                 video_url = stream_info | ||||
|             # JSON | ||||
|             else: | ||||
|                 video_url = stream_info.get('url') | ||||
|             if not video_url or video_url in urls: | ||||
|                 continue | ||||
|             urls.add(item_url) | ||||
|             if determine_ext(video_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     video_url, video_id, ext='mp4', | ||||
|                     entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) | ||||
|             else: | ||||
|                 add_format_url(video_url) | ||||
|  | ||||
|         is_live = metadata.get('medium') == 'live' | ||||
|  | ||||
|         if not is_live: | ||||
|             for num, stream in enumerate(metadata.get('streams', [])): | ||||
|                 stream_url = stream.get('url') | ||||
|                 if not stream_url or stream_url in urls: | ||||
|                     continue | ||||
|                 if format_id == 'adaptive': | ||||
|                     formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) | ||||
|                 else: | ||||
|                 urls.add(stream_url) | ||||
|                 # smooth streaming is not supported | ||||
|                 stream_type = stream.get('type', '').lower() | ||||
|                 if stream_type in ['ss', 'ms']: | ||||
|                     continue | ||||
|                 if stream_type == 'hds': | ||||
|                     f4m_formats = self._extract_f4m_formats( | ||||
|                         stream_url, video_id, fatal=False) | ||||
|                     # f4m downloader downloads only piece of live stream | ||||
|                     for f4m_format in f4m_formats: | ||||
|                         f4m_format['preference'] = -1 | ||||
|                     formats.extend(f4m_formats) | ||||
|                 elif stream_type == 'hls': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         stream_url, video_id, ext='mp4', fatal=False)) | ||||
|                 # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 | ||||
|                 elif '.asf' in stream_url: | ||||
|                     asx = self._download_xml( | ||||
|                         stream_url, video_id, | ||||
|                         'Downloading stream %d ASX playlist' % num, | ||||
|                         transform_source=fix_xml_ampersands, fatal=False) | ||||
|                     if not asx: | ||||
|                         continue | ||||
|                     ref = asx.find('./ENTRY/Ref') | ||||
|                     if ref is None: | ||||
|                         continue | ||||
|                     video_url = ref.get('href') | ||||
|                     if not video_url or video_url in urls: | ||||
|                         continue | ||||
|                     urls.add(video_url) | ||||
|                     formats.append({ | ||||
|                         'url': video_url, | ||||
|                         'format_id': format_id, | ||||
|                         'quality': quality(format_id), | ||||
|                         'ext': stream.get('formaat', 'asf'), | ||||
|                         'quality': stream.get('kwaliteit'), | ||||
|                         'preference': -10, | ||||
|                     }) | ||||
|  | ||||
|         streams = metadata.get('streams') | ||||
|         if streams: | ||||
|             for i, stream in enumerate(streams): | ||||
|                 stream_url = stream.get('url') | ||||
|                 if not stream_url: | ||||
|                     continue | ||||
|                 if '.asf' not in stream_url: | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'url': stream_url, | ||||
|                         'quality': stream.get('kwaliteit'), | ||||
|                     }) | ||||
|                     continue | ||||
|                 asx = self._download_xml( | ||||
|                     stream_url, video_id, | ||||
|                     'Downloading stream %d ASX playlist' % i, | ||||
|                     transform_source=fix_xml_ampersands) | ||||
|                 ref = asx.find('./ENTRY/Ref') | ||||
|                 if ref is None: | ||||
|                     continue | ||||
|                 video_url = ref.get('href') | ||||
|                 if not video_url: | ||||
|                     continue | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'ext': stream.get('formaat', 'asf'), | ||||
|                     'quality': stream.get('kwaliteit'), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -246,28 +300,28 @@ class NPOIE(NPOBaseIE): | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': self._live_title(title) if is_live else title, | ||||
|             'description': metadata.get('info'), | ||||
|             'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], | ||||
|             'upload_date': unified_strdate(metadata.get('gidsdatum')), | ||||
|             'duration': parse_duration(metadata.get('tijdsduur')), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NPOLiveIE(NPOBaseIE): | ||||
|     IE_NAME = 'npo.nl:live' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/live/npo-1', | ||||
|         'info_dict': { | ||||
|             'id': 'LI_NEDERLAND1_136692', | ||||
|             'id': 'LI_NL1_4188102', | ||||
|             'display_id': 'npo-1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': 'Livestream', | ||||
|             'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -283,58 +337,12 @@ class NPOLiveIE(NPOBaseIE): | ||||
|         live_id = self._search_regex( | ||||
|             r'data-prid="([^"]+)"', webpage, 'live id') | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             'http://e.omroep.nl/metadata/%s' % live_id, | ||||
|             display_id, transform_source=strip_jsonp) | ||||
|  | ||||
|         token = self._get_token(display_id) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         streams = metadata.get('streams') | ||||
|         if streams: | ||||
|             for stream in streams: | ||||
|                 stream_type = stream.get('type').lower() | ||||
|                 # smooth streaming is not supported | ||||
|                 if stream_type in ['ss', 'ms']: | ||||
|                     continue | ||||
|                 stream_info = self._download_json( | ||||
|                     'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp' | ||||
|                     % (stream.get('url'), token), | ||||
|                     display_id, 'Downloading %s JSON' % stream_type) | ||||
|                 if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): | ||||
|                     continue | ||||
|                 stream_url = self._download_json( | ||||
|                     stream_info['stream'], display_id, | ||||
|                     'Downloading %s URL' % stream_type, | ||||
|                     'Unable to download %s URL' % stream_type, | ||||
|                     transform_source=strip_jsonp, fatal=False) | ||||
|                 if not stream_url: | ||||
|                     continue | ||||
|                 if stream_type == 'hds': | ||||
|                     f4m_formats = self._extract_f4m_formats(stream_url, display_id) | ||||
|                     # f4m downloader downloads only piece of live stream | ||||
|                     for f4m_format in f4m_formats: | ||||
|                         f4m_format['preference'] = -1 | ||||
|                     formats.extend(f4m_formats) | ||||
|                 elif stream_type == 'hls': | ||||
|                     formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4')) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'url': stream_url, | ||||
|                         'preference': -10, | ||||
|                     }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'npo:%s' % live_id, | ||||
|             'ie_key': NPOIE.ie_key(), | ||||
|             'id': live_id, | ||||
|             'display_id': display_id, | ||||
|             'title': self._live_title(metadata['titel']), | ||||
|             'description': metadata['info'], | ||||
|             'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], | ||||
|             'formats': formats, | ||||
|             'is_live': True, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -416,7 +424,21 @@ class NPORadioFragmentIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SchoolTVIE(InfoExtractor): | ||||
| class NPODataMidEmbedIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'NPO', | ||||
|             'url': 'npo:%s' % video_id, | ||||
|             'display_id': display_id | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SchoolTVIE(NPODataMidEmbedIE): | ||||
|     IE_NAME = 'schooltv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)' | ||||
|  | ||||
| @@ -435,17 +457,25 @@ class SchoolTVIE(InfoExtractor): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'NPO', | ||||
|             'url': 'npo:%s' % video_id, | ||||
|             'display_id': display_id | ||||
|  | ||||
| class HetKlokhuisIE(NPODataMidEmbedIE): | ||||
|     IE_NAME = 'hetklokhuis' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', | ||||
|         'info_dict': { | ||||
|             'id': 'VPWON_1260528', | ||||
|             'display_id': 'Zwaartekrachtsgolven', | ||||
|             'ext': 'm4v', | ||||
|             'title': 'Het Klokhuis: Zwaartekrachtsgolven', | ||||
|             'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48', | ||||
|             'upload_date': '20170223', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
| class NPOPlaylistBaseIE(NPOIE): | ||||
|   | ||||
| @@ -72,20 +72,41 @@ class OpenloadIE(InfoExtractor): | ||||
|             raise ExtractorError('File not found', expected=True) | ||||
|  | ||||
|         ol_id = self._search_regex( | ||||
|             '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', | ||||
|             '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>', | ||||
|             webpage, 'openload ID') | ||||
|  | ||||
|         first_two_chars = int(float(ol_id[0:][:2])) | ||||
|         urlcode = [] | ||||
|         num = 2 | ||||
|         decoded = '' | ||||
|         a = ol_id[0:24] | ||||
|         b = [] | ||||
|         for i in range(0, len(a), 8): | ||||
|             b.append(int(a[i:i + 8] or '0', 16)) | ||||
|         ol_id = ol_id[24:] | ||||
|         j = 0 | ||||
|         k = 0 | ||||
|         while j < len(ol_id): | ||||
|             c = 128 | ||||
|             d = 0 | ||||
|             e = 0 | ||||
|             f = 0 | ||||
|             _more = True | ||||
|             while _more: | ||||
|                 if j + 1 >= len(ol_id): | ||||
|                     c = 143 | ||||
|                 f = int(ol_id[j:j + 2] or '0', 16) | ||||
|                 j += 2 | ||||
|                 d += (f & 127) << e | ||||
|                 e += 7 | ||||
|                 _more = f >= c | ||||
|             g = d ^ b[k % 3] | ||||
|             for i in range(4): | ||||
|                 char_dec = (g >> 8 * i) & (c + 127) | ||||
|                 char = compat_chr(char_dec) | ||||
|                 if char != '#': | ||||
|                     decoded += char | ||||
|             k += 1 | ||||
|  | ||||
|         while num < len(ol_id): | ||||
|             key = int(float(ol_id[num + 3:][:2])) | ||||
|             urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) | ||||
|             num += 5 | ||||
|  | ||||
|         video_url = 'https://openload.co/stream/' + ''.join( | ||||
|             [value for _, value in sorted(urlcode, key=lambda x: x[0])]) | ||||
|         video_url = 'https://openload.co/stream/%s?mime=true' | ||||
|         video_url = video_url % decoded | ||||
|  | ||||
|         title = self._og_search_title(webpage, default=None) or self._search_regex( | ||||
|             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, | ||||
|   | ||||
							
								
								
									
										138
									
								
								youtube_dl/extractor/packtpub.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								youtube_dl/extractor/packtpub.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,138 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     remove_end, | ||||
|     strip_or_none, | ||||
|     unified_timestamp, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PacktPubBaseIE(InfoExtractor): | ||||
|     # Common URL constants for the PacktPub extractors in this module. | ||||
|     # Site root; also used as the base when resolving thumbnail paths. | ||||
|     _PACKT_BASE = 'https://www.packtpub.com' | ||||
|     # Root of the "mapt-rest" JSON endpoints, derived from the site root. | ||||
|     _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE | ||||
|  | ||||
|  | ||||
| class PacktPubIE(PacktPubBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', | ||||
|         'md5': '1e74bd6cfd45d7d07666f4684ef58f70', | ||||
|         'info_dict': { | ||||
|             'id': '20530', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Project Intro', | ||||
|             'thumbnail': r're:(?i)^https?://.*\.jpg', | ||||
|             'timestamp': 1490918400, | ||||
|             'upload_date': '20170331', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _handle_error(self, response): | ||||
|         if response.get('status') != 'success': | ||||
|             raise ExtractorError( | ||||
|                 '% said: %s' % (self.IE_NAME, response['message']), | ||||
|                 expected=True) | ||||
|  | ||||
|     def _download_json(self, *args, **kwargs): | ||||
|         response = super(PacktPubIE, self)._download_json(*args, **kwargs) | ||||
|         self._handle_error(response) | ||||
|         return response | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         course_id, chapter_id, video_id = mobj.group( | ||||
|             'course_id', 'chapter_id', 'id') | ||||
|  | ||||
|         video = self._download_json( | ||||
|             '%s/users/me/products/%s/chapters/%s/sections/%s' | ||||
|             % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, | ||||
|             'Downloading JSON video')['data'] | ||||
|  | ||||
|         content = video.get('content') | ||||
|         if not content: | ||||
|             raise ExtractorError('This video is locked', expected=True) | ||||
|  | ||||
|         video_url = content['file'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             '%s/products/%s/chapters/%s/sections/%s/metadata' | ||||
|             % (self._MAPT_REST, course_id, chapter_id, video_id), | ||||
|             video_id)['data'] | ||||
|  | ||||
|         title = metadata['pageTitle'] | ||||
|         course_title = metadata.get('title') | ||||
|         if course_title: | ||||
|             title = remove_end(title, ' - %s' % course_title) | ||||
|         timestamp = unified_timestamp(metadata.get('publicationDate')) | ||||
|         thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class PacktPubCourseIE(PacktPubBaseIE): | ||||
|     _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215', | ||||
|         'info_dict': { | ||||
|             'id': '9781787122215', | ||||
|             'title': 'Learn Nodejs by building 12 projects [Video]', | ||||
|         }, | ||||
|         'playlist_count': 90, | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if PacktPubIE.suitable(url) else super( | ||||
|             PacktPubCourseIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         url, course_id = mobj.group('url', 'id') | ||||
|  | ||||
|         course = self._download_json( | ||||
|             '%s/products/%s/metadata' % (self._MAPT_REST, course_id), | ||||
|             course_id)['data'] | ||||
|  | ||||
|         entries = [] | ||||
|         for chapter_num, chapter in enumerate(course['tableOfContents'], 1): | ||||
|             if chapter.get('type') != 'chapter': | ||||
|                 continue | ||||
|             children = chapter.get('children') | ||||
|             if not isinstance(children, list): | ||||
|                 continue | ||||
|             chapter_info = { | ||||
|                 'chapter': chapter.get('title'), | ||||
|                 'chapter_number': chapter_num, | ||||
|                 'chapter_id': chapter.get('id'), | ||||
|             } | ||||
|             for section in children: | ||||
|                 if section.get('type') != 'section': | ||||
|                     continue | ||||
|                 section_url = section.get('seoUrl') | ||||
|                 if not isinstance(section_url, compat_str): | ||||
|                     continue | ||||
|                 entry = { | ||||
|                     '_type': 'url_transparent', | ||||
|                     'url': urljoin(url + '/', section_url), | ||||
|                     'title': strip_or_none(section.get('title')), | ||||
|                     'description': clean_html(section.get('summary')), | ||||
|                     'ie_key': PacktPubIE.ie_key(), | ||||
|                 } | ||||
|                 entry.update(chapter_info) | ||||
|                 entries.append(entry) | ||||
|  | ||||
|         return self.playlist_result(entries, course_id, course.get('title')) | ||||
| @@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor): | ||||
| class PeriscopeIE(PeriscopeBaseIE): | ||||
|     IE_DESC = 'Periscope' | ||||
|     IE_NAME = 'periscope' | ||||
|     _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' | ||||
|     # Alive example URLs can be found here http://onperiscope.com/ | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', | ||||
| @@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE): | ||||
|     }, { | ||||
|         'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE): | ||||
|  | ||||
|  | ||||
| class PeriscopeUserIE(PeriscopeBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$' | ||||
|     IE_DESC = 'Periscope user videos' | ||||
|     IE_NAME = 'periscope:user' | ||||
|  | ||||
|   | ||||
| @@ -40,7 +40,7 @@ class PluralsightIE(PluralsightBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Management of SQL Server - Demo Monitoring', | ||||
|             'title': 'Demo Monitoring', | ||||
|             'duration': 338, | ||||
|         }, | ||||
|         'skip': 'Requires pluralsight account credentials', | ||||
| @@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE): | ||||
|  | ||||
|         collection = course['modules'] | ||||
|  | ||||
|         module, clip = None, None | ||||
|         clip = None | ||||
|  | ||||
|         for module_ in collection: | ||||
|             if name in (module_.get('moduleName'), module_.get('name')): | ||||
|                 module = module_ | ||||
|                 for clip_ in module_.get('clips', []): | ||||
|                     clip_index = clip_.get('clipIndex') | ||||
|                     if clip_index is None: | ||||
| @@ -187,7 +186,7 @@ class PluralsightIE(PluralsightBaseIE): | ||||
|         if not clip: | ||||
|             raise ExtractorError('Unable to resolve clip') | ||||
|  | ||||
|         title = '%s - %s' % (module['title'], clip['title']) | ||||
|         title = clip['title'] | ||||
|  | ||||
|         QUALITIES = { | ||||
|             'low': {'width': 640, 'height': 480}, | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import itertools | ||||
| import operator | ||||
| # import os | ||||
| import re | ||||
|  | ||||
| @@ -18,6 +20,7 @@ from ..utils import ( | ||||
|     js_to_json, | ||||
|     orderedSet, | ||||
|     # sanitized_Request, | ||||
|     remove_quotes, | ||||
|     str_to_int, | ||||
| ) | ||||
| # from ..aes import ( | ||||
| @@ -129,9 +132,32 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         tv_webpage = dl_webpage('tv') | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, | ||||
|             'video url', group='url') | ||||
|         assignments = self._search_regex( | ||||
|             r'(var.+?mediastring.+?)</script>', tv_webpage, | ||||
|             'encoded url').split(';') | ||||
|  | ||||
|         js_vars = {} | ||||
|  | ||||
|         def parse_js_value(inp): | ||||
|             inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) | ||||
|             if '+' in inp: | ||||
|                 inps = inp.split('+') | ||||
|                 return functools.reduce( | ||||
|                     operator.concat, map(parse_js_value, inps)) | ||||
|             inp = inp.strip() | ||||
|             if inp in js_vars: | ||||
|                 return js_vars[inp] | ||||
|             return remove_quotes(inp) | ||||
|  | ||||
|         for assn in assignments: | ||||
|             assn = assn.strip() | ||||
|             if not assn: | ||||
|                 continue | ||||
|             assn = re.sub(r'var\s+', '', assn) | ||||
|             vname, value = assn.split('=', 1) | ||||
|             js_vars[vname] = parse_js_value(value) | ||||
|  | ||||
|         video_url = js_vars['mediastring'] | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) | ||||
|   | ||||
| @@ -300,6 +300,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # title in <h2 class="subtitle"> | ||||
|             'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', | ||||
|             'info_dict': { | ||||
|                 'id': '4895826', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', | ||||
|                 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', | ||||
|                 'upload_date': '20170302', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'geo restricted to Germany', | ||||
|         }, | ||||
|         { | ||||
|             # geo restricted to Germany | ||||
|             'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', | ||||
| @@ -338,6 +353,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): | ||||
|         r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', | ||||
|         r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', | ||||
|         r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>', | ||||
|         r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>', | ||||
|     ] | ||||
|     _DESCRIPTION_REGEXES = [ | ||||
|         r'<p itemprop="description">\s*(.+?)</p>', | ||||
| @@ -369,7 +385,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): | ||||
|     def _extract_clip(self, url, webpage): | ||||
|         clip_id = self._html_search_regex( | ||||
|             self._CLIPID_REGEXES, webpage, 'clip id') | ||||
|         title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') | ||||
|         title = self._html_search_regex( | ||||
|             self._TITLE_REGEXES, webpage, 'title', | ||||
|             default=None) or self._og_search_title(webpage) | ||||
|         info = self._extract_video_info(url, clip_id) | ||||
|         description = self._html_search_regex( | ||||
|             self._DESCRIPTION_REGEXES, webpage, 'description', default=None) | ||||
|   | ||||
| @@ -1,23 +1,40 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     GeoRestrictedError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
|     urljoin, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RaiBaseIE(InfoExtractor): | ||||
|     def _extract_relinker_formats(self, relinker_url, video_id): | ||||
|     _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' | ||||
|     _GEO_COUNTRIES = ['IT'] | ||||
|     _GEO_BYPASS = False | ||||
|  | ||||
|     def _extract_relinker_info(self, relinker_url, video_id): | ||||
|         formats = [] | ||||
|         geoprotection = None | ||||
|         is_live = None | ||||
|         duration = None | ||||
|  | ||||
|         for platform in ('mon', 'flash', 'native'): | ||||
|             relinker = self._download_xml( | ||||
| @@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor): | ||||
|                 query={'output': 45, 'pl': platform}, | ||||
|                 headers=self.geo_verification_headers()) | ||||
|  | ||||
|             media_url = find_xpath_attr(relinker, './url', 'type', 'content').text | ||||
|             if not geoprotection: | ||||
|                 geoprotection = xpath_text( | ||||
|                     relinker, './geoprotection', default=None) == 'Y' | ||||
|  | ||||
|             if not is_live: | ||||
|                 is_live = xpath_text( | ||||
|                     relinker, './is_live', default=None) == 'Y' | ||||
|             if not duration: | ||||
|                 duration = parse_duration(xpath_text( | ||||
|                     relinker, './duration', default=None)) | ||||
|  | ||||
|             url_elem = find_xpath_attr(relinker, './url', 'type', 'content') | ||||
|             if url_elem is None: | ||||
|                 continue | ||||
|  | ||||
|             media_url = url_elem.text | ||||
|  | ||||
|             # This does not imply geo restriction (e.g. | ||||
|             # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) | ||||
|             if media_url == 'http://download.rai.it/video_no_available.mp4': | ||||
|                 self.raise_geo_restricted() | ||||
|                 continue | ||||
|  | ||||
|             ext = determine_ext(media_url) | ||||
|             if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): | ||||
| @@ -53,35 +88,225 @@ class RaiBaseIE(InfoExtractor): | ||||
|                     'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http', | ||||
|                 }) | ||||
|  | ||||
|         return formats | ||||
|         if not formats and geoprotection is True: | ||||
|             self.raise_geo_restricted(countries=self._GEO_COUNTRIES) | ||||
|  | ||||
|     def _extract_from_content_id(self, content_id, base_url): | ||||
|         return dict((k, v) for k, v in { | ||||
|             'is_live': is_live, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         }.items() if v is not None) | ||||
|  | ||||
|  | ||||
| class RaiPlayIE(RaiBaseIE): | ||||
|     _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', | ||||
|         'md5': '340aa3b7afb54bfd14a8c11786450d76', | ||||
|         'info_dict': { | ||||
|             'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'La Casa Bianca', | ||||
|             'alt_title': 'S2016 - Puntata del 23/10/2016', | ||||
|             'description': 'md5:a09d45890850458077d1f68bb036e0a5', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': 'Rai 3', | ||||
|             'creator': 'Rai 3', | ||||
|             'duration': 3278, | ||||
|             'timestamp': 1477764300, | ||||
|             'upload_date': '20161029', | ||||
|             'series': 'La Casa Bianca', | ||||
|             'season': '2016', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', | ||||
|         'md5': '8970abf8caf8aef4696e7b1f2adfc696', | ||||
|         'info_dict': { | ||||
|             'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Report del 07/04/2014', | ||||
|             'alt_title': 'S2013/14 - Puntata del 07/04/2014', | ||||
|             'description': 'md5:f27c544694cacb46a078db84ec35d2d9', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': 'Rai 5', | ||||
|             'creator': 'Rai 5', | ||||
|             'duration': 6160, | ||||
|             'series': 'Report', | ||||
|             'season_number': 5, | ||||
|             'season': '2013/14', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         url, video_id = mobj.group('url', 'id') | ||||
|  | ||||
|         media = self._download_json( | ||||
|             '%s?json' % url, video_id, 'Downloading video JSON') | ||||
|  | ||||
|         title = media['name'] | ||||
|  | ||||
|         video = media['video'] | ||||
|  | ||||
|         relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) | ||||
|         self._sort_formats(relinker_info['formats']) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         if 'images' in media: | ||||
|             for _, value in media.get('images').items(): | ||||
|                 if value: | ||||
|                     thumbnails.append({ | ||||
|                         'url': value.replace('[RESOLUTION]', '600x400') | ||||
|                     }) | ||||
|  | ||||
|         timestamp = unified_timestamp(try_get( | ||||
|             media, lambda x: x['availabilities'][0]['start'], compat_str)) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'alt_title': media.get('subtitle'), | ||||
|             'description': media.get('description'), | ||||
|             'uploader': media.get('channel'), | ||||
|             'creator': media.get('editor'), | ||||
|             'duration': parse_duration(video.get('duration')), | ||||
|             'timestamp': timestamp, | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': try_get( | ||||
|                 media, lambda x: x['isPartOf']['name'], compat_str), | ||||
|             'season_number': int_or_none(try_get( | ||||
|                 media, lambda x: x['isPartOf']['numeroStagioni'])), | ||||
|             'season': media.get('stagione') or None, | ||||
|         } | ||||
|  | ||||
|         info.update(relinker_info) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class RaiIE(RaiBaseIE): | ||||
|     _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE | ||||
|     _TESTS = [{ | ||||
|         # var uniquename = "ContentItem-..." | ||||
|         # data-id="ContentItem-..." | ||||
|         'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', | ||||
|         'info_dict': { | ||||
|             'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'TG PRIMO TEMPO', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 1758, | ||||
|             'upload_date': '20140612', | ||||
|         } | ||||
|     }, { | ||||
|         # with ContentItem in many metas | ||||
|         'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', | ||||
|         'info_dict': { | ||||
|             'id': '1632c009-c843-4836-bb65-80c33084a64b', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', | ||||
|             'description': 'I film in uscita questa settimana.', | ||||
|             'thumbnail': r're:^https?://.*\.png$', | ||||
|             'duration': 833, | ||||
|             'upload_date': '20161103', | ||||
|         } | ||||
|     }, { | ||||
|         # with ContentItem in og:url | ||||
|         'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', | ||||
|         'md5': '11959b4e44fa74de47011b5799490adf', | ||||
|         'info_dict': { | ||||
|             'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'TG1 ore 20:00 del 03/11/2016', | ||||
|             'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 2214, | ||||
|             'upload_date': '20161103', | ||||
|         } | ||||
|     }, { | ||||
|         # drawMediaRaiTV(...) | ||||
|         'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', | ||||
|         'md5': '2dd727e61114e1ee9c47f0da6914e178', | ||||
|         'info_dict': { | ||||
|             'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Il pacco', | ||||
|             'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'upload_date': '20141221', | ||||
|         }, | ||||
|     }, { | ||||
|         # initEdizione('ContentItem-...' | ||||
|         'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', | ||||
|         'info_dict': { | ||||
|             'id': 'c2187016-8484-4e3a-8ac8-35e475b07303', | ||||
|             'ext': 'mp4', | ||||
|             'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', | ||||
|             'duration': 2274, | ||||
|             'upload_date': '20170401', | ||||
|         }, | ||||
|         'skip': 'Changes daily', | ||||
|     }, { | ||||
|         # HDS live stream with only relinker URL | ||||
|         'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', | ||||
|         'info_dict': { | ||||
|             'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', | ||||
|             'ext': 'flv', | ||||
|             'title': 'EuroNews', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # HLS live stream with ContentItem in og:url | ||||
|         'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', | ||||
|         'info_dict': { | ||||
|             'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'La diretta di Rainews24', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _extract_from_content_id(self, content_id, url): | ||||
|         media = self._download_json( | ||||
|             'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, | ||||
|             content_id, 'Downloading video JSON') | ||||
|  | ||||
|         title = media['name'].strip() | ||||
|  | ||||
|         media_type = media['type'] | ||||
|         if 'Audio' in media_type: | ||||
|             relinker_info = { | ||||
|                 'formats': { | ||||
|                     'format_id': media.get('formatoAudio'), | ||||
|                     'url': media['audioUrl'], | ||||
|                     'ext': media.get('formatoAudio'), | ||||
|                 } | ||||
|             } | ||||
|         elif 'Video' in media_type: | ||||
|             relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) | ||||
|         else: | ||||
|             raise ExtractorError('not a media file') | ||||
|  | ||||
|         self._sort_formats(relinker_info['formats']) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for image_type in ('image', 'image_medium', 'image_300'): | ||||
|             thumbnail_url = media.get(image_type) | ||||
|             if thumbnail_url: | ||||
|                 thumbnails.append({ | ||||
|                     'url': compat_urlparse.urljoin(base_url, thumbnail_url), | ||||
|                     'url': compat_urlparse.urljoin(url, thumbnail_url), | ||||
|                 }) | ||||
|  | ||||
|         formats = [] | ||||
|         media_type = media['type'] | ||||
|         if 'Audio' in media_type: | ||||
|             formats.append({ | ||||
|                 'format_id': media.get('formatoAudio'), | ||||
|                 'url': media['audioUrl'], | ||||
|                 'ext': media.get('formatoAudio'), | ||||
|             }) | ||||
|         elif 'Video' in media_type: | ||||
|             formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id)) | ||||
|             self._sort_formats(formats) | ||||
|         else: | ||||
|             raise ExtractorError('not a media file') | ||||
|  | ||||
|         subtitles = {} | ||||
|         captions = media.get('subtitlesUrl') | ||||
|         if captions: | ||||
| @@ -94,174 +319,90 @@ class RaiBaseIE(InfoExtractor): | ||||
|                 'url': captions, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|         info = { | ||||
|             'id': content_id, | ||||
|             'title': media['name'], | ||||
|             'description': media.get('desc'), | ||||
|             'title': title, | ||||
|             'description': strip_or_none(media.get('desc')), | ||||
|             'thumbnails': thumbnails, | ||||
|             'uploader': media.get('author'), | ||||
|             'upload_date': unified_strdate(media.get('date')), | ||||
|             'duration': parse_duration(media.get('length')), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|         info.update(relinker_info) | ||||
|  | ||||
| class RaiTVIE(RaiBaseIE): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', | ||||
|             'md5': '8970abf8caf8aef4696e7b1f2adfc696', | ||||
|             'info_dict': { | ||||
|                 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Report del 07/04/2014', | ||||
|                 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', | ||||
|                 'upload_date': '20140407', | ||||
|                 'duration': 6160, | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             # no m3u8 stream | ||||
|             'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', | ||||
|             # HDS download, MD5 is unstable | ||||
|             'info_dict': { | ||||
|                 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'TG PRIMO TEMPO', | ||||
|                 'upload_date': '20140612', | ||||
|                 'duration': 1758, | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'skip': 'Geo-restricted to Italy', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', | ||||
|             'md5': '35cf7c229f22eeef43e48b5cf923bef0', | ||||
|             'info_dict': { | ||||
|                 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'State of the Net, Antonella La Carpia: regole virali', | ||||
|                 'description': 'md5:b0ba04a324126903e3da7763272ae63c', | ||||
|                 'upload_date': '20140613', | ||||
|             }, | ||||
|             'skip': 'Error 404', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Alluvione in Sardegna e dissesto idrogeologico', | ||||
|                 'description': 'Edizione delle ore 20:30 ', | ||||
|             }, | ||||
|             'skip': 'invalid urls', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', | ||||
|             'md5': 'e57493e1cb8bc7c564663f363b171847', | ||||
|             'info_dict': { | ||||
|                 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Il Candidato - Primo episodio: "Le Primarie"', | ||||
|                 'description': 'md5:364b604f7db50594678f483353164fb8', | ||||
|                 'upload_date': '20140923', | ||||
|                 'duration': 386, | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|         return info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         return self._extract_from_content_id(video_id, url) | ||||
|  | ||||
|  | ||||
| class RaiIE(RaiBaseIE): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', | ||||
|             'md5': '2dd727e61114e1ee9c47f0da6914e178', | ||||
|             'info_dict': { | ||||
|                 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Il pacco', | ||||
|                 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', | ||||
|                 'upload_date': '20141221', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # Direct relinker URL | ||||
|             'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', | ||||
|             # HDS live stream, MD5 is unstable | ||||
|             'info_dict': { | ||||
|                 'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'EuroNews', | ||||
|             }, | ||||
|             'skip': 'Geo-restricted to Italy', | ||||
|         }, | ||||
|         { | ||||
|             # Embedded content item ID | ||||
|             'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', | ||||
|             'md5': '84c1135ce960e8822ae63cec34441d63', | ||||
|             'info_dict': { | ||||
|                 'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'TG1 ore 20:00 del 02/07/2016', | ||||
|                 'upload_date': '20160702', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', | ||||
|             # HDS live stream, MD5 is unstable | ||||
|             'info_dict': { | ||||
|                 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'La diretta di Rainews24', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         iframe_url = self._search_regex( | ||||
|             [r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', | ||||
|              r'drawMediaRaiTV\(["\'](.+?)["\']'], | ||||
|             webpage, 'iframe', default=None) | ||||
|         if iframe_url: | ||||
|             if not iframe_url.startswith('http'): | ||||
|                 iframe_url = compat_urlparse.urljoin(url, iframe_url) | ||||
|             return self.url_result(iframe_url) | ||||
|         content_item_id = None | ||||
|  | ||||
|         content_item_id = self._search_regex( | ||||
|             r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)', | ||||
|             webpage, 'content item ID', group='content_id', default=None) | ||||
|         content_item_url = self._html_search_meta( | ||||
|             ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url', | ||||
|              'twitter:player', 'jsonlink'), webpage, default=None) | ||||
|         if content_item_url: | ||||
|             content_item_id = self._search_regex( | ||||
|                 r'ContentItem-(%s)' % self._UUID_RE, content_item_url, | ||||
|                 'content item id', default=None) | ||||
|  | ||||
|         if not content_item_id: | ||||
|             content_item_id = self._search_regex( | ||||
|                 r'''(?x) | ||||
|                     (?: | ||||
|                         (?:initEdizione|drawMediaRaiTV)\(| | ||||
|                         <(?:[^>]+\bdata-id|var\s+uniquename)= | ||||
|                     ) | ||||
|                     (["\']) | ||||
|                     (?:(?!\1).)*\bContentItem-(?P<id>%s) | ||||
|                 ''' % self._UUID_RE, | ||||
|                 webpage, 'content item id', default=None, group='id') | ||||
|  | ||||
|         content_item_ids = set() | ||||
|         if content_item_id: | ||||
|             return self._extract_from_content_id(content_item_id, url) | ||||
|             content_item_ids.add(content_item_id) | ||||
|         if video_id not in content_item_ids: | ||||
|             content_item_ids.add(video_id) | ||||
|  | ||||
|         relinker_url = compat_urlparse.urljoin(url, self._search_regex( | ||||
|             r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)', | ||||
|             webpage, 'relinker URL', group='url')) | ||||
|         formats = self._extract_relinker_formats(relinker_url, video_id) | ||||
|         self._sort_formats(formats) | ||||
|         for content_item_id in content_item_ids: | ||||
|             try: | ||||
|                 return self._extract_from_content_id(content_item_id, url) | ||||
|             except GeoRestrictedError: | ||||
|                 raise | ||||
|             except ExtractorError: | ||||
|                 pass | ||||
|  | ||||
|         relinker_url = self._search_regex( | ||||
|             r'''(?x) | ||||
|                 (?: | ||||
|                     var\s+videoURL| | ||||
|                     mediaInfo\.mediaUri | ||||
|                 )\s*=\s* | ||||
|                 ([\'"]) | ||||
|                 (?P<url> | ||||
|                     (?:https?:)? | ||||
|                     //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? | ||||
|                     (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 | ||||
|             ''', | ||||
|             webpage, 'relinker URL', group='url') | ||||
|  | ||||
|         relinker_info = self._extract_relinker_info( | ||||
|             urljoin(url, relinker_url), video_id) | ||||
|         self._sort_formats(relinker_info['formats']) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1', | ||||
|             webpage, 'title', group='title', default=None) or self._og_search_title(webpage) | ||||
|             webpage, 'title', group='title', | ||||
|             default=None) or self._og_search_title(webpage) | ||||
|  | ||||
|         return { | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|         info.update(relinker_info) | ||||
|  | ||||
|         return info | ||||
|   | ||||
							
								
								
									
										122
									
								
								youtube_dl/extractor/redbulltv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								youtube_dl/extractor/redbulltv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     # unified_timestamp, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RedBullTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)' | ||||
|     _TESTS = [{ | ||||
|         # film | ||||
|         'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', | ||||
|         'md5': 'fb0445b98aa4394e504b413d98031d1f', | ||||
|         'info_dict': { | ||||
|             'id': 'AP-1Q756YYX51W11', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'ABC of...WRC', | ||||
|             'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', | ||||
|             'duration': 1582.04, | ||||
|             # 'timestamp': 1488405786, | ||||
|             # 'upload_date': '20170301', | ||||
|         }, | ||||
|     }, { | ||||
|         # episode | ||||
|         'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web', | ||||
|         'info_dict': { | ||||
|             'id': 'AP-1PMT5JCWH1W11', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Grime - Hashtags S2 E4', | ||||
|             'description': 'md5:334b741c8c1ce65be057eab6773c1cf5', | ||||
|             'duration': 904.6, | ||||
|             # 'timestamp': 1487290093, | ||||
|             # 'upload_date': '20170217', | ||||
|             'series': 'Hashtags', | ||||
|             'season_number': 2, | ||||
|             'episode_number': 4, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         session = self._download_json( | ||||
|             'https://api-v2.redbull.tv/session', video_id, | ||||
|             note='Downloading access token', query={ | ||||
|                 'build': '4.370.0', | ||||
|                 'category': 'personal_computer', | ||||
|                 'os_version': '1.0', | ||||
|                 'os_family': 'http', | ||||
|             }) | ||||
|         if session.get('code') == 'error': | ||||
|             raise ExtractorError('%s said: %s' % ( | ||||
|                 self.IE_NAME, session['message'])) | ||||
|         auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token']) | ||||
|  | ||||
|         try: | ||||
|             info = self._download_json( | ||||
|                 'https://api-v2.redbull.tv/content/%s' % video_id, | ||||
|                 video_id, note='Downloading video information', | ||||
|                 headers={'Authorization': auth} | ||||
|             ) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: | ||||
|                 error_message = self._parse_json( | ||||
|                     e.cause.read().decode(), video_id)['message'] | ||||
|                 raise ExtractorError('%s said: %s' % ( | ||||
|                     self.IE_NAME, error_message), expected=True) | ||||
|             raise | ||||
|  | ||||
|         video = info['video_product'] | ||||
|  | ||||
|         title = info['title'].strip() | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             video['url'], video_id, 'mp4', 'm3u8_native') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for _, captions in (try_get( | ||||
|                 video, lambda x: x['attachments']['captions'], | ||||
|                 dict) or {}).items(): | ||||
|             if not captions or not isinstance(captions, list): | ||||
|                 continue | ||||
|             for caption in captions: | ||||
|                 caption_url = caption.get('url') | ||||
|                 if not caption_url: | ||||
|                     continue | ||||
|                 ext = caption.get('format') | ||||
|                 if ext == 'xml': | ||||
|                     ext = 'ttml' | ||||
|                 subtitles.setdefault(caption.get('lang') or 'en', []).append({ | ||||
|                     'url': caption_url, | ||||
|                     'ext': ext, | ||||
|                 }) | ||||
|  | ||||
|         subheading = info.get('subheading') | ||||
|         if subheading: | ||||
|             title += ' - %s' % subheading | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': info.get('long_description') or info.get( | ||||
|                 'short_description'), | ||||
|             'duration': float_or_none(video.get('duration'), scale=1000), | ||||
|             # 'timestamp': unified_timestamp(info.get('published')), | ||||
|             'series': info.get('show_title'), | ||||
|             'season_number': int_or_none(info.get('season_number')), | ||||
|             'episode_number': int_or_none(info.get('episode_number')), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| @@ -17,7 +17,7 @@ from ..utils import ( | ||||
| class RutubeIE(InfoExtractor): | ||||
|     IE_NAME = 'rutube' | ||||
|     IE_DESC = 'Rutube videos' | ||||
|     _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})' | ||||
|     _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', | ||||
| @@ -39,8 +39,17 @@ class RutubeIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
|         return [mobj.group('url') for mobj in re.finditer( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', | ||||
|             webpage)] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video = self._download_json( | ||||
|   | ||||
| @@ -82,6 +82,9 @@ class RuutuIE(InfoExtractor): | ||||
|                         formats.extend(self._extract_f4m_formats( | ||||
|                             video_url, video_id, f4m_id='hds', fatal=False)) | ||||
|                     elif ext == 'mpd': | ||||
|                         # video-only and audio-only streams are of different | ||||
|                         # duration resulting in out of sync issue | ||||
|                         continue | ||||
|                         formats.extend(self._extract_mpd_formats( | ||||
|                             video_url, video_id, mpd_id='dash', fatal=False)) | ||||
|                     else: | ||||
|   | ||||
| @@ -1,57 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class SciVeeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.scivee.tv/node/62352', | ||||
|         'md5': 'b16699b74c9e6a120f6772a44960304f', | ||||
|         'info_dict': { | ||||
|             'id': '62352', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', | ||||
|             'description': 'md5:81f1710638e11a481358fab1b11059d7', | ||||
|         }, | ||||
|         'skip': 'Not accessible from Travis CI server', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         # annotations XML is malformed | ||||
|         annotations = self._download_webpage( | ||||
|             'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations') | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title') | ||||
|         description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False) | ||||
|         filesize = int_or_none(self._html_search_regex( | ||||
|             r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False)) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': 'http://www.scivee.tv/assets/audio/%s' % video_id, | ||||
|                 'ext': 'mp3', | ||||
|                 'format_id': 'audio', | ||||
|             }, | ||||
|             { | ||||
|                 'url': 'http://www.scivee.tv/assets/video/%s' % video_id, | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'video', | ||||
|                 'filesize': filesize, | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -89,7 +89,7 @@ class SenateISVPIE(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _search_iframe_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", | ||||
|             r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|   | ||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/skylinewebcams.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/skylinewebcams.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SkylineWebcamsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html', | ||||
|         'info_dict': { | ||||
|             'id': 'scalinata-piazza-di-spagna-barcaccia', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         stream_url = self._search_regex( | ||||
|             r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, | ||||
|             'stream url', group='url') | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': stream_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._live_title(title), | ||||
|             'description': description, | ||||
|             'is_live': True, | ||||
|         } | ||||
| @@ -108,12 +108,11 @@ class SohuIE(InfoExtractor): | ||||
|         if vid_data['play'] != 1: | ||||
|             if vid_data.get('status') == 12: | ||||
|                 raise ExtractorError( | ||||
|                     'Sohu said: There\'s something wrong in the video.', | ||||
|                     '%s said: There\'s something wrong in the video.' % self.IE_NAME, | ||||
|                     expected=True) | ||||
|             else: | ||||
|                 raise ExtractorError( | ||||
|                     'Sohu said: The video is only licensed to users in Mainland China.', | ||||
|                     expected=True) | ||||
|                 self.raise_geo_restricted( | ||||
|                     '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME) | ||||
|  | ||||
|         formats_json = {} | ||||
|         for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): | ||||
|   | ||||
| @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' | ||||
|     _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' | ||||
|     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' | ||||
|  | ||||
|     @staticmethod | ||||
|   | ||||
| @@ -65,7 +65,7 @@ class StreamableIE(InfoExtractor): | ||||
|         # to return video info like the title properly sometimes, and doesn't | ||||
|         # include info like the video duration | ||||
|         video = self._download_json( | ||||
|             'https://streamable.com/ajax/videos/%s' % video_id, video_id) | ||||
|             'https://ajax.streamable.com/videos/%s' % video_id, video_id) | ||||
|  | ||||
|         # Format IDs: | ||||
|         # 0 The video is being uploaded | ||||
|   | ||||
| @@ -44,6 +44,10 @@ class TelecincoIE(MiTeleBaseIE): | ||||
|     }, { | ||||
|         'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # ooyala video | ||||
|         'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -2,15 +2,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TeleQuebecIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york', | ||||
|         'md5': 'fe95a0957e5707b1b01f5013e725c90f', | ||||
|         'info_dict': { | ||||
| @@ -18,10 +20,14 @@ class TeleQuebecIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Le couronnement de New York', | ||||
|             'description': 'md5:f5b3d27a689ec6c1486132b2d687d432', | ||||
|             'upload_date': '20160220', | ||||
|             'timestamp': 1455965438, | ||||
|             'upload_date': '20170201', | ||||
|             'timestamp': 1485972222, | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         # no description | ||||
|         'url': 'http://zonevideo.telequebec.tv/media/30261', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         media_id = self._match_id(url) | ||||
| @@ -31,9 +37,13 @@ class TeleQuebecIE(InfoExtractor): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': media_id, | ||||
|             'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}), | ||||
|             'url': smuggle_url( | ||||
|                 'limelight:media:' + media_data['streamInfo']['sourceId'], | ||||
|                 {'geo_countries': ['CA']}), | ||||
|             'title': media_data['title'], | ||||
|             'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), | ||||
|             'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), | ||||
|             'description': try_get( | ||||
|                 media_data, lambda x: x['descriptions'][0]['text'], compat_str), | ||||
|             'duration': int_or_none( | ||||
|                 media_data.get('durationInMilliseconds'), 1000), | ||||
|             'ie_key': 'LimelightMedia', | ||||
|         } | ||||
|   | ||||
| @@ -3,7 +3,10 @@ from __future__ import unicode_literals | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import qualities | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TheSceneIE(InfoExtractor): | ||||
| @@ -16,6 +19,11 @@ class TheSceneIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', | ||||
|             'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', | ||||
|             'duration': 127, | ||||
|             'series': 'Style.com Fashion Shows', | ||||
|             'season': 'Ready To Wear Spring 2013', | ||||
|             'tags': list, | ||||
|             'categories': list, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -32,21 +40,29 @@ class TheSceneIE(InfoExtractor): | ||||
|         player = self._download_webpage(player_url, display_id) | ||||
|         info = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), | ||||
|                 r'(?m)video\s*:\s*({.+?}),$', player, 'info json'), | ||||
|             display_id) | ||||
|  | ||||
|         video_id = info['id'] | ||||
|         title = info['title'] | ||||
|  | ||||
|         qualities_order = qualities(('low', 'high')) | ||||
|         formats = [{ | ||||
|             'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), | ||||
|             'url': f['src'], | ||||
|             'quality': qualities_order(f['quality']), | ||||
|         } for f in info['sources'][0]] | ||||
|         } for f in info['sources']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': info['id'], | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': info['title'], | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': info.get('poster_frame'), | ||||
|             'duration': int_or_none(info.get('duration')), | ||||
|             'series': info.get('series_title'), | ||||
|             'season': info.get('season_title'), | ||||
|             'tags': info.get('tags'), | ||||
|             'categories': info.get('categories'), | ||||
|         } | ||||
|   | ||||
							
								
								
									
										81
									
								
								youtube_dl/extractor/toongoggles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								youtube_dl/extractor/toongoggles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ToonGogglesIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football', | ||||
|         'md5': '18289fc2b951eff6b953a9d8f01e6831', | ||||
|         'info_dict': { | ||||
|             'id': '217147', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Football', | ||||
|             'uploader_id': '1', | ||||
|             'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.', | ||||
|             'upload_date': '20160718', | ||||
|             'timestamp': 1468879330, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world', | ||||
|         'info_dict': { | ||||
|             'id': '227759', | ||||
|             'title': 'Om Nom Stories Around The World', | ||||
|         }, | ||||
|         'playlist_mincount': 11, | ||||
|     }] | ||||
|  | ||||
|     def _call_api(self, action, page_id, query): | ||||
|         query.update({ | ||||
|             'for_ng': 1, | ||||
|             'for_web': 1, | ||||
|             'show_meta': 1, | ||||
|             'version': 7.0, | ||||
|         }) | ||||
|         return self._download_json('http://api.toongoggles.com/' + action, page_id, query=query) | ||||
|  | ||||
|     def _parse_episode_data(self, episode_data): | ||||
|         title = episode_data['episode_name'] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': episode_data['episode_id'], | ||||
|             'title': title, | ||||
|             'url': 'kaltura:513551:' + episode_data['entry_id'], | ||||
|             'thumbnail': episode_data.get('thumbnail_url'), | ||||
|             'description': episode_data.get('description'), | ||||
|             'duration': parse_duration(episode_data.get('hms')), | ||||
|             'series': episode_data.get('show_name'), | ||||
|             'season_number': int_or_none(episode_data.get('season_num')), | ||||
|             'episode_id': episode_data.get('episode_id'), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(episode_data.get('episode_num')), | ||||
|             'categories': episode_data.get('categories'), | ||||
|             'ie_key': 'Kaltura', | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_id, episode_id = re.match(self._VALID_URL, url).groups() | ||||
|         if episode_id: | ||||
|             episode_data = self._call_api('search', episode_id, { | ||||
|                 'filter': 'episode', | ||||
|                 'id': episode_id, | ||||
|             })['objects'][0] | ||||
|             return self._parse_episode_data(episode_data) | ||||
|         else: | ||||
|             show_data = self._call_api('getepisodesbyshow', show_id, { | ||||
|                 'max': 1000000000, | ||||
|                 'showid': show_id, | ||||
|             }) | ||||
|             entries = [] | ||||
|             for episode_data in show_data.get('objects', []): | ||||
|                 entries.append(self._parse_episode_data(episode_data)) | ||||
|             return self.playlist_result(entries, show_id, show_data.get('show_name')) | ||||
| @@ -16,6 +16,7 @@ class TubiTvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)' | ||||
|     _LOGIN_URL = 'http://tubitv.com/login' | ||||
|     _NETRC_MACHINE = 'tubitv' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|     _TEST = { | ||||
|         'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', | ||||
|         'md5': '43ac06be9326f41912dc64ccf7a80320', | ||||
|   | ||||
							
								
								
									
										90
									
								
								youtube_dl/extractor/tunepk.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								youtube_dl/extractor/tunepk.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TunePkIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)| | ||||
|                             embed\.tune\.pk/play/ | ||||
|                         ) | ||||
|                         (?P<id>\d+) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins', | ||||
|         'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55', | ||||
|         'info_dict': { | ||||
|             'id': '6919541', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins', | ||||
|             'description': 'md5:eb5a04114fafef5cec90799a93a2d09c', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1487327564, | ||||
|             'upload_date': '20170217', | ||||
|             'uploader': 'Movie Trailers', | ||||
|             'duration': 107, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'https://tune.pk/video/%s' % video_id, video_id) | ||||
|  | ||||
|         details = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'), | ||||
|             video_id)['details'] | ||||
|  | ||||
|         video = details['video'] | ||||
|         title = video.get('title') or self._og_search_title( | ||||
|             webpage, default=None) or self._html_search_meta( | ||||
|             'title', webpage, 'title', fatal=True) | ||||
|  | ||||
|         formats = self._parse_jwplayer_formats( | ||||
|             details['player']['sources'], video_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = self._og_search_description( | ||||
|             webpage, default=None) or self._html_search_meta( | ||||
|             'description', webpage, 'description') | ||||
|  | ||||
|         thumbnail = video.get('thumb') or self._og_search_thumbnail( | ||||
|             webpage, default=None) or self._html_search_meta( | ||||
|             'thumbnail', webpage, 'thumbnail') | ||||
|  | ||||
|         timestamp = unified_timestamp(video.get('date_added')) | ||||
|         uploader = try_get( | ||||
|             video, lambda x: x['uploader']['name'], | ||||
|             compat_str) or self._html_search_meta('author', webpage, 'author') | ||||
|  | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|         view_count = int_or_none(video.get('views')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										79
									
								
								youtube_dl/extractor/tv5mondeplus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								youtube_dl/extractor/tv5mondeplus.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,79 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     get_element_by_class, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TV5MondePlusIE(InfoExtractor): | ||||
|     IE_DESC = 'TV5MONDE+' | ||||
|     _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants', | ||||
|         'md5': '12130fc199f020673138a83466542ec6', | ||||
|         'info_dict': { | ||||
|             'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tdah, mon amour - Enfants', | ||||
|             'description': 'md5:230e3aca23115afcf8006d1bece6df74', | ||||
|             'upload_date': '20170401', | ||||
|             'timestamp': 1491022860, | ||||
|         } | ||||
|     } | ||||
|     _GEO_BYPASS = False | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage: | ||||
|             self.raise_geo_restricted(countries=['FR']) | ||||
|  | ||||
|         series = get_element_by_class('video-detail__title', webpage) | ||||
|         title = episode = get_element_by_class( | ||||
|             'video-detail__subtitle', webpage) or series | ||||
|         if series and series != title: | ||||
|             title = '%s - %s' % (series, title) | ||||
|         vpl_data = extract_attributes(self._search_regex( | ||||
|             r'(<[^>]+class="video_player_loader"[^>]+>)', | ||||
|             webpage, 'video player loader')) | ||||
|  | ||||
|         video_files = self._parse_json( | ||||
|             vpl_data['data-broadcast'], display_id).get('files', []) | ||||
|         formats = [] | ||||
|         for video_file in video_files: | ||||
|             v_url = video_file.get('url') | ||||
|             if not v_url: | ||||
|                 continue | ||||
|             video_format = video_file.get('format') or determine_ext(v_url) | ||||
|             if video_format == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     v_url, display_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': v_url, | ||||
|                     'format_id': video_format, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': display_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': clean_html(get_element_by_class('video-detail__description', webpage)), | ||||
|             'thumbnail': vpl_data.get('data-image'), | ||||
|             'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)), | ||||
|             'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)), | ||||
|             'formats': formats, | ||||
|             'episode': episode, | ||||
|             'series': series, | ||||
|         } | ||||
| @@ -17,6 +17,9 @@ class TvigleIE(InfoExtractor): | ||||
|     IE_DESC = 'Интернет-телевидение Tvigle.ru' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))' | ||||
|  | ||||
|     _GEO_BYPASS = False | ||||
|     _GEO_COUNTRIES = ['RU'] | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tvigle.ru/video/sokrat/', | ||||
| @@ -72,8 +75,13 @@ class TvigleIE(InfoExtractor): | ||||
|  | ||||
|         error_message = item.get('errorMessage') | ||||
|         if not videos and error_message: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) | ||||
|             if item.get('isGeoBlocked') is True: | ||||
|                 self.raise_geo_restricted( | ||||
|                     msg=error_message, countries=self._GEO_COUNTRIES) | ||||
|             else: | ||||
|                 raise ExtractorError( | ||||
|                     '%s returned error: %s' % (self.IE_NAME, error_message), | ||||
|                     expected=True) | ||||
|  | ||||
|         title = item['title'] | ||||
|         description = item.get('description') | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user