Compare commits
	
		
			114 Commits
		
	
	
		
			2017.03.22
			...
			2017.04.11
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					1730878167 | ||
| 
						 | 
					689cd458a6 | ||
| 
						 | 
					6b9466de2f | ||
| 
						 | 
					61568e50cf | ||
| 
						 | 
					364a69e8c6 | ||
| 
						 | 
					6240925b40 | ||
| 
						 | 
					964744af95 | ||
| 
						 | 
					1af959ef9f | ||
| 
						 | 
					a206ef62df | ||
| 
						 | 
					3f2ce6896a | ||
| 
						 | 
					a6f7263cf4 | ||
| 
						 | 
					4372436504 | ||
| 
						 | 
					eb8cc8ea3b | ||
| 
						 | 
					41b263ac8a | ||
| 
						 | 
					ca8fca9d9d | ||
| 
						 | 
					e129fa0846 | ||
| 
						 | 
					2bd875edfe | ||
| 
						 | 
					95152630db | ||
| 
						 | 
					04e431cf97 | ||
| 
						 | 
					1591ba258a | ||
| 
						 | 
					29c6726646 | ||
| 
						 | 
					a66e25859a | ||
| 
						 | 
					c93c0fc2fd | ||
| 
						 | 
					90e3f18fc1 | ||
| 
						 | 
					5f3e0b69ef | ||
| 
						 | 
					28b674ca23 | ||
| 
						 | 
					e18f1da97a | ||
| 
						 | 
					78280352ca | ||
| 
						 | 
					a01825a541 | ||
| 
						 | 
					f8f2da25ab | ||
| 
						 | 
					4c03973296 | ||
| 
						 | 
					60e5016199 | ||
| 
						 | 
					c4d6fc6d65 | ||
| 
						 | 
					1b3feca0a7 | ||
| 
						 | 
					80b2fdf9ac | ||
| 
						 | 
					3bef10a50c | ||
| 
						 | 
					a84da06f49 | ||
| 
						 | 
					3461f5db06 | ||
| 
						 | 
					0378b8b917 | ||
| 
						 | 
					7f04386b89 | ||
| 
						 | 
					fac39cccd4 | ||
| 
						 | 
					b68e00b08a | ||
| 
						 | 
					2ab0bfcd81 | ||
| 
						 | 
					b022f4f600 | ||
| 
						 | 
					e2435ba5f3 | ||
| 
						 | 
					a9bb61a425 | ||
| 
						 | 
					dbf70c489f | ||
| 
						 | 
					61e2331ad8 | ||
| 
						 | 
					fd47550885 | ||
| 
						 | 
					4457823dda | ||
| 
						 | 
					b3633fa0ce | ||
| 
						 | 
					b56e41a701 | ||
| 
						 | 
					a76c25146a | ||
| 
						 | 
					361f293ab8 | ||
| 
						 | 
					b8d8cced9b | ||
| 
						 | 
					51342717cd | ||
| 
						 | 
					48ab554feb | ||
| 
						 | 
					a6f3a162f3 | ||
| 
						 | 
					91399b2fcc | ||
| 
						 | 
					eecea00d36 | ||
| 
						 | 
					2cd668ee59 | ||
| 
						 | 
					ca77b92f94 | ||
| 
						 | 
					e97fc8d6b8 | ||
| 
						 | 
					be61efdf17 | ||
| 
						 | 
					77c8ebe631 | ||
| 
						 | 
					7453999580 | ||
| 
						 | 
					1640eb0961 | ||
| 
						 | 
					3e943cfe09 | ||
| 
						 | 
					82be732b17 | ||
| 
						 | 
					639e5b2a84 | ||
| 
						 | 
					128244657b | ||
| 
						 | 
					12ee65ea0d | ||
| 
						 | 
					aea1dccbd0 | ||
| 
						 | 
					9e691da067 | ||
| 
						 | 
					82eefd0be0 | ||
| 
						 | 
					f7923a4c39 | ||
| 
						 | 
					cc63259d18 | ||
| 
						 | 
					2bfaf89b6c | ||
| 
						 | 
					4f06c1c9fc | ||
| 
						 | 
					942b44a052 | ||
| 
						 | 
					a426ef6d78 | ||
| 
						 | 
					41c5e60dd5 | ||
| 
						 | 
					d212c93d16 | ||
| 
						 | 
					15495cf3e5 | ||
| 
						 | 
					5b7cc56b05 | ||
| 
						 | 
					590bc6f6a1 | ||
| 
						 | 
					51098426b8 | ||
| 
						 | 
					c73e330e7a | ||
| 
						 | 
					fb4fc44928 | ||
| 
						 | 
					03486dbb01 | ||
| 
						 | 
					51ef4919df | ||
| 
						 | 
					d66d43c554 | ||
| 
						 | 
					610a6d1053 | ||
| 
						 | 
					c6c22e984d | ||
| 
						 | 
					d97729c83a | ||
| 
						 | 
					7aa0ee321b | ||
| 
						 | 
					e8e4cc5a6a | ||
| 
						 | 
					c7301e677b | ||
| 
						 | 
					048086920b | ||
| 
						 | 
					1088d76da6 | ||
| 
						 | 
					31a1214076 | ||
| 
						 | 
					d0ba55871e | ||
| 
						 | 
					54b960f340 | ||
| 
						 | 
					a3ccd6bd11 | ||
| 
						 | 
					7963b6cba8 | ||
| 
						 | 
					bea7af6947 | ||
| 
						 | 
					a5d783f525 | ||
| 
						 | 
					d0572557c2 | ||
| 
						 | 
					52d5ecabd5 | ||
| 
						 | 
					b0f7f21cb9 | ||
| 
						 | 
					579c99a284 | ||
| 
						 | 
					ca5ed022e9 | ||
| 
						 | 
					391d076d7c | ||
| 
						 | 
					c183e14f89 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							@@ -6,8 +6,8 @@
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
 | 
			
		||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.22**
 | 
			
		||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
 | 
			
		||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.11**
 | 
			
		||||
 | 
			
		||||
### Before submitting an *issue* make sure you have:
 | 
			
		||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 | 
			
		||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 | 
			
		||||
[debug] User config: []
 | 
			
		||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 | 
			
		||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 | 
			
		||||
[debug] youtube-dl version 2017.03.22
 | 
			
		||||
[debug] youtube-dl version 2017.04.11
 | 
			
		||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 | 
			
		||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 | 
			
		||||
[debug] Proxy map: {}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										104
									
								
								ChangeLog
									
									
									
									
									
								
							
							
						
						
									
										104
									
								
								ChangeLog
									
									
									
									
									
								
							@@ -1,3 +1,107 @@
 | 
			
		||||
version 2017.04.11
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
* [afreecatv] Fix extraction (#12706)
 | 
			
		||||
+ [generic] Add support for <object> YouTube embeds (#12637)
 | 
			
		||||
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
 | 
			
		||||
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
 | 
			
		||||
+ [bbccouk] Add support for https protocol in media selector (#12701)
 | 
			
		||||
* [curiositystream] Fix extraction (#12638)
 | 
			
		||||
* [adn] Update subtitle decryption key
 | 
			
		||||
* [chaturbate] Fix extraction (#12665, #12688, #12690)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.04.09
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
+ [medici] Add support for medici.tv (#3406)
 | 
			
		||||
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
 | 
			
		||||
+ [npo:live] Add support for default URL (#12555)
 | 
			
		||||
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
 | 
			
		||||
+ [thesun] Add suport for thesun.co.uk (#11298, #12674)
 | 
			
		||||
+ [ceskateleveize:porady] Add support for porady (#7411, #12645)
 | 
			
		||||
* [ceskateleveize] Improve extraction and remove URL replacement hacks
 | 
			
		||||
+ [kaltura] Add support for iframe embeds (#12679)
 | 
			
		||||
* [airmozilla] Fix extraction (#12670)
 | 
			
		||||
* [wshh] Extract html5 entries and delegate to generic extractor (12676)
 | 
			
		||||
+ [raiplay] Extract subtitles
 | 
			
		||||
+ [xfileshare] Add support for vidlo.us (#12660)
 | 
			
		||||
+ [xfileshare] Add support for vidbom.com (#12661)
 | 
			
		||||
+ [aenetworks] Add more video URL regular expressions (#12657)
 | 
			
		||||
+ [odnoklassniki] Fix format sorting for 1080p quality
 | 
			
		||||
+ [rtl2] Add support for you.rtl2.de (#10257)
 | 
			
		||||
+ [vshare] Add support for vshare.io (#12278)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.04.03
 | 
			
		||||
 | 
			
		||||
Core
 | 
			
		||||
+ [extractor/common] Add censorship check for TransTelekom ISP
 | 
			
		||||
* [extractor/common] Move censorship checks to a separate method
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
+ [discoveryvr] Add support for discoveryvr.com (#12578)
 | 
			
		||||
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
 | 
			
		||||
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.04.02
 | 
			
		||||
 | 
			
		||||
Core
 | 
			
		||||
* [YoutubeDL] Return early when extraction of url_transparent fails
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
* [rai] Fix and improve extraction (#11790)
 | 
			
		||||
+ [vrv] Add support for series pages
 | 
			
		||||
* [limelight] Improve extraction for audio only formats
 | 
			
		||||
* [funimation] Fix extraction (#10696, #11773)
 | 
			
		||||
+ [xfileshare] Add support for vidabc.com (#12589)
 | 
			
		||||
+ [xfileshare] Improve extraction and extract hls formats
 | 
			
		||||
+ [crunchyroll] Pass geo verifcation proxy
 | 
			
		||||
+ [cwtv] Extract ISM formats
 | 
			
		||||
+ [tvplay] Bypass geo restriction
 | 
			
		||||
+ [vrv] Add support for vrv.co
 | 
			
		||||
+ [packtpub] Add support for packtpub.com (#12610)
 | 
			
		||||
+ [generic] Pass base_url to _parse_jwplayer_data
 | 
			
		||||
+ [adn] Add support for animedigitalnetwork.fr (#4866)
 | 
			
		||||
+ [allocine] Extract more metadata
 | 
			
		||||
* [allocine] Fix extraction (#12592)
 | 
			
		||||
* [openload] Fix extraction
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.03.26
 | 
			
		||||
 | 
			
		||||
Core
 | 
			
		||||
* Don't raise an error if JWPlayer config data is not a Javascript object
 | 
			
		||||
  literal. _find_jwplayer_data now returns a dict rather than an str. (#12307)
 | 
			
		||||
* Expand environment variables for options representing paths (#12556)
 | 
			
		||||
+ [utils] Introduce expand_path
 | 
			
		||||
* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
* [afreecatv] Fix extraction (#12179)
 | 
			
		||||
+ [atvat] Add support for atv.at (#5325)
 | 
			
		||||
+ [fox] Add metadata extraction (#12391)
 | 
			
		||||
+ [atresplayer] Extract DASH formats
 | 
			
		||||
+ [atresplayer] Extract HD manifest (#12548)
 | 
			
		||||
* [atresplayer] Fix login error detection (#12548)
 | 
			
		||||
* [franceculture] Fix extraction (#12547)
 | 
			
		||||
* [youtube] Improve URL regular expression (#12538)
 | 
			
		||||
* [generic] Do not follow redirects to the same URL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.03.24
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
- [9c9media] Remove mp4 URL extraction request
 | 
			
		||||
+ [bellmedia] Add support for etalk.ca and space.ca (#12447)
 | 
			
		||||
* [channel9] Fix extraction (#11323)
 | 
			
		||||
* [cloudy] Fix extraction (#12525)
 | 
			
		||||
+ [hbo] Add support for free episode URLs and new formats extraction (#12519)
 | 
			
		||||
* [condenast] Fix extraction and style (#12526)
 | 
			
		||||
* [viu] Relax URL regular expression (#12529)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2017.03.22
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
 
 | 
			
		||||
@@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
 | 
			
		||||
    -R, --retries RETRIES            Number of retries (default is 10), or
 | 
			
		||||
                                     "infinite".
 | 
			
		||||
    --fragment-retries RETRIES       Number of retries for a fragment (default
 | 
			
		||||
                                     is 10), or "infinite" (DASH and hlsnative
 | 
			
		||||
                                     only)
 | 
			
		||||
    --skip-unavailable-fragments     Skip unavailable fragments (DASH and
 | 
			
		||||
                                     hlsnative only)
 | 
			
		||||
                                     is 10), or "infinite" (DASH, hlsnative and
 | 
			
		||||
                                     ISM)
 | 
			
		||||
    --skip-unavailable-fragments     Skip unavailable fragments (DASH, hlsnative
 | 
			
		||||
                                     and ISM)
 | 
			
		||||
    --abort-on-unavailable-fragment  Abort downloading when some fragment is not
 | 
			
		||||
                                     available
 | 
			
		||||
    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,7 @@
 | 
			
		||||
 - **acast**
 | 
			
		||||
 - **acast:channel**
 | 
			
		||||
 - **AddAnime**
 | 
			
		||||
 - **ADN**: Anime Digital Network
 | 
			
		||||
 - **AdobeTV**
 | 
			
		||||
 - **AdobeTVChannel**
 | 
			
		||||
 - **AdobeTVShow**
 | 
			
		||||
@@ -67,6 +68,7 @@
 | 
			
		||||
 - **arte.tv:playlist**
 | 
			
		||||
 - **AtresPlayer**
 | 
			
		||||
 - **ATTTechChannel**
 | 
			
		||||
 - **ATVAt**
 | 
			
		||||
 - **AudiMedia**
 | 
			
		||||
 - **AudioBoom**
 | 
			
		||||
 - **audiomack**
 | 
			
		||||
@@ -125,7 +127,7 @@
 | 
			
		||||
 - **CamWithHer**
 | 
			
		||||
 - **canalc2.tv**
 | 
			
		||||
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 | 
			
		||||
 - **Canvas**
 | 
			
		||||
 - **Canvas**: canvas.be and een.be
 | 
			
		||||
 - **CarambaTV**
 | 
			
		||||
 - **CarambaTVPage**
 | 
			
		||||
 - **CartoonNetwork**
 | 
			
		||||
@@ -143,6 +145,7 @@
 | 
			
		||||
 - **CCTV**: 央视网
 | 
			
		||||
 - **CDA**
 | 
			
		||||
 - **CeskaTelevize**
 | 
			
		||||
 - **CeskaTelevizePorady**
 | 
			
		||||
 - **channel9**: Channel 9
 | 
			
		||||
 - **CharlieRose**
 | 
			
		||||
 - **Chaturbate**
 | 
			
		||||
@@ -211,6 +214,7 @@
 | 
			
		||||
 - **DiscoveryGo**
 | 
			
		||||
 - **DiscoveryGoPlaylist**
 | 
			
		||||
 - **DiscoveryNetworksDe**
 | 
			
		||||
 - **DiscoveryVR**
 | 
			
		||||
 - **Disney**
 | 
			
		||||
 - **Dotsub**
 | 
			
		||||
 - **DouyuTV**: 斗鱼
 | 
			
		||||
@@ -312,8 +316,8 @@
 | 
			
		||||
 - **GPUTechConf**
 | 
			
		||||
 - **Groupon**
 | 
			
		||||
 - **Hark**
 | 
			
		||||
 - **HBO**
 | 
			
		||||
 - **HBOEpisode**
 | 
			
		||||
 - **hbo**
 | 
			
		||||
 - **hbo:episode**
 | 
			
		||||
 - **HearThisAt**
 | 
			
		||||
 - **Heise**
 | 
			
		||||
 - **HellPorno**
 | 
			
		||||
@@ -428,6 +432,7 @@
 | 
			
		||||
 - **MDR**: MDR.DE and KiKA
 | 
			
		||||
 - **media.ccc.de**
 | 
			
		||||
 - **Medialaan**
 | 
			
		||||
 - **Medici**
 | 
			
		||||
 - **Meipai**: 美拍
 | 
			
		||||
 - **MelonVOD**
 | 
			
		||||
 - **META**
 | 
			
		||||
@@ -571,6 +576,8 @@
 | 
			
		||||
 - **orf:iptv**: iptv.ORF.at
 | 
			
		||||
 - **orf:oe1**: Radio Österreich 1
 | 
			
		||||
 - **orf:tvthek**: ORF TVthek
 | 
			
		||||
 - **PacktPub**
 | 
			
		||||
 - **PacktPubCourse**
 | 
			
		||||
 - **PandaTV**: 熊猫TV
 | 
			
		||||
 - **pandora.tv**: 판도라TV
 | 
			
		||||
 - **parliamentlive.tv**: UK parliament videos
 | 
			
		||||
@@ -628,7 +635,7 @@
 | 
			
		||||
 - **radiofrance**
 | 
			
		||||
 - **RadioJavan**
 | 
			
		||||
 - **Rai**
 | 
			
		||||
 - **RaiTV**
 | 
			
		||||
 - **RaiPlay**
 | 
			
		||||
 - **RBMARadio**
 | 
			
		||||
 - **RDS**: RDS.ca
 | 
			
		||||
 - **RedBullTV**
 | 
			
		||||
@@ -653,7 +660,9 @@
 | 
			
		||||
 - **rte**: Raidió Teilifís Éireann TV
 | 
			
		||||
 - **rte:radio**: Raidió Teilifís Éireann radio
 | 
			
		||||
 - **rtl.nl**: rtl.nl and rtlxl.nl
 | 
			
		||||
 - **RTL2**
 | 
			
		||||
 - **rtl2**
 | 
			
		||||
 - **rtl2:you**
 | 
			
		||||
 - **rtl2:you:series**
 | 
			
		||||
 - **RTP**
 | 
			
		||||
 - **RTS**: RTS.ch
 | 
			
		||||
 - **rtve.es:alacarta**: RTVE a la carta
 | 
			
		||||
@@ -775,6 +784,7 @@
 | 
			
		||||
 - **TheScene**
 | 
			
		||||
 - **TheSixtyOne**
 | 
			
		||||
 - **TheStar**
 | 
			
		||||
 - **TheSun**
 | 
			
		||||
 - **TheWeatherChannel**
 | 
			
		||||
 - **ThisAmericanLife**
 | 
			
		||||
 - **ThisAV**
 | 
			
		||||
@@ -811,6 +821,7 @@
 | 
			
		||||
 - **TV2Article**
 | 
			
		||||
 - **TV3**
 | 
			
		||||
 - **TV4**: tv4.se and tv4play.se
 | 
			
		||||
 - **TV5MondePlus**: TV5MONDE+
 | 
			
		||||
 - **TVA**
 | 
			
		||||
 - **TVANouvelles**
 | 
			
		||||
 - **TVANouvellesArticle**
 | 
			
		||||
@@ -887,7 +898,7 @@
 | 
			
		||||
 - **vidme:user**
 | 
			
		||||
 - **vidme:user:likes**
 | 
			
		||||
 - **Vidzi**
 | 
			
		||||
 - **vier**
 | 
			
		||||
 - **vier**: vier.be and vijf.be
 | 
			
		||||
 - **vier:videos**
 | 
			
		||||
 - **ViewLift**
 | 
			
		||||
 - **ViewLiftEmbed**
 | 
			
		||||
@@ -924,7 +935,10 @@
 | 
			
		||||
 - **Vporn**
 | 
			
		||||
 - **vpro**: npo.nl and ntr.nl
 | 
			
		||||
 - **Vrak**
 | 
			
		||||
 - **VRT**
 | 
			
		||||
 - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
 | 
			
		||||
 - **vrv**
 | 
			
		||||
 - **vrv:series**
 | 
			
		||||
 - **VShare**
 | 
			
		||||
 - **vube**: Vube.com
 | 
			
		||||
 - **VuClip**
 | 
			
		||||
 - **VVVVID**
 | 
			
		||||
@@ -952,7 +966,7 @@
 | 
			
		||||
 - **WSJ**: Wall Street Journal
 | 
			
		||||
 - **XBef**
 | 
			
		||||
 - **XboxClips**
 | 
			
		||||
 - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
 | 
			
		||||
 - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
 | 
			
		||||
 - **XHamster**
 | 
			
		||||
 - **XHamsterEmbed**
 | 
			
		||||
 - **xiami:album**: 虾米音乐 - 专辑
 | 
			
		||||
 
 | 
			
		||||
@@ -27,11 +27,11 @@ from youtube_dl.compat import (
 | 
			
		||||
class TestCompat(unittest.TestCase):
 | 
			
		||||
    def test_compat_getenv(self):
 | 
			
		||||
        test_str = 'тест'
 | 
			
		||||
        compat_setenv('YOUTUBE-DL-TEST', test_str)
 | 
			
		||||
        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 | 
			
		||||
        compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
 | 
			
		||||
        self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
 | 
			
		||||
 | 
			
		||||
    def test_compat_setenv(self):
 | 
			
		||||
        test_var = 'YOUTUBE-DL-TEST'
 | 
			
		||||
        test_var = 'YOUTUBE_DL_COMPAT_SETENV'
 | 
			
		||||
        test_str = 'тест'
 | 
			
		||||
        compat_setenv(test_var, test_str)
 | 
			
		||||
        compat_getenv(test_var)
 | 
			
		||||
 
 | 
			
		||||
@@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase):
 | 
			
		||||
 | 
			
		||||
    maxDiff = None
 | 
			
		||||
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        """Identify each test with the `add_ie` attribute, if available."""
 | 
			
		||||
 | 
			
		||||
        def strclass(cls):
 | 
			
		||||
            """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
 | 
			
		||||
            return '%s.%s' % (cls.__module__, cls.__name__)
 | 
			
		||||
 | 
			
		||||
        add_ie = getattr(self, self._testMethodName).add_ie
 | 
			
		||||
        return '%s (%s)%s:' % (self._testMethodName,
 | 
			
		||||
                               strclass(self.__class__),
 | 
			
		||||
                               ' [%s]' % add_ie if add_ie else '')
 | 
			
		||||
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        self.defs = defs
 | 
			
		||||
 | 
			
		||||
@@ -139,7 +151,7 @@ def generator(test_case, tname):
 | 
			
		||||
            try_num = 1
 | 
			
		||||
            while True:
 | 
			
		||||
                try:
 | 
			
		||||
                    # We're not using .download here sine that is just a shim
 | 
			
		||||
                    # We're not using .download here since that is just a shim
 | 
			
		||||
                    # for outside error handling, and returns the exit code
 | 
			
		||||
                    # instead of the result dict.
 | 
			
		||||
                    res_dict = ydl.extract_info(
 | 
			
		||||
@@ -187,7 +199,16 @@ def generator(test_case, tname):
 | 
			
		||||
                self.assertEqual(
 | 
			
		||||
                    test_case['playlist_duration_sum'], got_duration)
 | 
			
		||||
 | 
			
		||||
            for tc in test_cases:
 | 
			
		||||
            # Generalize both playlists and single videos to unified format for
 | 
			
		||||
            # simplicity
 | 
			
		||||
            if 'entries' not in res_dict:
 | 
			
		||||
                res_dict['entries'] = [res_dict]
 | 
			
		||||
 | 
			
		||||
            for tc_num, tc in enumerate(test_cases):
 | 
			
		||||
                tc_res_dict = res_dict['entries'][tc_num]
 | 
			
		||||
                # First, check test cases' data against extracted data alone
 | 
			
		||||
                expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
 | 
			
		||||
                # Now, check downloaded file consistency
 | 
			
		||||
                tc_filename = get_tc_filename(tc)
 | 
			
		||||
                if not test_case.get('params', {}).get('skip_download', False):
 | 
			
		||||
                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
 | 
			
		||||
@@ -205,13 +226,14 @@ def generator(test_case, tname):
 | 
			
		||||
                    if 'md5' in tc:
 | 
			
		||||
                        md5_for_file = _file_md5(tc_filename)
 | 
			
		||||
                        self.assertEqual(md5_for_file, tc['md5'])
 | 
			
		||||
                # Finally, check test cases' data again but this time against
 | 
			
		||||
                # extracted data from info JSON file written during processing
 | 
			
		||||
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
 | 
			
		||||
                self.assertTrue(
 | 
			
		||||
                    os.path.exists(info_json_fn),
 | 
			
		||||
                    'Missing info file %s' % info_json_fn)
 | 
			
		||||
                with io.open(info_json_fn, encoding='utf-8') as infof:
 | 
			
		||||
                    info_dict = json.load(infof)
 | 
			
		||||
 | 
			
		||||
                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
 | 
			
		||||
        finally:
 | 
			
		||||
            try_rm_tcs_files()
 | 
			
		||||
@@ -233,6 +255,8 @@ for n, test_case in enumerate(defs):
 | 
			
		||||
        i += 1
 | 
			
		||||
    test_method = generator(test_case, tname)
 | 
			
		||||
    test_method.__name__ = str(tname)
 | 
			
		||||
    ie_list = test_case.get('add_ie')
 | 
			
		||||
    test_method.add_ie = ie_list and ','.join(ie_list)
 | 
			
		||||
    setattr(TestDownload, test_method.__name__, test_method)
 | 
			
		||||
    del test_method
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
 | 
			
		||||
    NPOIE,
 | 
			
		||||
    ComedyCentralIE,
 | 
			
		||||
    NRKTVIE,
 | 
			
		||||
    RaiTVIE,
 | 
			
		||||
    RaiPlayIE,
 | 
			
		||||
    VikiIE,
 | 
			
		||||
    ThePlatformIE,
 | 
			
		||||
    ThePlatformFeedIE,
 | 
			
		||||
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
 | 
			
		||||
        self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestRaiSubtitles(BaseTestSubtitles):
 | 
			
		||||
    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
 | 
			
		||||
    IE = RaiTVIE
 | 
			
		||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
 | 
			
		||||
    url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
 | 
			
		||||
    IE = RaiPlayIE
 | 
			
		||||
 | 
			
		||||
    def test_allsubtitles(self):
 | 
			
		||||
        self.DL.params['writesubtitles'] = True
 | 
			
		||||
 
 | 
			
		||||
@@ -56,6 +56,7 @@ from youtube_dl.utils import (
 | 
			
		||||
    read_batch_urls,
 | 
			
		||||
    sanitize_filename,
 | 
			
		||||
    sanitize_path,
 | 
			
		||||
    expand_path,
 | 
			
		||||
    prepend_extension,
 | 
			
		||||
    replace_extension,
 | 
			
		||||
    remove_start,
 | 
			
		||||
@@ -95,6 +96,8 @@ from youtube_dl.utils import (
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_chr,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_getenv,
 | 
			
		||||
    compat_setenv,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
)
 | 
			
		||||
@@ -214,6 +217,18 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(sanitize_path('./abc'), 'abc')
 | 
			
		||||
        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 | 
			
		||||
 | 
			
		||||
    def test_expand_path(self):
 | 
			
		||||
        def env(var):
 | 
			
		||||
            return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
 | 
			
		||||
 | 
			
		||||
        compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
 | 
			
		||||
        self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
 | 
			
		||||
        self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
 | 
			
		||||
        self.assertEqual(expand_path('~'), compat_getenv('HOME'))
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
 | 
			
		||||
            '%s/expanded' % compat_getenv('HOME'))
 | 
			
		||||
 | 
			
		||||
    def test_prepend_extension(self):
 | 
			
		||||
        self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
 | 
			
		||||
        self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,6 @@ import random
 | 
			
		||||
from .compat import (
 | 
			
		||||
    compat_basestring,
 | 
			
		||||
    compat_cookiejar,
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_get_terminal_size,
 | 
			
		||||
    compat_http_client,
 | 
			
		||||
    compat_kwargs,
 | 
			
		||||
@@ -54,6 +53,7 @@ from .utils import (
 | 
			
		||||
    encode_compat_str,
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    error_to_compat_str,
 | 
			
		||||
    expand_path,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    format_bytes,
 | 
			
		||||
    formatSeconds,
 | 
			
		||||
@@ -672,7 +672,7 @@ class YoutubeDL(object):
 | 
			
		||||
                        FORMAT_RE.format(numeric_field),
 | 
			
		||||
                        r'%({0})s'.format(numeric_field), outtmpl)
 | 
			
		||||
 | 
			
		||||
            tmpl = compat_expanduser(outtmpl)
 | 
			
		||||
            tmpl = expand_path(outtmpl)
 | 
			
		||||
            filename = tmpl % template_dict
 | 
			
		||||
            # Temporary fix for #4787
 | 
			
		||||
            # 'Treat' all problem characters by passing filename through preferredencoding
 | 
			
		||||
@@ -837,6 +837,12 @@ class YoutubeDL(object):
 | 
			
		||||
                ie_result['url'], ie_key=ie_result.get('ie_key'),
 | 
			
		||||
                extra_info=extra_info, download=False, process=False)
 | 
			
		||||
 | 
			
		||||
            # extract_info may return None when ignoreerrors is enabled and
 | 
			
		||||
            # extraction failed with an error, don't crash and return early
 | 
			
		||||
            # in this case
 | 
			
		||||
            if not info:
 | 
			
		||||
                return info
 | 
			
		||||
 | 
			
		||||
            force_properties = dict(
 | 
			
		||||
                (k, v) for k, v in ie_result.items() if v is not None)
 | 
			
		||||
            for f in ('_type', 'url', 'ie_key'):
 | 
			
		||||
@@ -2170,7 +2176,7 @@ class YoutubeDL(object):
 | 
			
		||||
        if opts_cookiefile is None:
 | 
			
		||||
            self.cookiejar = compat_cookiejar.CookieJar()
 | 
			
		||||
        else:
 | 
			
		||||
            opts_cookiefile = compat_expanduser(opts_cookiefile)
 | 
			
		||||
            opts_cookiefile = expand_path(opts_cookiefile)
 | 
			
		||||
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
 | 
			
		||||
                opts_cookiefile)
 | 
			
		||||
            if os.access(opts_cookiefile, os.R_OK):
 | 
			
		||||
 
 | 
			
		||||
@@ -16,7 +16,6 @@ from .options import (
 | 
			
		||||
    parseOpts,
 | 
			
		||||
)
 | 
			
		||||
from .compat import (
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_getpass,
 | 
			
		||||
    compat_shlex_split,
 | 
			
		||||
    workaround_optparse_bug9161,
 | 
			
		||||
@@ -26,6 +25,7 @@ from .utils import (
 | 
			
		||||
    decodeOption,
 | 
			
		||||
    DEFAULT_OUTTMPL,
 | 
			
		||||
    DownloadError,
 | 
			
		||||
    expand_path,
 | 
			
		||||
    match_filter_func,
 | 
			
		||||
    MaxDownloadsReached,
 | 
			
		||||
    preferredencoding,
 | 
			
		||||
@@ -88,7 +88,7 @@ def _real_main(argv=None):
 | 
			
		||||
                batchfd = sys.stdin
 | 
			
		||||
            else:
 | 
			
		||||
                batchfd = io.open(
 | 
			
		||||
                    compat_expanduser(opts.batchfile),
 | 
			
		||||
                    expand_path(opts.batchfile),
 | 
			
		||||
                    'r', encoding='utf-8', errors='ignore')
 | 
			
		||||
            batch_urls = read_batch_urls(batchfd)
 | 
			
		||||
            if opts.verbose:
 | 
			
		||||
@@ -238,7 +238,7 @@ def _real_main(argv=None):
 | 
			
		||||
 | 
			
		||||
    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
 | 
			
		||||
    any_printing = opts.print_json
 | 
			
		||||
    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 | 
			
		||||
    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 | 
			
		||||
 | 
			
		||||
    # PostProcessors
 | 
			
		||||
    postprocessors = []
 | 
			
		||||
@@ -449,7 +449,7 @@ def _real_main(argv=None):
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            if opts.load_info_filename is not None:
 | 
			
		||||
                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
 | 
			
		||||
                retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
 | 
			
		||||
            else:
 | 
			
		||||
                retcode = ydl.download(all_urls)
 | 
			
		||||
        except MaxDownloadsReached:
 | 
			
		||||
 
 | 
			
		||||
@@ -8,8 +8,11 @@ import re
 | 
			
		||||
import shutil
 | 
			
		||||
import traceback
 | 
			
		||||
 | 
			
		||||
from .compat import compat_expanduser, compat_getenv
 | 
			
		||||
from .utils import write_json_file
 | 
			
		||||
from .compat import compat_getenv
 | 
			
		||||
from .utils import (
 | 
			
		||||
    expand_path,
 | 
			
		||||
    write_json_file,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Cache(object):
 | 
			
		||||
@@ -21,7 +24,7 @@ class Cache(object):
 | 
			
		||||
        if res is None:
 | 
			
		||||
            cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
 | 
			
		||||
            res = os.path.join(cache_root, 'youtube-dl')
 | 
			
		||||
        return compat_expanduser(res)
 | 
			
		||||
        return expand_path(res)
 | 
			
		||||
 | 
			
		||||
    def _get_cache_fn(self, section, key, dtype):
 | 
			
		||||
        assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
 | 
			
		||||
 
 | 
			
		||||
@@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
 | 
			
		||||
        if ed.can_download(info_dict):
 | 
			
		||||
            return ed
 | 
			
		||||
 | 
			
		||||
    if protocol.startswith('m3u8') and info_dict.get('is_live'):
 | 
			
		||||
        return FFmpegFD
 | 
			
		||||
 | 
			
		||||
    if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
 | 
			
		||||
        return HlsFD
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										136
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,136 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..aes import aes_cbc_decrypt
 | 
			
		||||
from ..compat import compat_ord
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    bytes_to_intlist,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    intlist_to_bytes,
 | 
			
		||||
    srt_subtitles_timecode,
 | 
			
		||||
    strip_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ADNIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'Anime Digital Network'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
 | 
			
		||||
        'md5': 'e497370d847fd79d9d4c74be55575c7a',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '7778',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
 | 
			
		||||
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _get_subtitles(self, sub_path, video_id):
 | 
			
		||||
        if not sub_path:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        enc_subtitles = self._download_webpage(
 | 
			
		||||
            'http://animedigitalnetwork.fr/' + sub_path,
 | 
			
		||||
            video_id, fatal=False)
 | 
			
		||||
        if not enc_subtitles:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
 | 
			
		||||
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
 | 
			
		||||
            bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
 | 
			
		||||
            bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
 | 
			
		||||
            bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
 | 
			
		||||
        ))
 | 
			
		||||
        subtitles_json = self._parse_json(
 | 
			
		||||
            dec_subtitles[:-compat_ord(dec_subtitles[-1])],
 | 
			
		||||
            None, fatal=False)
 | 
			
		||||
        if not subtitles_json:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        for sub_lang, sub in subtitles_json.items():
 | 
			
		||||
            srt = ''
 | 
			
		||||
            for num, current in enumerate(sub):
 | 
			
		||||
                start, end, text = (
 | 
			
		||||
                    float_or_none(current.get('startTime')),
 | 
			
		||||
                    float_or_none(current.get('endTime')),
 | 
			
		||||
                    current.get('text'))
 | 
			
		||||
                if start is None or end is None or text is None:
 | 
			
		||||
                    continue
 | 
			
		||||
                srt += os.linesep.join(
 | 
			
		||||
                    (
 | 
			
		||||
                        '%d' % num,
 | 
			
		||||
                        '%s --> %s' % (
 | 
			
		||||
                            srt_subtitles_timecode(start),
 | 
			
		||||
                            srt_subtitles_timecode(end)),
 | 
			
		||||
                        text,
 | 
			
		||||
                        os.linesep,
 | 
			
		||||
                    ))
 | 
			
		||||
 | 
			
		||||
            if sub_lang == 'vostf':
 | 
			
		||||
                sub_lang = 'fr'
 | 
			
		||||
            subtitles.setdefault(sub_lang, []).extend([{
 | 
			
		||||
                'ext': 'json',
 | 
			
		||||
                'data': json.dumps(sub),
 | 
			
		||||
            }, {
 | 
			
		||||
                'ext': 'srt',
 | 
			
		||||
                'data': srt,
 | 
			
		||||
            }])
 | 
			
		||||
        return subtitles
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        player_config = self._parse_json(self._search_regex(
 | 
			
		||||
            r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
 | 
			
		||||
 | 
			
		||||
        video_info = {}
 | 
			
		||||
        video_info_str = self._search_regex(
 | 
			
		||||
            r'videoInfo\s*=\s*({.+});', webpage,
 | 
			
		||||
            'video info', fatal=False)
 | 
			
		||||
        if video_info_str:
 | 
			
		||||
            video_info = self._parse_json(
 | 
			
		||||
                video_info_str, video_id, fatal=False) or {}
 | 
			
		||||
 | 
			
		||||
        options = player_config.get('options') or {}
 | 
			
		||||
        metas = options.get('metas') or {}
 | 
			
		||||
        title = metas.get('title') or video_info['title']
 | 
			
		||||
        links = player_config.get('links') or {}
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for format_id, qualities in links.items():
 | 
			
		||||
            for load_balancer_url in qualities.values():
 | 
			
		||||
                load_balancer_data = self._download_json(
 | 
			
		||||
                    load_balancer_url, video_id, fatal=False) or {}
 | 
			
		||||
                m3u8_url = load_balancer_data.get('location')
 | 
			
		||||
                if not m3u8_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                m3u8_formats = self._extract_m3u8_formats(
 | 
			
		||||
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                    m3u8_id=format_id, fatal=False)
 | 
			
		||||
                if format_id == 'vf':
 | 
			
		||||
                    for f in m3u8_formats:
 | 
			
		||||
                        f['language'] = 'fr'
 | 
			
		||||
                formats.extend(m3u8_formats)
 | 
			
		||||
        error = options.get('error')
 | 
			
		||||
        if not formats and error:
 | 
			
		||||
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
 | 
			
		||||
            'thumbnail': video_info.get('image'),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
 | 
			
		||||
            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
 | 
			
		||||
            'series': video_info.get('playlistTitle'),
 | 
			
		||||
        }
 | 
			
		||||
@@ -107,7 +107,10 @@ class AENetworksIE(AENetworksBaseIE):
 | 
			
		||||
        }
 | 
			
		||||
        video_id = self._html_search_meta('aetn:VideoID', webpage)
 | 
			
		||||
        media_url = self._search_regex(
 | 
			
		||||
            r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
 | 
			
		||||
            [r"media_url\s*=\s*'(?P<url>[^']+)'",
 | 
			
		||||
             r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
 | 
			
		||||
             r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
 | 
			
		||||
            webpage, 'video url', group='url')
 | 
			
		||||
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
 | 
			
		||||
            r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
 | 
			
		||||
        info = self._parse_theplatform_metadata(theplatform_metadata)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,15 +4,11 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_xpath
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -43,7 +39,8 @@ class AfreecaTVIE(InfoExtractor):
 | 
			
		||||
            'uploader': 'dailyapril',
 | 
			
		||||
            'uploader_id': 'dailyapril',
 | 
			
		||||
            'upload_date': '20160503',
 | 
			
		||||
        }
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Video is gone',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -71,6 +68,60 @@ class AfreecaTVIE(InfoExtractor):
 | 
			
		||||
                'upload_date': '20160502',
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'skip': 'Video is gone',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '18650793',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'uploader': '윈아디',
 | 
			
		||||
            'uploader_id': 'badkids',
 | 
			
		||||
            'duration': 107,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '10481652',
 | 
			
		||||
            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
 | 
			
		||||
            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
 | 
			
		||||
            'uploader': 'dailyapril',
 | 
			
		||||
            'uploader_id': 'dailyapril',
 | 
			
		||||
            'duration': 6492,
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 2,
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'md5': 'd8b7c174568da61d774ef0203159bf97',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '10481652_1',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
 | 
			
		||||
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
 | 
			
		||||
                'uploader': 'dailyapril',
 | 
			
		||||
                'uploader_id': 'dailyapril',
 | 
			
		||||
                'upload_date': '20160502',
 | 
			
		||||
                'duration': 3601,
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '10481652_2',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
 | 
			
		||||
                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
 | 
			
		||||
                'uploader': 'dailyapril',
 | 
			
		||||
                'uploader_id': 'dailyapril',
 | 
			
		||||
                'upload_date': '20160502',
 | 
			
		||||
                'duration': 2891,
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
@@ -85,42 +136,76 @@ class AfreecaTVIE(InfoExtractor):
 | 
			
		||||
        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
 | 
			
		||||
        if m:
 | 
			
		||||
            video_key['upload_date'] = m.group('upload_date')
 | 
			
		||||
            video_key['part'] = m.group('part')
 | 
			
		||||
            video_key['part'] = int(m.group('part'))
 | 
			
		||||
        return video_key
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        parsed_url = compat_urllib_parse_urlparse(url)
 | 
			
		||||
        info_url = compat_urlparse.urlunparse(parsed_url._replace(
 | 
			
		||||
            netloc='afbbs.afreecatv.com:8080',
 | 
			
		||||
            path='/api/video/get_video_info.php'))
 | 
			
		||||
 | 
			
		||||
        video_xml = self._download_xml(
 | 
			
		||||
            update_url_query(info_url, {'nTitleNo': video_id}), video_id)
 | 
			
		||||
            'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
 | 
			
		||||
            video_id, query={'nTitleNo': video_id})
 | 
			
		||||
 | 
			
		||||
        if xpath_element(video_xml, './track/video/file') is None:
 | 
			
		||||
        video_element = video_xml.findall(compat_xpath('./track/video'))[1]
 | 
			
		||||
        if video_element is None or video_element.text is None:
 | 
			
		||||
            raise ExtractorError('Specified AfreecaTV video does not exist',
 | 
			
		||||
                                 expected=True)
 | 
			
		||||
 | 
			
		||||
        title = xpath_text(video_xml, './track/title', 'title')
 | 
			
		||||
        video_url = video_element.text.strip()
 | 
			
		||||
 | 
			
		||||
        title = xpath_text(video_xml, './track/title', 'title', fatal=True)
 | 
			
		||||
 | 
			
		||||
        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
 | 
			
		||||
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
 | 
			
		||||
        duration = int_or_none(xpath_text(video_xml, './track/duration',
 | 
			
		||||
                                          'duration'))
 | 
			
		||||
        duration = int_or_none(xpath_text(
 | 
			
		||||
            video_xml, './track/duration', 'duration'))
 | 
			
		||||
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for i, video_file in enumerate(video_xml.findall('./track/video/file')):
 | 
			
		||||
            video_key = self.parse_video_key(video_file.get('key', ''))
 | 
			
		||||
            if not video_key:
 | 
			
		||||
                continue
 | 
			
		||||
            entries.append({
 | 
			
		||||
                'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'upload_date': video_key.get('upload_date'),
 | 
			
		||||
                'duration': int_or_none(video_file.get('duration')),
 | 
			
		||||
                'url': video_file.text,
 | 
			
		||||
        common_entry = {
 | 
			
		||||
            'uploader': uploader,
 | 
			
		||||
            'uploader_id': uploader_id,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        info = common_entry.copy()
 | 
			
		||||
        info.update({
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
        if not video_url:
 | 
			
		||||
            entries = []
 | 
			
		||||
            for file_num, file_element in enumerate(
 | 
			
		||||
                    video_element.findall(compat_xpath('./file')), start=1):
 | 
			
		||||
                file_url = file_element.text
 | 
			
		||||
                if not file_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                video_key = self.parse_video_key(file_element.get('key', ''))
 | 
			
		||||
                if not video_key:
 | 
			
		||||
                    continue
 | 
			
		||||
                file_duration = int_or_none(file_element.get('duration'))
 | 
			
		||||
                part = video_key.get('part', file_num)
 | 
			
		||||
                format_id = '%s_%s' % (video_id, part)
 | 
			
		||||
                formats = self._extract_m3u8_formats(
 | 
			
		||||
                    file_url, video_id, 'mp4', entry_protocol='m3u8_native',
 | 
			
		||||
                    m3u8_id='hls',
 | 
			
		||||
                    note='Downloading part %d m3u8 information' % file_num)
 | 
			
		||||
                file_info = common_entry.copy()
 | 
			
		||||
                file_info.update({
 | 
			
		||||
                    'id': format_id,
 | 
			
		||||
                    'title': '%s (part %d)' % (title, part),
 | 
			
		||||
                    'upload_date': video_key.get('upload_date'),
 | 
			
		||||
                    'duration': file_duration,
 | 
			
		||||
                    'formats': formats,
 | 
			
		||||
                })
 | 
			
		||||
                entries.append(file_info)
 | 
			
		||||
            entries_info = info.copy()
 | 
			
		||||
            entries_info.update({
 | 
			
		||||
                '_type': 'multi_video',
 | 
			
		||||
                'entries': entries,
 | 
			
		||||
            })
 | 
			
		||||
            return entries_info
 | 
			
		||||
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
@@ -131,17 +216,18 @@ class AfreecaTVIE(InfoExtractor):
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if len(entries) > 1:
 | 
			
		||||
            info['_type'] = 'multi_video'
 | 
			
		||||
            info['entries'] = entries
 | 
			
		||||
        elif len(entries) == 1:
 | 
			
		||||
            info['url'] = entries[0]['url']
 | 
			
		||||
            info['upload_date'] = entries[0].get('upload_date')
 | 
			
		||||
        if determine_ext(video_url) == 'm3u8':
 | 
			
		||||
            info['formats'] = self._extract_m3u8_formats(
 | 
			
		||||
                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
 | 
			
		||||
                m3u8_id='hls')
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'No files found for the specified AfreecaTV video, either'
 | 
			
		||||
                ' the URL is incorrect or the video has been made private.',
 | 
			
		||||
                expected=True)
 | 
			
		||||
            app, playpath = video_url.split('mp4:')
 | 
			
		||||
            info.update({
 | 
			
		||||
                'url': app,
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'play_path': 'mp4:' + playpath,
 | 
			
		||||
                'rtmp_live': True,  # downloading won't end without this
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
 | 
			
		||||
        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
 | 
			
		||||
        'md5': '8d02f53ee39cf006009180e21df1f3ba',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '6x4q2w',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
 | 
			
		||||
            'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
 | 
			
		||||
            'thumbnail': r're:https?://.*/poster\.jpg',
 | 
			
		||||
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
 | 
			
		||||
            'timestamp': 1422487800,
 | 
			
		||||
            'upload_date': '20150128',
 | 
			
		||||
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
 | 
			
		||||
        video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
 | 
			
		||||
 | 
			
		||||
        embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
 | 
			
		||||
        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
 | 
			
		||||
        metadata = self._parse_json(jwconfig, video_id)
 | 
			
		||||
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'url': source['file'],
 | 
			
		||||
            'ext': source['type'],
 | 
			
		||||
            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
 | 
			
		||||
            'format': source['label'],
 | 
			
		||||
            'height': int(source['label'].rstrip('p')),
 | 
			
		||||
        } for source in metadata['playlist'][0]['sources']]
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
        jwconfig = self._parse_json(self._search_regex(
 | 
			
		||||
            r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
 | 
			
		||||
 | 
			
		||||
        info_dict = self._parse_jwplayer_data(jwconfig, video_id)
 | 
			
		||||
        view_count = int_or_none(self._html_search_regex(
 | 
			
		||||
            r'Views since archived: ([0-9]+)',
 | 
			
		||||
            webpage, 'view count', fatal=False))
 | 
			
		||||
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
 | 
			
		||||
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
 | 
			
		||||
            webpage, 'duration', fatal=False))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info_dict.update({
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': self._og_search_title(webpage),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'url': self._og_search_url(webpage),
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'thumbnail': metadata['playlist'][0].get('image'),
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
 | 
			
		||||
        }
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
        return info_dict
 | 
			
		||||
 
 | 
			
		||||
@@ -2,9 +2,13 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    remove_end,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    qualities,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    try_get,
 | 
			
		||||
    unified_timestamp,
 | 
			
		||||
    url_basename,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
 | 
			
		||||
            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
 | 
			
		||||
            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'duration': 39,
 | 
			
		||||
            'timestamp': 1404273600,
 | 
			
		||||
            'upload_date': '20140702',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
 | 
			
		||||
@@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
 | 
			
		||||
            'title': 'Planes 2 Bande-annonce VF',
 | 
			
		||||
            'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'duration': 69,
 | 
			
		||||
            'timestamp': 1385659800,
 | 
			
		||||
            'upload_date': '20131128',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
 | 
			
		||||
@@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
 | 
			
		||||
            'title': 'Dragons 2 - Bande annonce finale VF',
 | 
			
		||||
            'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'duration': 144,
 | 
			
		||||
            'timestamp': 1397589900,
 | 
			
		||||
            'upload_date': '20140415',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.allocine.fr/video/video-19550147/',
 | 
			
		||||
@@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
 | 
			
		||||
            r'data-model="([^"]+)"', webpage, 'data model', default=None)
 | 
			
		||||
        if model:
 | 
			
		||||
            model_data = self._parse_json(model, display_id)
 | 
			
		||||
 | 
			
		||||
            for video_url in model_data['sources'].values():
 | 
			
		||||
            video = model_data['videos'][0]
 | 
			
		||||
            title = video['title']
 | 
			
		||||
            for video_url in video['sources'].values():
 | 
			
		||||
                video_id, format_id = url_basename(video_url).split('_')[:2]
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': format_id,
 | 
			
		||||
                    'quality': quality(format_id),
 | 
			
		||||
                    'url': video_url,
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            title = model_data['title']
 | 
			
		||||
            duration = int_or_none(video.get('duration'))
 | 
			
		||||
            view_count = int_or_none(video.get('view_count'))
 | 
			
		||||
            timestamp = unified_timestamp(try_get(
 | 
			
		||||
                video, lambda x: x['added_at']['date'], compat_str))
 | 
			
		||||
        else:
 | 
			
		||||
            video_id = display_id
 | 
			
		||||
            media_data = self._download_json(
 | 
			
		||||
                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
 | 
			
		||||
            title = remove_end(
 | 
			
		||||
                self._html_search_regex(
 | 
			
		||||
                    r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
 | 
			
		||||
                ' - AlloCiné')
 | 
			
		||||
            for key, value in media_data['video'].items():
 | 
			
		||||
                if not key.endswith('Path'):
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
                format_id = key[:-len('Path')]
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': format_id,
 | 
			
		||||
                    'quality': quality(format_id),
 | 
			
		||||
                    'url': value,
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            title = remove_end(self._html_search_regex(
 | 
			
		||||
                r'(?s)<title>(.+?)</title>', webpage, 'title'
 | 
			
		||||
            ).strip(), ' - AlloCiné')
 | 
			
		||||
            duration, view_count, timestamp = [None] * 3
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
@@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
 | 
			
		||||
                exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
 | 
			
		||||
                if kind == 'm3u8' or 'm3u8' in exts:
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        f_url, video_id, 'mp4',
 | 
			
		||||
                        entry_protocol='m3u8' if is_live else 'm3u8_native',
 | 
			
		||||
                        f_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                        m3u8_id=kind, fatal=False, live=is_live))
 | 
			
		||||
                elif kind == 'flash' or 'f4m' in exts:
 | 
			
		||||
                    formats.extend(self._extract_f4m_formats(
 | 
			
		||||
 
 | 
			
		||||
@@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor):
 | 
			
		||||
            request, None, 'Logging in as %s' % username)
 | 
			
		||||
 | 
			
		||||
        error = self._html_search_regex(
 | 
			
		||||
            r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
 | 
			
		||||
            r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
 | 
			
		||||
            response, 'error', default=None)
 | 
			
		||||
        if error:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'Unable to login: %s' % error, expected=True)
 | 
			
		||||
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
 | 
			
		||||
            if format_id == 'token' or not video_url.startswith('http'):
 | 
			
		||||
                continue
 | 
			
		||||
            if 'geodeswowsmpra3player' in video_url:
 | 
			
		||||
                f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
 | 
			
		||||
                f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
 | 
			
		||||
                # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
 | 
			
		||||
                # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
 | 
			
		||||
                # this videos are protected by DRM, the f4m downloader doesn't support them
 | 
			
		||||
                continue
 | 
			
		||||
            else:
 | 
			
		||||
                f4m_url = video_url[:-9] + '/manifest.f4m'
 | 
			
		||||
            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
 | 
			
		||||
            video_url_hd = video_url.replace('free_es', 'es')
 | 
			
		||||
            formats.extend(self._extract_f4m_formats(
 | 
			
		||||
                video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
 | 
			
		||||
                fatal=False))
 | 
			
		||||
            formats.extend(self._extract_mpd_formats(
 | 
			
		||||
                video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
 | 
			
		||||
                fatal=False))
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        path_data = player.get('pathData')
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ATVAtIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
 | 
			
		||||
        'md5': 'c3b6b975fb3150fc628572939df205f2',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1698447',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        video_data = self._parse_json(unescapeHTML(self._search_regex(
 | 
			
		||||
            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
 | 
			
		||||
            webpage, 'player data')), display_id)['config']['initial_video']
 | 
			
		||||
 | 
			
		||||
        video_id = video_data['id']
 | 
			
		||||
        video_title = video_data['title']
 | 
			
		||||
 | 
			
		||||
        parts = []
 | 
			
		||||
        for part in video_data.get('parts', []):
 | 
			
		||||
            part_id = part['id']
 | 
			
		||||
            part_title = part['title']
 | 
			
		||||
 | 
			
		||||
            formats = []
 | 
			
		||||
            for source in part.get('sources', []):
 | 
			
		||||
                source_url = source.get('src')
 | 
			
		||||
                if not source_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                ext = determine_ext(source_url)
 | 
			
		||||
                if ext == 'm3u8':
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        source_url, part_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                        m3u8_id='hls', fatal=False))
 | 
			
		||||
                else:
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'format_id': source.get('delivery'),
 | 
			
		||||
                        'url': source_url,
 | 
			
		||||
                    })
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            parts.append({
 | 
			
		||||
                'id': part_id,
 | 
			
		||||
                'title': part_title,
 | 
			
		||||
                'thumbnail': part.get('preview_image_url'),
 | 
			
		||||
                'duration': int_or_none(part.get('duration')),
 | 
			
		||||
                'is_live': part.get('is_livestream'),
 | 
			
		||||
                'formats': formats,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'multi_video',
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'entries': parts,
 | 
			
		||||
        }
 | 
			
		||||
@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
 | 
			
		||||
                            fmt.update({
 | 
			
		||||
                                'width': width,
 | 
			
		||||
                                'height': height,
 | 
			
		||||
                                'vbr': bitrate,
 | 
			
		||||
                                'tbr': bitrate,
 | 
			
		||||
                                'vcodec': encoding,
 | 
			
		||||
                            })
 | 
			
		||||
                        else:
 | 
			
		||||
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
 | 
			
		||||
                                'acodec': encoding,
 | 
			
		||||
                                'vcodec': 'none',
 | 
			
		||||
                            })
 | 
			
		||||
                        if protocol == 'http':
 | 
			
		||||
                        if protocol in ('http', 'https'):
 | 
			
		||||
                            # Direct link
 | 
			
		||||
                            fmt.update({
 | 
			
		||||
                                'url': href,
 | 
			
		||||
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
 | 
			
		||||
                                'rtmp_live': False,
 | 
			
		||||
                                'ext': 'flv',
 | 
			
		||||
                            })
 | 
			
		||||
                        else:
 | 
			
		||||
                            continue
 | 
			
		||||
                        formats.append(fmt)
 | 
			
		||||
            elif kind == 'captions':
 | 
			
		||||
                subtitles = self.extract_subtitles(media, programme_id)
 | 
			
		||||
 
 | 
			
		||||
@@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
 | 
			
		||||
                animalplanet|
 | 
			
		||||
                bravo|
 | 
			
		||||
                mtv|
 | 
			
		||||
                space
 | 
			
		||||
                space|
 | 
			
		||||
                etalk
 | 
			
		||||
            )\.ca|
 | 
			
		||||
            much\.com
 | 
			
		||||
        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
 | 
			
		||||
        )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.ctv.ca/video/player?vid=706966',
 | 
			
		||||
        'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
 | 
			
		||||
@@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.etalk.ca/video?videoid=663455',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
    _DOMAINS = {
 | 
			
		||||
        'thecomedynetwork': 'comedy',
 | 
			
		||||
@@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor):
 | 
			
		||||
        'sciencechannel': 'discsci',
 | 
			
		||||
        'investigationdiscovery': 'invdisc',
 | 
			
		||||
        'animalplanet': 'aniplan',
 | 
			
		||||
        'etalk': 'ctv',
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -7,6 +7,7 @@ from ..utils import float_or_none
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CanvasIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'canvas.be and een.be'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
 | 
			
		||||
 
 | 
			
		||||
@@ -12,13 +12,14 @@ from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
    USER_AGENTS,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Georestricted to Czech Republic',
 | 
			
		||||
    }, {
 | 
			
		||||
        # video with 18+ caution trailer
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '215562210900007-bogotart',
 | 
			
		||||
            'title': 'Queer: Bogotart',
 | 
			
		||||
            'description': 'Alternativní průvodce současným queer světem',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '61924494876844842',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Queer: Bogotart (Varování 18+)',
 | 
			
		||||
                'duration': 10.2,
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '61924494877068022',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Queer: Bogotart (Queer)',
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg',
 | 
			
		||||
                'duration': 1558.3,
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
 | 
			
		||||
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        playlist_id = mobj.group('id')
 | 
			
		||||
        playlist_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, playlist_id)
 | 
			
		||||
 | 
			
		||||
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
 | 
			
		||||
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
 | 
			
		||||
 | 
			
		||||
        typ = self._html_search_regex(
 | 
			
		||||
            r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
 | 
			
		||||
        episode_id = self._html_search_regex(
 | 
			
		||||
            r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
 | 
			
		||||
        type_ = None
 | 
			
		||||
        episode_id = None
 | 
			
		||||
 | 
			
		||||
        playlist = self._parse_json(
 | 
			
		||||
            self._search_regex(
 | 
			
		||||
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
 | 
			
		||||
                default='{}'), playlist_id)
 | 
			
		||||
        if playlist:
 | 
			
		||||
            type_ = playlist.get('type')
 | 
			
		||||
            episode_id = playlist.get('id')
 | 
			
		||||
 | 
			
		||||
        if not type_:
 | 
			
		||||
            type_ = self._html_search_regex(
 | 
			
		||||
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
 | 
			
		||||
                webpage, 'type')
 | 
			
		||||
        if not episode_id:
 | 
			
		||||
            episode_id = self._html_search_regex(
 | 
			
		||||
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
 | 
			
		||||
                webpage, 'episode_id')
 | 
			
		||||
 | 
			
		||||
        data = {
 | 
			
		||||
            'playlist[0][type]': typ,
 | 
			
		||||
            'playlist[0][type]': type_,
 | 
			
		||||
            'playlist[0][id]': episode_id,
 | 
			
		||||
            'requestUrl': compat_urllib_parse_urlparse(url).path,
 | 
			
		||||
            'requestSource': 'iVysilani',
 | 
			
		||||
@@ -160,8 +148,7 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
                for format_id, stream_url in item.get('streamUrls', {}).items():
 | 
			
		||||
                    if 'playerType=flash' in stream_url:
 | 
			
		||||
                        stream_formats = self._extract_m3u8_formats(
 | 
			
		||||
                            stream_url, playlist_id, 'mp4',
 | 
			
		||||
                            entry_protocol='m3u8' if is_live else 'm3u8_native',
 | 
			
		||||
                            stream_url, playlist_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                            m3u8_id='hls-%s' % format_id, fatal=False)
 | 
			
		||||
                    else:
 | 
			
		||||
                        stream_formats = self._extract_mpd_formats(
 | 
			
		||||
@@ -246,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
 | 
			
		||||
                    yield line
 | 
			
		||||
 | 
			
		||||
        return '\r\n'.join(_fix_subtitle(subtitles))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CeskaTelevizePoradyIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # video with 18+ caution trailer
 | 
			
		||||
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '215562210900007-bogotart',
 | 
			
		||||
            'title': 'Queer: Bogotart',
 | 
			
		||||
            'description': 'Alternativní průvodce současným queer světem',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist': [{
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '61924494876844842',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Queer: Bogotart (Varování 18+)',
 | 
			
		||||
                'duration': 10.2,
 | 
			
		||||
            },
 | 
			
		||||
        }, {
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '61924494877068022',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Queer: Bogotart (Queer)',
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg',
 | 
			
		||||
                'duration': 1558.3,
 | 
			
		||||
            },
 | 
			
		||||
        }],
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        data_url = unescapeHTML(self._search_regex(
 | 
			
		||||
            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
 | 
			
		||||
            webpage, 'iframe player url', group='url'))
 | 
			
		||||
 | 
			
		||||
        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
 | 
			
		||||
 
 | 
			
		||||
@@ -4,62 +4,62 @@ import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    parse_filesize,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
    qualities,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Channel9IE(InfoExtractor):
 | 
			
		||||
    '''
 | 
			
		||||
    Common extractor for channel9.msdn.com.
 | 
			
		||||
 | 
			
		||||
    The type of provided URL (video or playlist) is determined according to
 | 
			
		||||
    meta Search.PageType from web page HTML rather than URL itself, as it is
 | 
			
		||||
    not always possible to do.
 | 
			
		||||
    '''
 | 
			
		||||
    IE_DESC = 'Channel 9'
 | 
			
		||||
    IE_NAME = 'channel9'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
        'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
 | 
			
		||||
        'md5': '32083d4eaf1946db6d454313f44510ca',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'Events/TechEd/Australia/2013/KOS002',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'id': '6c413323-383a-49dc-88f9-a22800cab024',
 | 
			
		||||
            'ext': 'wmv',
 | 
			
		||||
            'title': 'Developer Kick-Off Session: Stuff We Love',
 | 
			
		||||
            'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
 | 
			
		||||
            'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
 | 
			
		||||
            'duration': 4576,
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'thumbnail': r're:https?://.*\.jpg',
 | 
			
		||||
            'timestamp': 1377717420,
 | 
			
		||||
            'upload_date': '20130828',
 | 
			
		||||
            'session_code': 'KOS002',
 | 
			
		||||
            'session_day': 'Day 1',
 | 
			
		||||
            'session_room': 'Arena 1A',
 | 
			
		||||
            'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
 | 
			
		||||
                                 'Mads Kristensen'],
 | 
			
		||||
            'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
        'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
 | 
			
		||||
        'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
 | 
			
		||||
            'ext': 'wmv',
 | 
			
		||||
            'title': 'Self-service BI with Power BI - nuclear testing',
 | 
			
		||||
            'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
 | 
			
		||||
            'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
 | 
			
		||||
            'duration': 1540,
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'thumbnail': r're:https?://.*\.jpg',
 | 
			
		||||
            'timestamp': 1386381991,
 | 
			
		||||
            'upload_date': '20131207',
 | 
			
		||||
            'authors': ['Mike Wilmot'],
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # low quality mp4 is best
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
 | 
			
		||||
            'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Ranges for the Standard Library',
 | 
			
		||||
            'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
 | 
			
		||||
            'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
 | 
			
		||||
            'duration': 5646,
 | 
			
		||||
            'thumbnail': r're:http://.*\.jpg',
 | 
			
		||||
            'thumbnail': r're:https?://.*\.jpg',
 | 
			
		||||
            'upload_date': '20150930',
 | 
			
		||||
            'timestamp': 1443640735,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
@@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
            'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
 | 
			
		||||
            'title': 'Channel 9',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 2,
 | 
			
		||||
        'playlist_mincount': 100,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
@@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 | 
			
		||||
 | 
			
		||||
    def _formats_from_html(self, html):
 | 
			
		||||
        FORMAT_REGEX = r'''
 | 
			
		||||
            (?x)
 | 
			
		||||
            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
 | 
			
		||||
            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
 | 
			
		||||
            (?:<div\s+class="popup\s+rounded">\s*
 | 
			
		||||
            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
 | 
			
		||||
            </div>)?                                                # File size part may be missing
 | 
			
		||||
        '''
 | 
			
		||||
        quality = qualities((
 | 
			
		||||
            'MP3', 'MP4',
 | 
			
		||||
            'Low Quality WMV', 'Low Quality MP4',
 | 
			
		||||
            'Mid Quality WMV', 'Mid Quality MP4',
 | 
			
		||||
            'High Quality WMV', 'High Quality MP4'))
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'url': x.group('url'),
 | 
			
		||||
            'format_id': x.group('quality'),
 | 
			
		||||
            'format_note': x.group('note'),
 | 
			
		||||
            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
 | 
			
		||||
            'filesize_approx': parse_filesize(x.group('filesize')),
 | 
			
		||||
            'quality': quality(x.group('quality')),
 | 
			
		||||
            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
 | 
			
		||||
        } for x in list(re.finditer(FORMAT_REGEX, html))]
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return formats
 | 
			
		||||
 | 
			
		||||
    def _extract_title(self, html):
 | 
			
		||||
        title = self._html_search_meta('title', html, 'title')
 | 
			
		||||
        if title is None:
 | 
			
		||||
            title = self._og_search_title(html)
 | 
			
		||||
            TITLE_SUFFIX = ' (Channel 9)'
 | 
			
		||||
            if title is not None and title.endswith(TITLE_SUFFIX):
 | 
			
		||||
                title = title[:-len(TITLE_SUFFIX)]
 | 
			
		||||
        return title
 | 
			
		||||
 | 
			
		||||
    def _extract_description(self, html):
 | 
			
		||||
        DESCRIPTION_REGEX = r'''(?sx)
 | 
			
		||||
            <div\s+class="entry-content">\s*
 | 
			
		||||
            <div\s+id="entry-body">\s*
 | 
			
		||||
            (?P<description>.+?)\s*
 | 
			
		||||
            </div>\s*
 | 
			
		||||
            </div>
 | 
			
		||||
        '''
 | 
			
		||||
        m = re.search(DESCRIPTION_REGEX, html)
 | 
			
		||||
        if m is not None:
 | 
			
		||||
            return m.group('description')
 | 
			
		||||
        return self._html_search_meta('description', html, 'description')
 | 
			
		||||
 | 
			
		||||
    def _extract_duration(self, html):
 | 
			
		||||
        m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
 | 
			
		||||
        return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
 | 
			
		||||
 | 
			
		||||
    def _extract_slides(self, html):
 | 
			
		||||
        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
 | 
			
		||||
        return m.group('slidesurl') if m is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_zip(self, html):
 | 
			
		||||
        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
 | 
			
		||||
        return m.group('zipurl') if m is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_avg_rating(self, html):
 | 
			
		||||
        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
 | 
			
		||||
        return float(m.group('avgrating')) if m is not None else 0
 | 
			
		||||
 | 
			
		||||
    def _extract_rating_count(self, html):
 | 
			
		||||
        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
 | 
			
		||||
        return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
 | 
			
		||||
 | 
			
		||||
    def _extract_view_count(self, html):
 | 
			
		||||
        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
 | 
			
		||||
        return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
 | 
			
		||||
 | 
			
		||||
    def _extract_comment_count(self, html):
 | 
			
		||||
        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
 | 
			
		||||
        return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
 | 
			
		||||
 | 
			
		||||
    def _fix_count(self, count):
 | 
			
		||||
        return int(str(count).replace(',', '')) if count is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_authors(self, html):
 | 
			
		||||
        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
 | 
			
		||||
        if m is None:
 | 
			
		||||
            return None
 | 
			
		||||
        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
 | 
			
		||||
 | 
			
		||||
    def _extract_session_code(self, html):
 | 
			
		||||
        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
 | 
			
		||||
        return m.group('code') if m is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_session_day(self, html):
 | 
			
		||||
        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
 | 
			
		||||
        return m.group('day').strip() if m is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_session_room(self, html):
 | 
			
		||||
        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
 | 
			
		||||
        return m.group('room') if m is not None else None
 | 
			
		||||
 | 
			
		||||
    def _extract_session_speakers(self, html):
 | 
			
		||||
        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
 | 
			
		||||
 | 
			
		||||
    def _extract_content(self, html, content_path):
 | 
			
		||||
        # Look for downloadable content
 | 
			
		||||
        formats = self._formats_from_html(html)
 | 
			
		||||
        slides = self._extract_slides(html)
 | 
			
		||||
        zip_ = self._extract_zip(html)
 | 
			
		||||
 | 
			
		||||
        # Nothing to download
 | 
			
		||||
        if len(formats) == 0 and slides is None and zip_ is None:
 | 
			
		||||
            self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        # Extract meta
 | 
			
		||||
        title = self._extract_title(html)
 | 
			
		||||
        description = self._extract_description(html)
 | 
			
		||||
        thumbnail = self._og_search_thumbnail(html)
 | 
			
		||||
        duration = self._extract_duration(html)
 | 
			
		||||
        avg_rating = self._extract_avg_rating(html)
 | 
			
		||||
        rating_count = self._extract_rating_count(html)
 | 
			
		||||
        view_count = self._extract_view_count(html)
 | 
			
		||||
        comment_count = self._extract_comment_count(html)
 | 
			
		||||
 | 
			
		||||
        common = {
 | 
			
		||||
            '_type': 'video',
 | 
			
		||||
            'id': content_path,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'avg_rating': avg_rating,
 | 
			
		||||
            'rating_count': rating_count,
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
            'comment_count': comment_count,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        result = []
 | 
			
		||||
 | 
			
		||||
        if slides is not None:
 | 
			
		||||
            d = common.copy()
 | 
			
		||||
            d.update({'title': title + '-Slides', 'url': slides})
 | 
			
		||||
            result.append(d)
 | 
			
		||||
 | 
			
		||||
        if zip_ is not None:
 | 
			
		||||
            d = common.copy()
 | 
			
		||||
            d.update({'title': title + '-Zip', 'url': zip_})
 | 
			
		||||
            result.append(d)
 | 
			
		||||
 | 
			
		||||
        if len(formats) > 0:
 | 
			
		||||
            d = common.copy()
 | 
			
		||||
            d.update({'title': title, 'formats': formats})
 | 
			
		||||
            result.append(d)
 | 
			
		||||
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    def _extract_entry_item(self, html, content_path):
 | 
			
		||||
        contents = self._extract_content(html, content_path)
 | 
			
		||||
        if contents is None:
 | 
			
		||||
            return contents
 | 
			
		||||
 | 
			
		||||
        if len(contents) > 1:
 | 
			
		||||
            raise ExtractorError('Got more than one entry')
 | 
			
		||||
        result = contents[0]
 | 
			
		||||
        result['authors'] = self._extract_authors(html)
 | 
			
		||||
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    def _extract_session(self, html, content_path):
 | 
			
		||||
        contents = self._extract_content(html, content_path)
 | 
			
		||||
        if contents is None:
 | 
			
		||||
            return contents
 | 
			
		||||
 | 
			
		||||
        session_meta = {
 | 
			
		||||
            'session_code': self._extract_session_code(html),
 | 
			
		||||
            'session_day': self._extract_session_day(html),
 | 
			
		||||
            'session_room': self._extract_session_room(html),
 | 
			
		||||
            'session_speakers': self._extract_session_speakers(html),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for content in contents:
 | 
			
		||||
            content.update(session_meta)
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(contents)
 | 
			
		||||
 | 
			
		||||
    def _extract_list(self, video_id, rss_url=None):
 | 
			
		||||
        if not rss_url:
 | 
			
		||||
            rss_url = self._RSS_URL % video_id
 | 
			
		||||
@@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
        return self.playlist_result(entries, video_id, title_text)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        content_path = mobj.group('contentpath')
 | 
			
		||||
        rss = mobj.group('rss')
 | 
			
		||||
        content_path, rss = re.match(self._VALID_URL, url).groups()
 | 
			
		||||
 | 
			
		||||
        if rss:
 | 
			
		||||
            return self._extract_list(content_path, url)
 | 
			
		||||
@@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor):
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            url, content_path, 'Downloading web page')
 | 
			
		||||
 | 
			
		||||
        page_type = self._search_regex(
 | 
			
		||||
            r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
 | 
			
		||||
            webpage, 'page type', default=None, group='pagetype')
 | 
			
		||||
        if page_type:
 | 
			
		||||
            if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content
 | 
			
		||||
                return self._extract_entry_item(webpage, content_path)
 | 
			
		||||
            elif page_type == 'Session':  # Event session page, may contain downloadable content
 | 
			
		||||
                return self._extract_session(webpage, content_path)
 | 
			
		||||
            elif page_type == 'Event':
 | 
			
		||||
                return self._extract_list(content_path)
 | 
			
		||||
        episode_data = self._search_regex(
 | 
			
		||||
            r"data-episode='([^']+)'", webpage, 'episode data', default=None)
 | 
			
		||||
        if episode_data:
 | 
			
		||||
            episode_data = self._parse_json(unescapeHTML(
 | 
			
		||||
                episode_data), content_path)
 | 
			
		||||
            content_id = episode_data['contentId']
 | 
			
		||||
            is_session = '/Sessions(' in episode_data['api']
 | 
			
		||||
            content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
 | 
			
		||||
            if is_session:
 | 
			
		||||
                content_url += '?$expand=Speakers'
 | 
			
		||||
            else:
 | 
			
		||||
                raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
 | 
			
		||||
        else:  # Assuming list
 | 
			
		||||
                content_url += '?$expand=Authors'
 | 
			
		||||
            content_data = self._download_json(content_url, content_id)
 | 
			
		||||
            title = content_data['Title']
 | 
			
		||||
 | 
			
		||||
            QUALITIES = (
 | 
			
		||||
                'mp3',
 | 
			
		||||
                'wmv', 'mp4',
 | 
			
		||||
                'wmv-low', 'mp4-low',
 | 
			
		||||
                'wmv-mid', 'mp4-mid',
 | 
			
		||||
                'wmv-high', 'mp4-high',
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            quality_key = qualities(QUALITIES)
 | 
			
		||||
 | 
			
		||||
            def quality(quality_id, format_url):
 | 
			
		||||
                return (len(QUALITIES) if '_Source.' in format_url
 | 
			
		||||
                        else quality_key(quality_id))
 | 
			
		||||
 | 
			
		||||
            formats = []
 | 
			
		||||
            urls = set()
 | 
			
		||||
 | 
			
		||||
            SITE_QUALITIES = {
 | 
			
		||||
                'MP3': 'mp3',
 | 
			
		||||
                'MP4': 'mp4',
 | 
			
		||||
                'Low Quality WMV': 'wmv-low',
 | 
			
		||||
                'Low Quality MP4': 'mp4-low',
 | 
			
		||||
                'Mid Quality WMV': 'wmv-mid',
 | 
			
		||||
                'Mid Quality MP4': 'mp4-mid',
 | 
			
		||||
                'High Quality WMV': 'wmv-high',
 | 
			
		||||
                'High Quality MP4': 'mp4-high',
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            formats_select = self._search_regex(
 | 
			
		||||
                r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
 | 
			
		||||
                'formats select', default=None)
 | 
			
		||||
            if formats_select:
 | 
			
		||||
                for mobj in re.finditer(
 | 
			
		||||
                        r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
 | 
			
		||||
                        formats_select):
 | 
			
		||||
                    format_url = mobj.group('url')
 | 
			
		||||
                    if format_url in urls:
 | 
			
		||||
                        continue
 | 
			
		||||
                    urls.add(format_url)
 | 
			
		||||
                    format_id = mobj.group('format')
 | 
			
		||||
                    quality_id = SITE_QUALITIES.get(format_id, format_id)
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'url': format_url,
 | 
			
		||||
                        'format_id': quality_id,
 | 
			
		||||
                        'quality': quality(quality_id, format_url),
 | 
			
		||||
                        'vcodec': 'none' if quality_id == 'mp3' else None,
 | 
			
		||||
                    })
 | 
			
		||||
 | 
			
		||||
            API_QUALITIES = {
 | 
			
		||||
                'VideoMP4Low': 'mp4-low',
 | 
			
		||||
                'VideoWMV': 'wmv-mid',
 | 
			
		||||
                'VideoMP4Medium': 'mp4-mid',
 | 
			
		||||
                'VideoMP4High': 'mp4-high',
 | 
			
		||||
                'VideoWMVHQ': 'wmv-hq',
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            for format_id, q in API_QUALITIES.items():
 | 
			
		||||
                q_url = content_data.get(format_id)
 | 
			
		||||
                if not q_url or q_url in urls:
 | 
			
		||||
                    continue
 | 
			
		||||
                urls.add(q_url)
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': q_url,
 | 
			
		||||
                    'format_id': q,
 | 
			
		||||
                    'quality': quality(q, q_url),
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
            slides = content_data.get('Slides')
 | 
			
		||||
            zip_file = content_data.get('ZipFile')
 | 
			
		||||
 | 
			
		||||
            if not formats and not slides and not zip_file:
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    'None of recording, slides or zip are available for %s' % content_path)
 | 
			
		||||
 | 
			
		||||
            subtitles = {}
 | 
			
		||||
            for caption in content_data.get('Captions', []):
 | 
			
		||||
                caption_url = caption.get('Url')
 | 
			
		||||
                if not caption_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                subtitles.setdefault(caption.get('Language', 'en'), []).append({
 | 
			
		||||
                    'url': caption_url,
 | 
			
		||||
                    'ext': 'vtt',
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            common = {
 | 
			
		||||
                'id': content_id,
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'description': clean_html(content_data.get('Description') or content_data.get('Body')),
 | 
			
		||||
                'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
 | 
			
		||||
                'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
 | 
			
		||||
                'timestamp': parse_iso8601(content_data.get('PublishedDate')),
 | 
			
		||||
                'avg_rating': int_or_none(content_data.get('Rating')),
 | 
			
		||||
                'rating_count': int_or_none(content_data.get('RatingCount')),
 | 
			
		||||
                'view_count': int_or_none(content_data.get('Views')),
 | 
			
		||||
                'comment_count': int_or_none(content_data.get('CommentCount')),
 | 
			
		||||
                'subtitles': subtitles,
 | 
			
		||||
            }
 | 
			
		||||
            if is_session:
 | 
			
		||||
                speakers = []
 | 
			
		||||
                for s in content_data.get('Speakers', []):
 | 
			
		||||
                    speaker_name = s.get('FullName')
 | 
			
		||||
                    if not speaker_name:
 | 
			
		||||
                        continue
 | 
			
		||||
                    speakers.append(speaker_name)
 | 
			
		||||
 | 
			
		||||
                common.update({
 | 
			
		||||
                    'session_code': content_data.get('Code'),
 | 
			
		||||
                    'session_room': content_data.get('Room'),
 | 
			
		||||
                    'session_speakers': speakers,
 | 
			
		||||
                })
 | 
			
		||||
            else:
 | 
			
		||||
                authors = []
 | 
			
		||||
                for a in content_data.get('Authors', []):
 | 
			
		||||
                    author_name = a.get('DisplayName')
 | 
			
		||||
                    if not author_name:
 | 
			
		||||
                        continue
 | 
			
		||||
                    authors.append(author_name)
 | 
			
		||||
                common['authors'] = authors
 | 
			
		||||
 | 
			
		||||
            contents = []
 | 
			
		||||
 | 
			
		||||
            if slides:
 | 
			
		||||
                d = common.copy()
 | 
			
		||||
                d.update({'title': title + '-Slides', 'url': slides})
 | 
			
		||||
                contents.append(d)
 | 
			
		||||
 | 
			
		||||
            if zip_file:
 | 
			
		||||
                d = common.copy()
 | 
			
		||||
                d.update({'title': title + '-Zip', 'url': zip_file})
 | 
			
		||||
                contents.append(d)
 | 
			
		||||
 | 
			
		||||
            if formats:
 | 
			
		||||
                d = common.copy()
 | 
			
		||||
                d.update({'title': title, 'formats': formats})
 | 
			
		||||
                contents.append(d)
 | 
			
		||||
            return self.playlist_result(contents)
 | 
			
		||||
        else:
 | 
			
		||||
            return self._extract_list(content_path)
 | 
			
		||||
 
 | 
			
		||||
@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
 | 
			
		||||
            r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
 | 
			
		||||
        m3u8_urls = []
 | 
			
		||||
 | 
			
		||||
        if not m3u8_formats:
 | 
			
		||||
        for m in re.finditer(
 | 
			
		||||
                r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
 | 
			
		||||
            m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
 | 
			
		||||
                'url').replace('_fast', '')
 | 
			
		||||
            for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
 | 
			
		||||
                if m3u8_url not in m3u8_urls:
 | 
			
		||||
                    m3u8_urls.append(m3u8_url)
 | 
			
		||||
 | 
			
		||||
        if not m3u8_urls:
 | 
			
		||||
            error = self._search_regex(
 | 
			
		||||
                [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
 | 
			
		||||
                 r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
 | 
			
		||||
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError('Unable to find stream URL')
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for m3u8_id, m3u8_url in m3u8_formats:
 | 
			
		||||
        for m3u8_url in m3u8_urls:
 | 
			
		||||
            m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, video_id, ext='mp4',
 | 
			
		||||
                # ffmpeg skips segments for fast m3u8
 | 
			
		||||
 
 | 
			
		||||
@@ -1,97 +1,56 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CloudyIE(InfoExtractor):
 | 
			
		||||
    _IE_DESC = 'cloudy.ec'
 | 
			
		||||
    _VALID_URL = r'''(?x)
 | 
			
		||||
        https?://(?:www\.)?cloudy\.ec/
 | 
			
		||||
        (?:v/|embed\.php\?id=)
 | 
			
		||||
        (?P<id>[A-Za-z0-9]+)
 | 
			
		||||
        '''
 | 
			
		||||
    _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
 | 
			
		||||
    _API_URL = 'http://www.cloudy.ec/api/player.api.php'
 | 
			
		||||
    _MAX_TRIES = 2
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
 | 
			
		||||
        'md5': '5cb253ace826a42f35b4740539bedf07',
 | 
			
		||||
        'md5': '29832b05028ead1b58be86bf319397ca',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'af511e2527aac',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Funny Cats and Animals Compilation june 2013',
 | 
			
		||||
            'upload_date': '20130913',
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
 | 
			
		||||
 | 
			
		||||
        if try_num > self._MAX_TRIES - 1:
 | 
			
		||||
            raise ExtractorError('Unable to extract video URL', expected=True)
 | 
			
		||||
 | 
			
		||||
        form = {
 | 
			
		||||
            'file': video_id,
 | 
			
		||||
            'key': file_key,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if error_url:
 | 
			
		||||
            form.update({
 | 
			
		||||
                'numOfErrors': try_num,
 | 
			
		||||
                'errorCode': '404',
 | 
			
		||||
                'errorUrl': error_url,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        player_data = self._download_webpage(
 | 
			
		||||
            self._API_URL, video_id, 'Downloading player data', query=form)
 | 
			
		||||
        data = compat_parse_qs(player_data)
 | 
			
		||||
 | 
			
		||||
        try_num += 1
 | 
			
		||||
 | 
			
		||||
        if 'error' in data:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
 | 
			
		||||
                expected=True)
 | 
			
		||||
 | 
			
		||||
        title = data.get('title', [None])[0]
 | 
			
		||||
        if title:
 | 
			
		||||
            title = remove_end(title, '&asdasdas').strip()
 | 
			
		||||
 | 
			
		||||
        video_url = data.get('url', [None])[0]
 | 
			
		||||
 | 
			
		||||
        if video_url:
 | 
			
		||||
            try:
 | 
			
		||||
                self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
 | 
			
		||||
            except ExtractorError as e:
 | 
			
		||||
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
 | 
			
		||||
                    self.report_warning('Invalid video URL, requesting another', video_id)
 | 
			
		||||
                    return self._extract_video(video_id, file_key, video_url, try_num)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'title': title,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        url = self._EMBED_URL % video_id
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
 | 
			
		||||
 | 
			
		||||
        file_key = self._search_regex(
 | 
			
		||||
            [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
 | 
			
		||||
            webpage, 'file_key')
 | 
			
		||||
        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
 | 
			
		||||
 | 
			
		||||
        return self._extract_video(video_id, file_key)
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
 | 
			
		||||
 | 
			
		||||
        if webpage:
 | 
			
		||||
            info.update({
 | 
			
		||||
                'title': self._search_regex(
 | 
			
		||||
                    r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
 | 
			
		||||
                'upload_date': unified_strdate(self._search_regex(
 | 
			
		||||
                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
 | 
			
		||||
                    'upload date', fatal=False)),
 | 
			
		||||
                'view_count': str_to_int(self._search_regex(
 | 
			
		||||
                    r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        if not info.get('title'):
 | 
			
		||||
            info['title'] = video_id
 | 
			
		||||
 | 
			
		||||
        info['id'] = video_id
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,4 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
@@ -547,6 +548,34 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
        return encoding
 | 
			
		||||
 | 
			
		||||
    def __check_blocked(self, content):
 | 
			
		||||
        first_block = content[:512]
 | 
			
		||||
        if ('<title>Access to this site is blocked</title>' in content and
 | 
			
		||||
                'Websense' in first_block):
 | 
			
		||||
            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
 | 
			
		||||
            blocked_iframe = self._html_search_regex(
 | 
			
		||||
                r'<iframe src="([^"]+)"', content,
 | 
			
		||||
                'Websense information URL', default=None)
 | 
			
		||||
            if blocked_iframe:
 | 
			
		||||
                msg += ' Visit %s for more details' % blocked_iframe
 | 
			
		||||
            raise ExtractorError(msg, expected=True)
 | 
			
		||||
        if '<title>The URL you requested has been blocked</title>' in first_block:
 | 
			
		||||
            msg = (
 | 
			
		||||
                'Access to this webpage has been blocked by Indian censorship. '
 | 
			
		||||
                'Use a VPN or proxy server (with --proxy) to route around it.')
 | 
			
		||||
            block_msg = self._html_search_regex(
 | 
			
		||||
                r'</h1><p>(.*?)</p>',
 | 
			
		||||
                content, 'block message', default=None)
 | 
			
		||||
            if block_msg:
 | 
			
		||||
                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
 | 
			
		||||
            raise ExtractorError(msg, expected=True)
 | 
			
		||||
        if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
 | 
			
		||||
                'blocklist.rkn.gov.ru' in content):
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                'Access to this webpage has been blocked by decision of the Russian government. '
 | 
			
		||||
                'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
 | 
			
		||||
                expected=True)
 | 
			
		||||
 | 
			
		||||
    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
 | 
			
		||||
        content_type = urlh.headers.get('Content-Type', '')
 | 
			
		||||
        webpage_bytes = urlh.read()
 | 
			
		||||
@@ -588,25 +617,7 @@ class InfoExtractor(object):
 | 
			
		||||
        except LookupError:
 | 
			
		||||
            content = webpage_bytes.decode('utf-8', 'replace')
 | 
			
		||||
 | 
			
		||||
        if ('<title>Access to this site is blocked</title>' in content and
 | 
			
		||||
                'Websense' in content[:512]):
 | 
			
		||||
            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
 | 
			
		||||
            blocked_iframe = self._html_search_regex(
 | 
			
		||||
                r'<iframe src="([^"]+)"', content,
 | 
			
		||||
                'Websense information URL', default=None)
 | 
			
		||||
            if blocked_iframe:
 | 
			
		||||
                msg += ' Visit %s for more details' % blocked_iframe
 | 
			
		||||
            raise ExtractorError(msg, expected=True)
 | 
			
		||||
        if '<title>The URL you requested has been blocked</title>' in content[:512]:
 | 
			
		||||
            msg = (
 | 
			
		||||
                'Access to this webpage has been blocked by Indian censorship. '
 | 
			
		||||
                'Use a VPN or proxy server (with --proxy) to route around it.')
 | 
			
		||||
            block_msg = self._html_search_regex(
 | 
			
		||||
                r'</h1><p>(.*?)</p>',
 | 
			
		||||
                content, 'block message', default=None)
 | 
			
		||||
            if block_msg:
 | 
			
		||||
                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
 | 
			
		||||
            raise ExtractorError(msg, expected=True)
 | 
			
		||||
        self.__check_blocked(content)
 | 
			
		||||
 | 
			
		||||
        return content
 | 
			
		||||
 | 
			
		||||
@@ -2169,18 +2180,24 @@ class InfoExtractor(object):
 | 
			
		||||
                    })
 | 
			
		||||
        return formats
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _find_jwplayer_data(webpage):
 | 
			
		||||
    def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
 | 
			
		||||
            webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
            return mobj.group('options')
 | 
			
		||||
            try:
 | 
			
		||||
                jwplayer_data = self._parse_json(mobj.group('options'),
 | 
			
		||||
                                                 video_id=video_id,
 | 
			
		||||
                                                 transform_source=transform_source)
 | 
			
		||||
            except ExtractorError:
 | 
			
		||||
                pass
 | 
			
		||||
            else:
 | 
			
		||||
                if isinstance(jwplayer_data, dict):
 | 
			
		||||
                    return jwplayer_data
 | 
			
		||||
 | 
			
		||||
    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
 | 
			
		||||
        jwplayer_data = self._parse_json(
 | 
			
		||||
            self._find_jwplayer_data(webpage), video_id,
 | 
			
		||||
            transform_source=js_to_json)
 | 
			
		||||
        jwplayer_data = self._find_jwplayer_data(
 | 
			
		||||
            webpage, video_id, transform_source=js_to_json)
 | 
			
		||||
        return self._parse_jwplayer_data(
 | 
			
		||||
            jwplayer_data, video_id, *args, **kwargs)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -9,13 +9,14 @@ from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
    mimetype2ext,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    mimetype2ext,
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
    remove_end,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor):
 | 
			
		||||
            'upload_date': '20130314',
 | 
			
		||||
            'timestamp': 1363219200,
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '58d1865bfd2e6126e2000015',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'The Only True Surprise? Trump’s an Idiot',
 | 
			
		||||
            'uploader': 'gq',
 | 
			
		||||
            'upload_date': '20170321',
 | 
			
		||||
            'timestamp': 1490126427,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # JS embed
 | 
			
		||||
        'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
 | 
			
		||||
@@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor):
 | 
			
		||||
            })
 | 
			
		||||
        video_id = query['videoId']
 | 
			
		||||
        video_info = None
 | 
			
		||||
        info_page = self._download_webpage(
 | 
			
		||||
        info_page = self._download_json(
 | 
			
		||||
            'http://player.cnevids.com/player/video.js',
 | 
			
		||||
            video_id, 'Downloading video info', query=query, fatal=False)
 | 
			
		||||
            video_id, 'Downloading video info', fatal=False, query=query)
 | 
			
		||||
        if info_page:
 | 
			
		||||
            video_info = self._parse_json(self._search_regex(
 | 
			
		||||
                r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
 | 
			
		||||
        else:
 | 
			
		||||
            video_info = info_page.get('video')
 | 
			
		||||
        if not video_info:
 | 
			
		||||
            info_page = self._download_webpage(
 | 
			
		||||
                'http://player.cnevids.com/player/loader.js',
 | 
			
		||||
                video_id, 'Downloading loader info', query=query)
 | 
			
		||||
            video_info = self._parse_json(self._search_regex(
 | 
			
		||||
                r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
 | 
			
		||||
            video_info = self._parse_json(
 | 
			
		||||
                self._search_regex(
 | 
			
		||||
                    r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
 | 
			
		||||
                video_id, transform_source=js_to_json)['video']
 | 
			
		||||
 | 
			
		||||
        title = video_info['title']
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for fdata in video_info.get('sources', [{}])[0]:
 | 
			
		||||
        for fdata in video_info['sources']:
 | 
			
		||||
            src = fdata.get('src')
 | 
			
		||||
            if not src:
 | 
			
		||||
                continue
 | 
			
		||||
            ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
 | 
			
		||||
            if ext == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    src, video_id, 'mp4', entry_protocol='m3u8_native',
 | 
			
		||||
                    m3u8_id='hls', fatal=False))
 | 
			
		||||
                continue
 | 
			
		||||
            quality = fdata.get('quality')
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': ext + ('-%s' % quality if quality else ''),
 | 
			
		||||
@@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor):
 | 
			
		||||
                path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
 | 
			
		||||
            url_type = 'embed'
 | 
			
		||||
 | 
			
		||||
        self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
 | 
			
		||||
        webpage = self._download_webpage(url, item_id)
 | 
			
		||||
 | 
			
		||||
        if url_type == 'series':
 | 
			
		||||
 
 | 
			
		||||
@@ -390,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
        else:
 | 
			
		||||
            webpage_url = 'http://www.' + mobj.group('url')
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            self._add_skip_wall(webpage_url), video_id,
 | 
			
		||||
            headers=self.geo_verification_headers())
 | 
			
		||||
        note_m = self._html_search_regex(
 | 
			
		||||
            r'<div class="showmedia-trailer-notice">(.+?)</div>',
 | 
			
		||||
            webpage, 'trailer-notice', default='')
 | 
			
		||||
@@ -565,7 +567,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        show_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(self._add_skip_wall(url), show_id)
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            self._add_skip_wall(url), show_id,
 | 
			
		||||
            headers=self.geo_verification_headers())
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
 | 
			
		||||
            webpage, 'title')
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
 | 
			
		||||
        limelight_media_id = media['limelight_media_id']
 | 
			
		||||
        title = media['title']
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for encoding in media.get('encodings', []):
 | 
			
		||||
            m3u8_url = encoding.get('master_playlist_url')
 | 
			
		||||
            if m3u8_url:
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                    m3u8_id='hls', fatal=False))
 | 
			
		||||
            encoding_url = encoding.get('url')
 | 
			
		||||
            file_url = encoding.get('file_url')
 | 
			
		||||
            if not encoding_url and not file_url:
 | 
			
		||||
                continue
 | 
			
		||||
            f = {
 | 
			
		||||
                'width': int_or_none(encoding.get('width')),
 | 
			
		||||
                'height': int_or_none(encoding.get('height')),
 | 
			
		||||
                'vbr': int_or_none(encoding.get('video_bitrate')),
 | 
			
		||||
                'abr': int_or_none(encoding.get('audio_bitrate')),
 | 
			
		||||
                'filesize': int_or_none(encoding.get('size_in_bytes')),
 | 
			
		||||
                'vcodec': encoding.get('video_codec'),
 | 
			
		||||
                'acodec': encoding.get('audio_codec'),
 | 
			
		||||
                'container': encoding.get('container_type'),
 | 
			
		||||
            }
 | 
			
		||||
            for f_url in (encoding_url, file_url):
 | 
			
		||||
                if not f_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                fmt = f.copy()
 | 
			
		||||
                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
 | 
			
		||||
                if rtmp:
 | 
			
		||||
                    fmt.update({
 | 
			
		||||
                        'url': rtmp.group('url'),
 | 
			
		||||
                        'play_path': rtmp.group('playpath'),
 | 
			
		||||
                        'app': rtmp.group('app'),
 | 
			
		||||
                        'ext': 'flv',
 | 
			
		||||
                        'format_id': 'rtmp',
 | 
			
		||||
                    })
 | 
			
		||||
                else:
 | 
			
		||||
                    fmt.update({
 | 
			
		||||
                        'url': f_url,
 | 
			
		||||
                        'format_id': 'http',
 | 
			
		||||
                    })
 | 
			
		||||
                formats.append(fmt)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        for closed_caption in media.get('closed_captions', []):
 | 
			
		||||
            sub_url = closed_caption.get('file')
 | 
			
		||||
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': 'limelight:media:' + limelight_media_id,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': media.get('description'),
 | 
			
		||||
            'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
 | 
			
		||||
            'duration': int_or_none(media.get('duration')),
 | 
			
		||||
            'tags': media.get('tags'),
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
            'ie_key': 'LimelightMedia',
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://app.curiositystream.com/video/2',
 | 
			
		||||
        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
 | 
			
		||||
        'md5': '262bb2f257ff301115f1973540de8983',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'How Did You Develop The Internet?',
 | 
			
		||||
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
 | 
			
		||||
            'timestamp': 1448388615,
 | 
			
		||||
            'upload_date': '20151124',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
 | 
			
		||||
            'title': 'Curious Minds: The Internet',
 | 
			
		||||
            'description': 'How is the internet shaping our lives in the 21st Century?',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 17,
 | 
			
		||||
        'playlist_mincount': 12,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor):
 | 
			
		||||
                            'url': quality_url,
 | 
			
		||||
                            'tbr': tbr,
 | 
			
		||||
                        })
 | 
			
		||||
        video_metadata = video_data['assetFields']
 | 
			
		||||
        ism_url = video_metadata.get('smoothStreamingUrl')
 | 
			
		||||
        if ism_url:
 | 
			
		||||
            formats.extend(self._extract_ism_formats(
 | 
			
		||||
                ism_url, video_id, ism_id='mss', fatal=False))
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        thumbnails = [{
 | 
			
		||||
@@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor):
 | 
			
		||||
            'height': image.get('height'),
 | 
			
		||||
        } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
 | 
			
		||||
 | 
			
		||||
        video_metadata = video_data['assetFields']
 | 
			
		||||
 | 
			
		||||
        subtitles = {
 | 
			
		||||
            'en': [{
 | 
			
		||||
                'url': video_metadata['UnicornCcUrl'],
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										59
									
								
								youtube_dl/extractor/discoveryvr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								youtube_dl/extractor/discoveryvr.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,59 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import parse_duration
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DiscoveryVRIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
 | 
			
		||||
        'md5': '32b1929798c464a54356378b7912eca4',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'discovery-vr-an-introduction',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Discovery VR - An Introduction',
 | 
			
		||||
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        bootstrap_data = self._search_regex(
 | 
			
		||||
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
 | 
			
		||||
            webpage, 'bootstrap data')
 | 
			
		||||
        bootstrap_data = self._parse_json(
 | 
			
		||||
            bootstrap_data.encode('utf-8').decode('unicode_escape'),
 | 
			
		||||
            display_id)
 | 
			
		||||
        videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
 | 
			
		||||
        video_data = next(video for video in videos if video.get('slug') == display_id)
 | 
			
		||||
 | 
			
		||||
        series = video_data.get('showTitle')
 | 
			
		||||
        title = episode = video_data.get('title') or series
 | 
			
		||||
        if series and series != title:
 | 
			
		||||
            title = '%s - %s' % (series, title)
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
 | 
			
		||||
            f_url = video_data.get(f)
 | 
			
		||||
            if not f_url:
 | 
			
		||||
                continue
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': format_id,
 | 
			
		||||
                'url': f_url,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': display_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': video_data.get('description'),
 | 
			
		||||
            'thumbnail': video_data.get('thumbnail'),
 | 
			
		||||
            'duration': parse_duration(video_data.get('runTime')),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'episode': episode,
 | 
			
		||||
            'series': series,
 | 
			
		||||
        }
 | 
			
		||||
@@ -19,6 +19,7 @@ from .acast import (
 | 
			
		||||
    ACastChannelIE,
 | 
			
		||||
)
 | 
			
		||||
from .addanime import AddAnimeIE
 | 
			
		||||
from .adn import ADNIE
 | 
			
		||||
from .adobetv import (
 | 
			
		||||
    AdobeTVIE,
 | 
			
		||||
    AdobeTVShowIE,
 | 
			
		||||
@@ -71,6 +72,7 @@ from .arte import (
 | 
			
		||||
)
 | 
			
		||||
from .atresplayer import AtresPlayerIE
 | 
			
		||||
from .atttechchannel import ATTTechChannelIE
 | 
			
		||||
from .atvat import ATVAtIE
 | 
			
		||||
from .audimedia import AudiMediaIE
 | 
			
		||||
from .audioboom import AudioBoomIE
 | 
			
		||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
 | 
			
		||||
@@ -163,7 +165,10 @@ from .ccc import CCCIE
 | 
			
		||||
from .ccma import CCMAIE
 | 
			
		||||
from .cctv import CCTVIE
 | 
			
		||||
from .cda import CDAIE
 | 
			
		||||
from .ceskatelevize import CeskaTelevizeIE
 | 
			
		||||
from .ceskatelevize import (
 | 
			
		||||
    CeskaTelevizeIE,
 | 
			
		||||
    CeskaTelevizePoradyIE,
 | 
			
		||||
)
 | 
			
		||||
from .channel9 import Channel9IE
 | 
			
		||||
from .charlierose import CharlieRoseIE
 | 
			
		||||
from .chaturbate import ChaturbateIE
 | 
			
		||||
@@ -271,6 +276,7 @@ from .discoverygo import (
 | 
			
		||||
    DiscoveryGoPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .discoverynetworks import DiscoveryNetworksDeIE
 | 
			
		||||
from .discoveryvr import DiscoveryVRIE
 | 
			
		||||
from .disney import DisneyIE
 | 
			
		||||
from .dispeak import DigitallySpeakingIE
 | 
			
		||||
from .dropbox import DropboxIE
 | 
			
		||||
@@ -535,6 +541,7 @@ from .mangomolo import (
 | 
			
		||||
)
 | 
			
		||||
from .matchtv import MatchTVIE
 | 
			
		||||
from .mdr import MDRIE
 | 
			
		||||
from .medici import MediciIE
 | 
			
		||||
from .meipai import MeipaiIE
 | 
			
		||||
from .melonvod import MelonVODIE
 | 
			
		||||
from .meta import METAIE
 | 
			
		||||
@@ -727,6 +734,10 @@ from .orf import (
 | 
			
		||||
    ORFFM4IE,
 | 
			
		||||
    ORFIPTVIE,
 | 
			
		||||
)
 | 
			
		||||
from .packtpub import (
 | 
			
		||||
    PacktPubIE,
 | 
			
		||||
    PacktPubCourseIE,
 | 
			
		||||
)
 | 
			
		||||
from .pandatv import PandaTVIE
 | 
			
		||||
from .pandoratv import PandoraTVIE
 | 
			
		||||
from .parliamentliveuk import ParliamentLiveUKIE
 | 
			
		||||
@@ -796,7 +807,7 @@ from .radiojavan import RadioJavanIE
 | 
			
		||||
from .radiobremen import RadioBremenIE
 | 
			
		||||
from .radiofrance import RadioFranceIE
 | 
			
		||||
from .rai import (
 | 
			
		||||
    RaiTVIE,
 | 
			
		||||
    RaiPlayIE,
 | 
			
		||||
    RaiIE,
 | 
			
		||||
)
 | 
			
		||||
from .rbmaradio import RBMARadioIE
 | 
			
		||||
@@ -827,7 +838,11 @@ from .rozhlas import RozhlasIE
 | 
			
		||||
from .rtbf import RTBFIE
 | 
			
		||||
from .rte import RteIE, RteRadioIE
 | 
			
		||||
from .rtlnl import RtlNlIE
 | 
			
		||||
from .rtl2 import RTL2IE
 | 
			
		||||
from .rtl2 import (
 | 
			
		||||
    RTL2IE,
 | 
			
		||||
    RTL2YouIE,
 | 
			
		||||
    RTL2YouSeriesIE,
 | 
			
		||||
)
 | 
			
		||||
from .rtp import RTPIE
 | 
			
		||||
from .rts import RTSIE
 | 
			
		||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 | 
			
		||||
@@ -969,6 +984,7 @@ from .theplatform import (
 | 
			
		||||
from .thescene import TheSceneIE
 | 
			
		||||
from .thesixtyone import TheSixtyOneIE
 | 
			
		||||
from .thestar import TheStarIE
 | 
			
		||||
from .thesun import TheSunIE
 | 
			
		||||
from .theweatherchannel import TheWeatherChannelIE
 | 
			
		||||
from .thisamericanlife import ThisAmericanLifeIE
 | 
			
		||||
from .thisav import ThisAVIE
 | 
			
		||||
@@ -1017,6 +1033,7 @@ from .tv2 import (
 | 
			
		||||
)
 | 
			
		||||
from .tv3 import TV3IE
 | 
			
		||||
from .tv4 import TV4IE
 | 
			
		||||
from .tv5mondeplus import TV5MondePlusIE
 | 
			
		||||
from .tva import TVAIE
 | 
			
		||||
from .tvanouvelles import (
 | 
			
		||||
    TVANouvellesIE,
 | 
			
		||||
@@ -1176,6 +1193,11 @@ from .voxmedia import VoxMediaIE
 | 
			
		||||
from .vporn import VpornIE
 | 
			
		||||
from .vrt import VRTIE
 | 
			
		||||
from .vrak import VrakIE
 | 
			
		||||
from .vrv import (
 | 
			
		||||
    VRVIE,
 | 
			
		||||
    VRVSeriesIE,
 | 
			
		||||
)
 | 
			
		||||
from .vshare import VShareIE
 | 
			
		||||
from .medialaan import MedialaanIE
 | 
			
		||||
from .vube import VubeIE
 | 
			
		||||
from .vuclip import VuClipIE
 | 
			
		||||
 
 | 
			
		||||
@@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor):
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'formats': self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
 | 
			
		||||
                m3u8_url, video_id, 'mp4', 'm3u8_native'),
 | 
			
		||||
            'description': xpath_text(video_data, _add_ns('Description')),
 | 
			
		||||
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
 | 
			
		||||
            'uploader': xpath_text(video_data, _add_ns('Createur')),
 | 
			
		||||
 
 | 
			
		||||
@@ -47,9 +47,12 @@ class FOXIE(AdobePassIE):
 | 
			
		||||
            resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
 | 
			
		||||
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info = self._search_json_ld(webpage, video_id, fatal=False)
 | 
			
		||||
        info.update({
 | 
			
		||||
            '_type': 'url_transparent',
 | 
			
		||||
            'ie_key': 'ThePlatform',
 | 
			
		||||
            'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
        }
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,8 @@ from __future__ import unicode_literals
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor):
 | 
			
		||||
            'title': 'Rendez-vous au pays des geeks',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'upload_date': '20140301',
 | 
			
		||||
            'timestamp': 1393642916,
 | 
			
		||||
            'vcodec': 'none',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
@@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        video_url = self._search_regex(
 | 
			
		||||
            r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
 | 
			
		||||
            webpage, 'video path')
 | 
			
		||||
        video_data = extract_attributes(self._search_regex(
 | 
			
		||||
            r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
 | 
			
		||||
            webpage, 'video data'))
 | 
			
		||||
 | 
			
		||||
        title = self._og_search_title(webpage)
 | 
			
		||||
        video_url = video_data['data-asset-source']
 | 
			
		||||
        title = video_data.get('data-asset-title') or self._og_search_title(webpage)
 | 
			
		||||
 | 
			
		||||
        upload_date = unified_strdate(self._search_regex(
 | 
			
		||||
            '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
 | 
			
		||||
            webpage, 'upload date', fatal=False))
 | 
			
		||||
        description = self._html_search_regex(
 | 
			
		||||
            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
 | 
			
		||||
            webpage, 'description', default=None)
 | 
			
		||||
        thumbnail = self._search_regex(
 | 
			
		||||
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
 | 
			
		||||
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
 | 
			
		||||
            webpage, 'thumbnail', fatal=False)
 | 
			
		||||
        uploader = self._html_search_regex(
 | 
			
		||||
            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
 | 
			
		||||
            r'(?s)<span class="author">(.*?)</span>',
 | 
			
		||||
            webpage, 'uploader', default=None)
 | 
			
		||||
        vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
 | 
			
		||||
        ext = determine_ext(video_url.lower())
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': display_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'vcodec': vcodec,
 | 
			
		||||
            'ext': ext,
 | 
			
		||||
            'vcodec': 'none' if ext == 'mp3' else None,
 | 
			
		||||
            'uploader': uploader,
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'timestamp': int_or_none(video_data.get('data-asset-created-date')),
 | 
			
		||||
            'duration': int_or_none(video_data.get('data-duration')),
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -56,9 +56,8 @@ class FreshLiveIE(InfoExtractor):
 | 
			
		||||
        is_live = info.get('liveStreamUrl') is not None
 | 
			
		||||
 | 
			
		||||
        formats = self._extract_m3u8_formats(
 | 
			
		||||
            stream_url, video_id, ext='mp4',
 | 
			
		||||
            entry_protocol='m3u8' if is_live else 'm3u8_native',
 | 
			
		||||
            m3u8_id='hls')
 | 
			
		||||
            stream_url, video_id, 'mp4',
 | 
			
		||||
            'm3u8_native', m3u8_id='hls')
 | 
			
		||||
 | 
			
		||||
        if is_live:
 | 
			
		||||
            title = self._live_title(title)
 | 
			
		||||
 
 | 
			
		||||
@@ -7,9 +7,9 @@ from ..compat import (
 | 
			
		||||
    compat_urllib_parse_unquote_plus,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    urlencode_postdata
 | 
			
		||||
@@ -17,34 +17,26 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FunimationIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
 | 
			
		||||
 | 
			
		||||
    _NETRC_MACHINE = 'funimation'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
 | 
			
		||||
        'url': 'https://www.funimation.com/shows/hacksign/role-play/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '658',
 | 
			
		||||
            'display_id': 'breeze',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Air - 1 - Breeze',
 | 
			
		||||
            'description': 'md5:1769f43cd5fc130ace8fd87232207892',
 | 
			
		||||
            'thumbnail': r're:https?://.*\.jpg',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '31128',
 | 
			
		||||
            'id': '91144',
 | 
			
		||||
            'display_id': 'role-play',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '.hack//SIGN - 1 - Role Play',
 | 
			
		||||
            'title': '.hack//SIGN - Role Play',
 | 
			
		||||
            'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
 | 
			
		||||
            'thumbnail': r're:https?://.*\.jpg',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Access without user interaction is forbidden by CloudFlare',
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
 | 
			
		||||
        'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '9635',
 | 
			
		||||
            'display_id': 'broadcast-dub-preview',
 | 
			
		||||
@@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor):
 | 
			
		||||
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Access without user interaction is forbidden by CloudFlare',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    _LOGIN_URL = 'http://www.funimation.com/login'
 | 
			
		||||
 | 
			
		||||
    def _download_webpage(self, *args, **kwargs):
 | 
			
		||||
        try:
 | 
			
		||||
            return super(FunimationIE, self)._download_webpage(*args, **kwargs)
 | 
			
		||||
        except ExtractorError as ee:
 | 
			
		||||
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
 | 
			
		||||
                response = ee.cause.read()
 | 
			
		||||
                if b'>Please complete the security check to access<' in response:
 | 
			
		||||
                    raise ExtractorError(
 | 
			
		||||
                        'Access to funimation.com is blocked by CloudFlare. '
 | 
			
		||||
                        'Please browse to http://www.funimation.com/, solve '
 | 
			
		||||
                        'the reCAPTCHA, export browser cookies to a text file,'
 | 
			
		||||
                        ' and then try again with --cookies YOUR_COOKIE_FILE.',
 | 
			
		||||
                        expected=True)
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
    def _extract_cloudflare_session_ua(self, url):
 | 
			
		||||
        ci_session_cookie = self._get_cookies(url).get('ci_session')
 | 
			
		||||
        if ci_session_cookie:
 | 
			
		||||
@@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        def _search_kane(name):
 | 
			
		||||
            return self._search_regex(
 | 
			
		||||
                r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
 | 
			
		||||
                webpage, name, default=None)
 | 
			
		||||
 | 
			
		||||
        title_data = self._parse_json(self._search_regex(
 | 
			
		||||
            r'TITLE_DATA\s*=\s*({[^}]+})',
 | 
			
		||||
            webpage, 'title data', default=''),
 | 
			
		||||
            display_id, js_to_json, fatal=False) or {}
 | 
			
		||||
 | 
			
		||||
        video_id = title_data.get('id') or self._search_regex([
 | 
			
		||||
            r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
 | 
			
		||||
            r'<iframe[^>]+src="/player/(\d+)"',
 | 
			
		||||
        ], webpage, 'video_id', default=None)
 | 
			
		||||
        if not video_id:
 | 
			
		||||
            player_url = self._html_search_meta([
 | 
			
		||||
                'al:web:url',
 | 
			
		||||
                'og:video:url',
 | 
			
		||||
                'og:video:secure_url',
 | 
			
		||||
            ], webpage, fatal=True)
 | 
			
		||||
            video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
 | 
			
		||||
 | 
			
		||||
        title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
 | 
			
		||||
        series = _search_kane('showName')
 | 
			
		||||
        if series:
 | 
			
		||||
            title = '%s - %s' % (series, title)
 | 
			
		||||
        description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            sources = self._download_json(
 | 
			
		||||
                'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
 | 
			
		||||
                video_id)['items']
 | 
			
		||||
        except ExtractorError as e:
 | 
			
		||||
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
 | 
			
		||||
                error = self._parse_json(e.cause.read(), video_id)['errors'][0]
 | 
			
		||||
                raise ExtractorError('%s said: %s' % (
 | 
			
		||||
                    self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
        errors = []
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
        ERRORS_MAP = {
 | 
			
		||||
            'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
 | 
			
		||||
            'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
 | 
			
		||||
            'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
 | 
			
		||||
            'ERROR_VIDEO_EXPIRED': 'videoExpired',
 | 
			
		||||
            'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
 | 
			
		||||
            'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
 | 
			
		||||
            'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
 | 
			
		||||
            'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
 | 
			
		||||
            'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
 | 
			
		||||
            'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        USER_AGENTS = (
 | 
			
		||||
            # PC UA is served with m3u8 that provides some bonus lower quality formats
 | 
			
		||||
            ('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
 | 
			
		||||
            # Mobile UA allows to extract direct links and also does not fail when
 | 
			
		||||
            # PC UA fails with hulu error (e.g.
 | 
			
		||||
            # http://www.funimation.com/shows/hacksign/videos/official/role-play)
 | 
			
		||||
            ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        user_agent = self._extract_cloudflare_session_ua(url)
 | 
			
		||||
        if user_agent:
 | 
			
		||||
            USER_AGENTS = ((None, user_agent),)
 | 
			
		||||
 | 
			
		||||
        for kind, user_agent in USER_AGENTS:
 | 
			
		||||
            request = sanitized_Request(url)
 | 
			
		||||
            request.add_header('User-Agent', user_agent)
 | 
			
		||||
            webpage = self._download_webpage(
 | 
			
		||||
                request, display_id,
 | 
			
		||||
                'Downloading %s webpage' % kind if kind else 'Downloading webpage')
 | 
			
		||||
 | 
			
		||||
            playlist = self._parse_json(
 | 
			
		||||
                self._search_regex(
 | 
			
		||||
                    r'var\s+playersData\s*=\s*(\[.+?\]);\n',
 | 
			
		||||
                    webpage, 'players data'),
 | 
			
		||||
                display_id)[0]['playlist']
 | 
			
		||||
 | 
			
		||||
            items = next(item['items'] for item in playlist if item.get('items'))
 | 
			
		||||
            item = next(item for item in items if item.get('itemAK') == display_id)
 | 
			
		||||
 | 
			
		||||
            error_messages = {}
 | 
			
		||||
            video_error_messages = self._search_regex(
 | 
			
		||||
                r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
 | 
			
		||||
                webpage, 'error messages', default=None)
 | 
			
		||||
            if video_error_messages:
 | 
			
		||||
                error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
 | 
			
		||||
                if error_messages_json:
 | 
			
		||||
                    for _, error in error_messages_json.items():
 | 
			
		||||
                        type_ = error.get('type')
 | 
			
		||||
                        description = error.get('description')
 | 
			
		||||
                        content = error.get('content')
 | 
			
		||||
                        if type_ == 'text' and description and content:
 | 
			
		||||
                            error_message = ERRORS_MAP.get(description)
 | 
			
		||||
                            if error_message:
 | 
			
		||||
                                error_messages[error_message] = content
 | 
			
		||||
 | 
			
		||||
            for video in item.get('videoSet', []):
 | 
			
		||||
                auth_token = video.get('authToken')
 | 
			
		||||
                if not auth_token:
 | 
			
		||||
                    continue
 | 
			
		||||
                funimation_id = video.get('FUNImationID') or video.get('videoId')
 | 
			
		||||
                preference = 1 if video.get('languageMode') == 'dub' else 0
 | 
			
		||||
                if not auth_token.startswith('?'):
 | 
			
		||||
                    auth_token = '?%s' % auth_token
 | 
			
		||||
                for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
 | 
			
		||||
                    format_url = video.get('%sUrl' % quality)
 | 
			
		||||
                    if not format_url:
 | 
			
		||||
                        continue
 | 
			
		||||
                    if not format_url.startswith(('http', '//')):
 | 
			
		||||
                        errors.append(format_url)
 | 
			
		||||
                        continue
 | 
			
		||||
                    if determine_ext(format_url) == 'm3u8':
 | 
			
		||||
                        formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                            format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
 | 
			
		||||
                            preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
 | 
			
		||||
                    else:
 | 
			
		||||
                        tbr = int_or_none(self._search_regex(
 | 
			
		||||
                            r'-(\d+)[Kk]', format_url, 'tbr', default=None))
 | 
			
		||||
                        formats.append({
 | 
			
		||||
                            'url': format_url + auth_token,
 | 
			
		||||
                            'format_id': '%s-http-%dp' % (funimation_id, height),
 | 
			
		||||
                            'height': height,
 | 
			
		||||
                            'tbr': tbr,
 | 
			
		||||
                            'preference': preference,
 | 
			
		||||
                        })
 | 
			
		||||
 | 
			
		||||
        if not formats and errors:
 | 
			
		||||
            raise ExtractorError(
 | 
			
		||||
                '%s returned error: %s'
 | 
			
		||||
                % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
 | 
			
		||||
                expected=True)
 | 
			
		||||
 | 
			
		||||
        for source in sources:
 | 
			
		||||
            source_url = source.get('src')
 | 
			
		||||
            if not source_url:
 | 
			
		||||
                continue
 | 
			
		||||
            source_type = source.get('videoType') or determine_ext(source_url)
 | 
			
		||||
            if source_type == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    source_url, video_id, 'mp4',
 | 
			
		||||
                    m3u8_id='hls', fatal=False))
 | 
			
		||||
            else:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'format_id': source_type,
 | 
			
		||||
                    'url': source_url,
 | 
			
		||||
                })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        title = item['title']
 | 
			
		||||
        artist = item.get('artist')
 | 
			
		||||
        if artist:
 | 
			
		||||
            title = '%s - %s' % (artist, title)
 | 
			
		||||
        description = self._og_search_description(webpage) or item.get('description')
 | 
			
		||||
        thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
 | 
			
		||||
        video_id = item.get('itemId') or display_id
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
            'series': series,
 | 
			
		||||
            'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
 | 
			
		||||
            'episode_number': int_or_none(title_data.get('episodeNum')),
 | 
			
		||||
            'episode': episode,
 | 
			
		||||
            'season_id': title_data.get('seriesId'),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -730,6 +730,21 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # YouTube <object> embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
 | 
			
		||||
            'md5': '516718101ec834f74318df76259fb3cc',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'msN87y-iEx0',
 | 
			
		||||
                'ext': 'webm',
 | 
			
		||||
                'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
 | 
			
		||||
                'upload_date': '20080526',
 | 
			
		||||
                'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
 | 
			
		||||
                'uploader': 'Christopher Sykes',
 | 
			
		||||
                'uploader_id': 'ChristopherJSykes',
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Youtube'],
 | 
			
		||||
        },
 | 
			
		||||
        # Camtasia studio
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 | 
			
		||||
@@ -902,12 +917,13 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        },
 | 
			
		||||
        # LazyYT
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 | 
			
		||||
            'url': 'https://skiplagged.com/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1986',
 | 
			
		||||
                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 | 
			
		||||
                'id': 'skiplagged',
 | 
			
		||||
                'title': 'Skiplagged: The smart way to find cheap flights',
 | 
			
		||||
            },
 | 
			
		||||
            'playlist_mincount': 2,
 | 
			
		||||
            'playlist_mincount': 1,
 | 
			
		||||
            'add_ie': ['Youtube'],
 | 
			
		||||
        },
 | 
			
		||||
        # Cinchcast embed
 | 
			
		||||
        {
 | 
			
		||||
@@ -990,6 +1006,20 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # JWPlayer config passed as variable
 | 
			
		||||
            'url': 'http://www.txxx.com/videos/3326530/ariele/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '3326530_hq',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'ARIELE | Tube Cup',
 | 
			
		||||
                'uploader': 'www.txxx.com',
 | 
			
		||||
                'age_limit': 18,
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        # rtl.nl embed
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 | 
			
		||||
@@ -1065,6 +1095,21 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Kaltura'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # Kaltura iframe embed
 | 
			
		||||
            'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
 | 
			
		||||
            'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '0_f2cfbpwy',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'I. M. Pei: A Centennial Celebration',
 | 
			
		||||
                'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
 | 
			
		||||
                'upload_date': '20170403',
 | 
			
		||||
                'uploader_id': 'batchUser',
 | 
			
		||||
                'timestamp': 1491232186,
 | 
			
		||||
            },
 | 
			
		||||
            'add_ie': ['Kaltura'],
 | 
			
		||||
        },
 | 
			
		||||
        # Eagle.Platform embed (generic URL)
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 | 
			
		||||
@@ -1908,6 +1953,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                data-video-url=|
 | 
			
		||||
                <embed[^>]+?src=|
 | 
			
		||||
                embedSWF\(?:\s*|
 | 
			
		||||
                <object[^>]+data=|
 | 
			
		||||
                new\s+SWFObject\(
 | 
			
		||||
            )
 | 
			
		||||
            (["\'])
 | 
			
		||||
@@ -2549,18 +2595,14 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                self._sort_formats(entry['formats'])
 | 
			
		||||
            return self.playlist_result(entries)
 | 
			
		||||
 | 
			
		||||
        jwplayer_data_str = self._find_jwplayer_data(webpage)
 | 
			
		||||
        if jwplayer_data_str:
 | 
			
		||||
            try:
 | 
			
		||||
                jwplayer_data = self._parse_json(
 | 
			
		||||
                    jwplayer_data_str, video_id, transform_source=js_to_json)
 | 
			
		||||
                info = self._parse_jwplayer_data(
 | 
			
		||||
                    jwplayer_data, video_id, require_title=False)
 | 
			
		||||
                if not info.get('title'):
 | 
			
		||||
                    info['title'] = video_title
 | 
			
		||||
                return info
 | 
			
		||||
            except ExtractorError:
 | 
			
		||||
                pass
 | 
			
		||||
        jwplayer_data = self._find_jwplayer_data(
 | 
			
		||||
            webpage, video_id, transform_source=js_to_json)
 | 
			
		||||
        if jwplayer_data:
 | 
			
		||||
            info = self._parse_jwplayer_data(
 | 
			
		||||
                jwplayer_data, video_id, require_title=False, base_url=url)
 | 
			
		||||
            if not info.get('title'):
 | 
			
		||||
                info['title'] = video_title
 | 
			
		||||
            return info
 | 
			
		||||
 | 
			
		||||
        def check_video(vurl):
 | 
			
		||||
            if YoutubeIE.suitable(vurl):
 | 
			
		||||
@@ -2635,11 +2677,14 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                    found = re.search(REDIRECT_REGEX, refresh_header)
 | 
			
		||||
            if found:
 | 
			
		||||
                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
 | 
			
		||||
                self.report_following_redirect(new_url)
 | 
			
		||||
                return {
 | 
			
		||||
                    '_type': 'url',
 | 
			
		||||
                    'url': new_url,
 | 
			
		||||
                }
 | 
			
		||||
                if new_url != url:
 | 
			
		||||
                    self.report_following_redirect(new_url)
 | 
			
		||||
                    return {
 | 
			
		||||
                        '_type': 'url',
 | 
			
		||||
                        'url': new_url,
 | 
			
		||||
                    }
 | 
			
		||||
                else:
 | 
			
		||||
                    found = None
 | 
			
		||||
 | 
			
		||||
        if not found:
 | 
			
		||||
            # twitter:player is a https URL to iframe player that may or may not
 | 
			
		||||
 
 | 
			
		||||
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    xpath_element,
 | 
			
		||||
@@ -14,14 +15,26 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class HBOBaseIE(InfoExtractor):
 | 
			
		||||
    _FORMATS_INFO = {
 | 
			
		||||
        'pro7': {
 | 
			
		||||
            'width': 1280,
 | 
			
		||||
            'height': 720,
 | 
			
		||||
        },
 | 
			
		||||
        '1920': {
 | 
			
		||||
            'width': 1280,
 | 
			
		||||
            'height': 720,
 | 
			
		||||
        },
 | 
			
		||||
        'pro6': {
 | 
			
		||||
            'width': 768,
 | 
			
		||||
            'height': 432,
 | 
			
		||||
        },
 | 
			
		||||
        '640': {
 | 
			
		||||
            'width': 768,
 | 
			
		||||
            'height': 432,
 | 
			
		||||
        },
 | 
			
		||||
        'pro5': {
 | 
			
		||||
            'width': 640,
 | 
			
		||||
            'height': 360,
 | 
			
		||||
        },
 | 
			
		||||
        'highwifi': {
 | 
			
		||||
            'width': 640,
 | 
			
		||||
            'height': 360,
 | 
			
		||||
@@ -78,6 +91,17 @@ class HBOBaseIE(InfoExtractor):
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        video_url.replace('.tar', '/base_index_w8.m3u8'),
 | 
			
		||||
                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
                elif source.tag == 'hls':
 | 
			
		||||
                    # #EXT-X-BYTERANGE is not supported by native hls downloader
 | 
			
		||||
                    # and ffmpeg (#10955)
 | 
			
		||||
                    # formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    #     video_url.replace('.tar', '/base_index.m3u8'),
 | 
			
		||||
                    #     video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
                    continue
 | 
			
		||||
                elif source.tag == 'dash':
 | 
			
		||||
                    formats.extend(self._extract_mpd_formats(
 | 
			
		||||
                        video_url.replace('.tar', '/manifest.mpd'),
 | 
			
		||||
                        video_id, mpd_id='dash', fatal=False))
 | 
			
		||||
                else:
 | 
			
		||||
                    format_info = self._FORMATS_INFO.get(source.tag, {})
 | 
			
		||||
                    formats.append({
 | 
			
		||||
@@ -112,10 +136,11 @@ class HBOBaseIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HBOIE(HBOBaseIE):
 | 
			
		||||
    IE_NAME = 'hbo'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
 | 
			
		||||
        'md5': '1c33253f0c7782142c993c0ba62a8753',
 | 
			
		||||
        'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1437839',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
@@ -131,11 +156,12 @@ class HBOIE(HBOBaseIE):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HBOEpisodeIE(HBOBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
 | 
			
		||||
    IE_NAME = 'hbo:episode'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
 | 
			
		||||
        'md5': '689132b253cc0ab7434237fc3a293210',
 | 
			
		||||
        'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1439518',
 | 
			
		||||
            'display_id': 'ep-52-inside-the-episode',
 | 
			
		||||
@@ -147,16 +173,19 @@ class HBOEpisodeIE(HBOBaseIE):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        path, display_id = re.match(self._VALID_URL, url).groups()
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        content = self._download_json(
 | 
			
		||||
            'http://www.hbo.com/api/content/' + path, display_id)['content']
 | 
			
		||||
 | 
			
		||||
        video_id = self._search_regex(
 | 
			
		||||
            r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
 | 
			
		||||
            webpage, 'video ID', group='video_id')
 | 
			
		||||
        video_id = compat_str((content.get('parsed', {}).get(
 | 
			
		||||
            'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
 | 
			
		||||
 | 
			
		||||
        info_dict = self._extract_from_id(video_id)
 | 
			
		||||
        info_dict['display_id'] = display_id
 | 
			
		||||
 
 | 
			
		||||
@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
 | 
			
		||||
                    }],
 | 
			
		||||
                },
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _extract_url(webpage):
 | 
			
		||||
        # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
 | 
			
		||||
        mobj = (
 | 
			
		||||
            re.search(
 | 
			
		||||
                r"""(?xs)
 | 
			
		||||
                    kWidget\.(?:thumb)?[Ee]mbed\(
 | 
			
		||||
                    \{.*?
 | 
			
		||||
                        (?P<q1>['\"])wid(?P=q1)\s*:\s*
 | 
			
		||||
                        (?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
 | 
			
		||||
                        (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
 | 
			
		||||
                        (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
 | 
			
		||||
                        (?P<q1>['"])wid(?P=q1)\s*:\s*
 | 
			
		||||
                        (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
 | 
			
		||||
                        (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
 | 
			
		||||
                        (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
 | 
			
		||||
                """, webpage) or
 | 
			
		||||
            re.search(
 | 
			
		||||
                r'''(?xs)
 | 
			
		||||
                    (?P<q1>["\'])
 | 
			
		||||
                    (?P<q1>["'])
 | 
			
		||||
                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
 | 
			
		||||
                    (?P=q1).*?
 | 
			
		||||
                    (?:
 | 
			
		||||
                        entry_?[Ii]d|
 | 
			
		||||
                        (?P<q2>["\'])entry_?[Ii]d(?P=q2)
 | 
			
		||||
                        (?P<q2>["'])entry_?[Ii]d(?P=q2)
 | 
			
		||||
                    )\s*:\s*
 | 
			
		||||
                    (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
 | 
			
		||||
                ''', webpage))
 | 
			
		||||
                    (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
 | 
			
		||||
                ''', webpage) or
 | 
			
		||||
            re.search(
 | 
			
		||||
                r'''(?xs)
 | 
			
		||||
                    <iframe[^>]+src=(?P<q1>["'])
 | 
			
		||||
                      (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
 | 
			
		||||
                      (?:(?!(?P=q1)).)*
 | 
			
		||||
                      [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
 | 
			
		||||
                    (?P=q1)
 | 
			
		||||
                ''', webpage)
 | 
			
		||||
        )
 | 
			
		||||
        if mobj:
 | 
			
		||||
            embed_info = mobj.groupdict()
 | 
			
		||||
            url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
 | 
			
		||||
 
 | 
			
		||||
@@ -62,13 +62,21 @@ class LimelightBaseIE(InfoExtractor):
 | 
			
		||||
                fmt = {
 | 
			
		||||
                    'url': stream_url,
 | 
			
		||||
                    'abr': float_or_none(stream.get('audioBitRate')),
 | 
			
		||||
                    'vbr': float_or_none(stream.get('videoBitRate')),
 | 
			
		||||
                    'fps': float_or_none(stream.get('videoFrameRate')),
 | 
			
		||||
                    'width': int_or_none(stream.get('videoWidthInPixels')),
 | 
			
		||||
                    'height': int_or_none(stream.get('videoHeightInPixels')),
 | 
			
		||||
                    'ext': ext,
 | 
			
		||||
                }
 | 
			
		||||
                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
 | 
			
		||||
                width = int_or_none(stream.get('videoWidthInPixels'))
 | 
			
		||||
                height = int_or_none(stream.get('videoHeightInPixels'))
 | 
			
		||||
                vbr = float_or_none(stream.get('videoBitRate'))
 | 
			
		||||
                if width or height or vbr:
 | 
			
		||||
                    fmt.update({
 | 
			
		||||
                        'width': width,
 | 
			
		||||
                        'height': height,
 | 
			
		||||
                        'vbr': vbr,
 | 
			
		||||
                    })
 | 
			
		||||
                else:
 | 
			
		||||
                    fmt['vcodec'] = 'none'
 | 
			
		||||
                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
 | 
			
		||||
                if rtmp:
 | 
			
		||||
                    format_id = 'rtmp'
 | 
			
		||||
                    if stream.get('videoBitRate'):
 | 
			
		||||
 
 | 
			
		||||
@@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor):
 | 
			
		||||
        m3u8_url = video_data.get('m3u8_url')
 | 
			
		||||
        if m3u8_url:
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 | 
			
		||||
                m3u8_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                m3u8_id='hls', fatal=False))
 | 
			
		||||
 | 
			
		||||
        f4m_url = video_data.get('f4m_url')
 | 
			
		||||
        if f4m_url:
 | 
			
		||||
@@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor):
 | 
			
		||||
        if smil_url:
 | 
			
		||||
            formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
 | 
			
		||||
 | 
			
		||||
        entry_protocol = 'm3u8' if is_live else 'm3u8_native'
 | 
			
		||||
        m3u8_url = stream_info.get('m3u8_url')
 | 
			
		||||
        if m3u8_url:
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
 | 
			
		||||
                m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                m3u8_id='hls', fatal=False))
 | 
			
		||||
 | 
			
		||||
        rtsp_url = stream_info.get('rtsp_url')
 | 
			
		||||
        if rtsp_url:
 | 
			
		||||
@@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor):
 | 
			
		||||
            'view_count': view_count,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_video_formats(self, video_data, video_id, entry_protocol):
 | 
			
		||||
    def _extract_video_formats(self, video_data, video_id):
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
        progressive_url = video_data.get('progressiveUrl')
 | 
			
		||||
@@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor):
 | 
			
		||||
        m3u8_url = video_data.get('httpUrl')
 | 
			
		||||
        if m3u8_url:
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
 | 
			
		||||
                m3u8_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                m3u8_id='hls', fatal=False))
 | 
			
		||||
 | 
			
		||||
        rtsp_url = video_data.get('rtspUrl')
 | 
			
		||||
        if rtsp_url:
 | 
			
		||||
@@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor):
 | 
			
		||||
                }
 | 
			
		||||
            video_data = self._download_json(stream_url, content_id)
 | 
			
		||||
            is_live = video_data.get('isLive')
 | 
			
		||||
            entry_protocol = 'm3u8' if is_live else 'm3u8_native'
 | 
			
		||||
            info.update({
 | 
			
		||||
                'id': content_id,
 | 
			
		||||
                'title': self._live_title(info['title']) if is_live else info['title'],
 | 
			
		||||
                'formats': self._extract_video_formats(video_data, content_id, entry_protocol),
 | 
			
		||||
                'formats': self._extract_video_formats(video_data, content_id),
 | 
			
		||||
                'is_live': is_live,
 | 
			
		||||
            })
 | 
			
		||||
            return info
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										70
									
								
								youtube_dl/extractor/medici.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/medici.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,70 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MediciIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
 | 
			
		||||
        'md5': '004c21bb0a57248085b6ff3fec72719d',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '3059',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
 | 
			
		||||
            'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'upload_date': '20170408',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        # Sets csrftoken cookie
 | 
			
		||||
        self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        MEDICI_URL = 'http://www.medici.tv/'
 | 
			
		||||
 | 
			
		||||
        data = self._download_json(
 | 
			
		||||
            MEDICI_URL, video_id,
 | 
			
		||||
            data=urlencode_postdata({
 | 
			
		||||
                'json': 'true',
 | 
			
		||||
                'page': '/%s' % video_id,
 | 
			
		||||
                'timezone_offset': -420,
 | 
			
		||||
            }), headers={
 | 
			
		||||
                'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
 | 
			
		||||
                'X-Requested-With': 'XMLHttpRequest',
 | 
			
		||||
                'Referer': MEDICI_URL,
 | 
			
		||||
                'Content-Type': 'application/x-www-form-urlencoded',
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        video = data['video']['videos']['video1']
 | 
			
		||||
 | 
			
		||||
        title = video.get('nom') or data['title']
 | 
			
		||||
 | 
			
		||||
        video_id = video.get('id') or video_id
 | 
			
		||||
        formats = self._extract_f4m_formats(
 | 
			
		||||
            update_url_query(video['url_akamai'], {
 | 
			
		||||
                'hdcore': '3.1.0',
 | 
			
		||||
                'plugin=aasp': '3.1.0.43.124',
 | 
			
		||||
            }), video_id, f4m_id='hds')
 | 
			
		||||
 | 
			
		||||
        description = data.get('meta_description')
 | 
			
		||||
        thumbnail = video.get('url_thumbnail') or data.get('main_image')
 | 
			
		||||
        upload_date = unified_strdate(data['video'].get('date'))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'upload_date': upload_date,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
 | 
			
		||||
        view_count = str_to_int(self._search_regex(
 | 
			
		||||
            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
 | 
			
		||||
             r'/listeners/?">([0-9,.]+)</a>',
 | 
			
		||||
             r'm-tooltip=["\']([\d,.]+) plays'],
 | 
			
		||||
             r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
 | 
			
		||||
            webpage, 'play count', default=None))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
@@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _get_user_description(self, page_content):
 | 
			
		||||
        return self._html_search_regex(
 | 
			
		||||
            r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
 | 
			
		||||
            r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
 | 
			
		||||
            page_content, 'user description', fatal=False)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
 | 
			
		||||
    IE_NAME = 'mixcloud:user'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
@@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'dholbach_uploads',
 | 
			
		||||
            'title': 'Daniel Holbach (uploads)',
 | 
			
		||||
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
 | 
			
		||||
            'description': 'md5:def36060ac8747b3aabca54924897e47',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 11,
 | 
			
		||||
    }, {
 | 
			
		||||
@@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'dholbach_uploads',
 | 
			
		||||
            'title': 'Daniel Holbach (uploads)',
 | 
			
		||||
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
 | 
			
		||||
            'description': 'md5:def36060ac8747b3aabca54924897e47',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 11,
 | 
			
		||||
    }, {
 | 
			
		||||
@@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'dholbach_favorites',
 | 
			
		||||
            'title': 'Daniel Holbach (favorites)',
 | 
			
		||||
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
 | 
			
		||||
            'description': 'md5:def36060ac8747b3aabca54924897e47',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'playlist_items': '1-100',
 | 
			
		||||
@@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'dholbach_listens',
 | 
			
		||||
            'title': 'Daniel Holbach (listens)',
 | 
			
		||||
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
 | 
			
		||||
            'description': 'md5:def36060ac8747b3aabca54924897e47',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'playlist_items': '1-100',
 | 
			
		||||
@@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
 | 
			
		||||
    IE_NAME = 'mixcloud:playlist'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
@@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        'playlist_mincount': 16,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'maxvibes_jazzcat-on-ness-radio',
 | 
			
		||||
            'title': 'Jazzcat on Ness Radio',
 | 
			
		||||
            'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 23
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
        playlist_id = mobj.group('playlist')
 | 
			
		||||
        video_id = '%s_%s' % (user_id, playlist_id)
 | 
			
		||||
 | 
			
		||||
        profile = self._download_webpage(
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            url, user_id,
 | 
			
		||||
            note='Downloading playlist page',
 | 
			
		||||
            errnote='Unable to download playlist page')
 | 
			
		||||
 | 
			
		||||
        description = self._get_user_description(profile)
 | 
			
		||||
        playlist_title = self._html_search_regex(
 | 
			
		||||
            r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
 | 
			
		||||
            profile, 'playlist title')
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
 | 
			
		||||
            webpage, 'playlist title',
 | 
			
		||||
            default=None) or self._og_search_title(webpage, fatal=False)
 | 
			
		||||
        description = self._get_user_description(webpage)
 | 
			
		||||
 | 
			
		||||
        entries = OnDemandPagedList(
 | 
			
		||||
            functools.partial(
 | 
			
		||||
@@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
                '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
 | 
			
		||||
            self._PAGE_SIZE)
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, video_id, playlist_title, description)
 | 
			
		||||
        return self.playlist_result(entries, video_id, title, description)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
 | 
			
		||||
    IE_NAME = 'mixcloud:stream'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
 
 | 
			
		||||
@@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE):
 | 
			
		||||
        formats.extend(self._extract_f4m_formats(
 | 
			
		||||
            stack_base_url + 'f4m', stack_id,
 | 
			
		||||
            f4m_id='hds', fatal=False))
 | 
			
		||||
        mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
 | 
			
		||||
        if mp4_url:
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'url': mp4_url,
 | 
			
		||||
                'format_id': 'mp4',
 | 
			
		||||
            })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
@@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):
 | 
			
		||||
 | 
			
		||||
class NPOLiveIE(NPOBaseIE):
 | 
			
		||||
    IE_NAME = 'npo.nl:live'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
 | 
			
		||||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.npo.nl/live/npo-1',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'LI_NL1_4188102',
 | 
			
		||||
@@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.npo.nl/live',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        display_id = self._match_id(url) or 'npo-1'
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -176,7 +176,7 @@ class OdnoklassnikiIE(InfoExtractor):
 | 
			
		||||
            })
 | 
			
		||||
            return info
 | 
			
		||||
 | 
			
		||||
        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
 | 
			
		||||
        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
 | 
			
		||||
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'url': f['url'],
 | 
			
		||||
 
 | 
			
		||||
@@ -75,51 +75,38 @@ class OpenloadIE(InfoExtractor):
 | 
			
		||||
            '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
 | 
			
		||||
            webpage, 'openload ID')
 | 
			
		||||
 | 
			
		||||
        video_url_chars = []
 | 
			
		||||
 | 
			
		||||
        first_char = ord(ol_id[0])
 | 
			
		||||
        key = first_char - 55
 | 
			
		||||
        maxKey = max(2, key)
 | 
			
		||||
        key = min(maxKey, len(ol_id) - 38)
 | 
			
		||||
        t = ol_id[key:key + 36]
 | 
			
		||||
 | 
			
		||||
        hashMap = {}
 | 
			
		||||
        v = ol_id.replace(t, '')
 | 
			
		||||
        h = 0
 | 
			
		||||
 | 
			
		||||
        while h < len(t):
 | 
			
		||||
            f = t[h:h + 3]
 | 
			
		||||
            i = int(f, 8)
 | 
			
		||||
            hashMap[h / 3] = i
 | 
			
		||||
            h += 3
 | 
			
		||||
 | 
			
		||||
        h = 0
 | 
			
		||||
        H = 0
 | 
			
		||||
        while h < len(v):
 | 
			
		||||
            B = ''
 | 
			
		||||
            C = ''
 | 
			
		||||
            if len(v) >= h + 2:
 | 
			
		||||
                B = v[h:h + 2]
 | 
			
		||||
            if len(v) >= h + 3:
 | 
			
		||||
                C = v[h:h + 3]
 | 
			
		||||
            i = int(B, 16)
 | 
			
		||||
            h += 2
 | 
			
		||||
            if H % 3 == 0:
 | 
			
		||||
                i = int(C, 8)
 | 
			
		||||
                h += 1
 | 
			
		||||
            elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60:
 | 
			
		||||
                i = int(C, 10)
 | 
			
		||||
                h += 1
 | 
			
		||||
            index = H % 7
 | 
			
		||||
 | 
			
		||||
            A = hashMap[index]
 | 
			
		||||
            i ^= 213
 | 
			
		||||
            i ^= A
 | 
			
		||||
            video_url_chars.append(compat_chr(i))
 | 
			
		||||
            H += 1
 | 
			
		||||
        decoded = ''
 | 
			
		||||
        a = ol_id[0:24]
 | 
			
		||||
        b = []
 | 
			
		||||
        for i in range(0, len(a), 8):
 | 
			
		||||
            b.append(int(a[i:i + 8] or '0', 16))
 | 
			
		||||
        ol_id = ol_id[24:]
 | 
			
		||||
        j = 0
 | 
			
		||||
        k = 0
 | 
			
		||||
        while j < len(ol_id):
 | 
			
		||||
            c = 128
 | 
			
		||||
            d = 0
 | 
			
		||||
            e = 0
 | 
			
		||||
            f = 0
 | 
			
		||||
            _more = True
 | 
			
		||||
            while _more:
 | 
			
		||||
                if j + 1 >= len(ol_id):
 | 
			
		||||
                    c = 143
 | 
			
		||||
                f = int(ol_id[j:j + 2] or '0', 16)
 | 
			
		||||
                j += 2
 | 
			
		||||
                d += (f & 127) << e
 | 
			
		||||
                e += 7
 | 
			
		||||
                _more = f >= c
 | 
			
		||||
            g = d ^ b[k % 3]
 | 
			
		||||
            for i in range(4):
 | 
			
		||||
                char_dec = (g >> 8 * i) & (c + 127)
 | 
			
		||||
                char = compat_chr(char_dec)
 | 
			
		||||
                if char != '#':
 | 
			
		||||
                    decoded += char
 | 
			
		||||
            k += 1
 | 
			
		||||
 | 
			
		||||
        video_url = 'https://openload.co/stream/%s?mime=true'
 | 
			
		||||
        video_url = video_url % (''.join(video_url_chars))
 | 
			
		||||
        video_url = video_url % decoded
 | 
			
		||||
 | 
			
		||||
        title = self._og_search_title(webpage, default=None) or self._search_regex(
 | 
			
		||||
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										138
									
								
								youtube_dl/extractor/packtpub.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								youtube_dl/extractor/packtpub.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,138 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    remove_end,
 | 
			
		||||
    strip_or_none,
 | 
			
		||||
    unified_timestamp,
 | 
			
		||||
    urljoin,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PacktPubBaseIE(InfoExtractor):
    # Shared constants for the PacktPub extractors defined below.
    _PACKT_BASE = 'https://www.packtpub.com'
    # Root of the "mapt-rest" JSON API serving course/video metadata.
    _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PacktPubIE(PacktPubBaseIE):
    """Extractor for a single PacktPub Mapt course video."""

    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'

    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
        'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
        'info_dict': {
            'id': '20530',
            'ext': 'mp4',
            'title': 'Project Intro',
            'thumbnail': r're:(?i)^https?://.*\.jpg',
            'timestamp': 1490918400,
            'upload_date': '20170331',
        },
    }

    def _handle_error(self, response):
        # The API reports failures in-band via a "status" field rather than
        # HTTP status codes, so every JSON response must be checked.
        if response.get('status') != 'success':
            raise ExtractorError(
                # Fixed: the original format string was '% said: %s' — the
                # '% s' (space flag) conversion silently consumed IE_NAME and
                # produced a garbled message such as 'PacktPubaid: ...'.
                '%s said: %s' % (self.IE_NAME, response['message']),
                expected=True)

    def _download_json(self, *args, **kwargs):
        # Wrap the base downloader so that every JSON response is validated
        # through _handle_error() before being used.
        response = super(PacktPubIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_id, chapter_id, video_id = mobj.group(
            'course_id', 'chapter_id', 'id')

        # Per-user endpoint: reveals whether this account may watch the video.
        video = self._download_json(
            '%s/users/me/products/%s/chapters/%s/sections/%s'
            % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
            'Downloading JSON video')['data']

        content = video.get('content')
        if not content:
            # A missing "content" object means the section is paywalled.
            raise ExtractorError('This video is locked', expected=True)

        video_url = content['file']

        # Public endpoint with display metadata (title, date, thumbnail).
        metadata = self._download_json(
            '%s/products/%s/chapters/%s/sections/%s/metadata'
            % (self._MAPT_REST, course_id, chapter_id, video_id),
            video_id)['data']

        title = metadata['pageTitle']
        course_title = metadata.get('title')
        if course_title:
            # pageTitle is typically "<section> - <course>"; drop the suffix.
            title = remove_end(title, ' - %s' % course_title)
        timestamp = unified_timestamp(metadata.get('publicationDate'))
        thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PacktPubCourseIE(PacktPubBaseIE):
    """Playlist extractor for a whole PacktPub Mapt video course."""

    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
        'info_dict': {
            'id': '9781787122215',
            'title': 'Learn Nodejs by building 12 projects [Video]',
        },
        'playlist_count': 90,
    }

    @classmethod
    def suitable(cls, url):
        # Single-video URLs are handled by PacktPubIE, never by this class.
        if PacktPubIE.suitable(url):
            return False
        return super(PacktPubCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        url, course_id = mobj.group('url', 'id')

        # Course-level metadata, including the full table of contents.
        course = self._download_json(
            '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
            course_id)['data']

        entries = []
        for toc_num, toc_item in enumerate(course['tableOfContents'], 1):
            if toc_item.get('type') != 'chapter':
                continue
            sections = toc_item.get('children')
            if not isinstance(sections, list):
                continue
            for section in sections:
                if section.get('type') != 'section':
                    continue
                seo_url = section.get('seoUrl')
                if not isinstance(seo_url, compat_str):
                    continue
                # url_transparent lets PacktPubIE fill in the video details
                # while we contribute title/description/chapter info here.
                entries.append({
                    '_type': 'url_transparent',
                    'url': urljoin(url + '/', seo_url),
                    'title': strip_or_none(section.get('title')),
                    'description': clean_html(section.get('summary')),
                    'ie_key': PacktPubIE.ie_key(),
                    'chapter': toc_item.get('title'),
                    'chapter_number': toc_num,
                    'chapter_id': toc_item.get('id'),
                })

        return self.playlist_result(entries, course_id, course.get('title'))
 | 
			
		||||
@@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
 | 
			
		||||
class PeriscopeIE(PeriscopeBaseIE):
 | 
			
		||||
    IE_DESC = 'Periscope'
 | 
			
		||||
    IE_NAME = 'periscope'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
 | 
			
		||||
    # Alive example URLs can be found here http://onperiscope.com/
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
 | 
			
		||||
@@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
@@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PeriscopeUserIE(PeriscopeBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
 | 
			
		||||
    IE_DESC = 'Periscope user videos'
 | 
			
		||||
    IE_NAME = 'periscope:user'
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE):
 | 
			
		||||
 | 
			
		||||
        collection = course['modules']
 | 
			
		||||
 | 
			
		||||
        module, clip = None, None
 | 
			
		||||
        clip = None
 | 
			
		||||
 | 
			
		||||
        for module_ in collection:
 | 
			
		||||
            if name in (module_.get('moduleName'), module_.get('name')):
 | 
			
		||||
                module = module_
 | 
			
		||||
                for clip_ in module_.get('clips', []):
 | 
			
		||||
                    clip_index = clip_.get('clipIndex')
 | 
			
		||||
                    if clip_index is None:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,23 +1,40 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_urlparse
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_str,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    fix_xml_ampersands,
 | 
			
		||||
    GeoRestrictedError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    strip_or_none,
 | 
			
		||||
    try_get,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    unified_timestamp,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    urljoin,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RaiBaseIE(InfoExtractor):
 | 
			
		||||
    def _extract_relinker_formats(self, relinker_url, video_id):
 | 
			
		||||
    _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
 | 
			
		||||
    _GEO_COUNTRIES = ['IT']
 | 
			
		||||
    _GEO_BYPASS = False
 | 
			
		||||
 | 
			
		||||
    def _extract_relinker_info(self, relinker_url, video_id):
 | 
			
		||||
        formats = []
 | 
			
		||||
        geoprotection = None
 | 
			
		||||
        is_live = None
 | 
			
		||||
        duration = None
 | 
			
		||||
 | 
			
		||||
        for platform in ('mon', 'flash', 'native'):
 | 
			
		||||
            relinker = self._download_xml(
 | 
			
		||||
@@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor):
 | 
			
		||||
                query={'output': 45, 'pl': platform},
 | 
			
		||||
                headers=self.geo_verification_headers())
 | 
			
		||||
 | 
			
		||||
            media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
 | 
			
		||||
            if not geoprotection:
 | 
			
		||||
                geoprotection = xpath_text(
 | 
			
		||||
                    relinker, './geoprotection', default=None) == 'Y'
 | 
			
		||||
 | 
			
		||||
            if not is_live:
 | 
			
		||||
                is_live = xpath_text(
 | 
			
		||||
                    relinker, './is_live', default=None) == 'Y'
 | 
			
		||||
            if not duration:
 | 
			
		||||
                duration = parse_duration(xpath_text(
 | 
			
		||||
                    relinker, './duration', default=None))
 | 
			
		||||
 | 
			
		||||
            url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
 | 
			
		||||
            if url_elem is None:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            media_url = url_elem.text
 | 
			
		||||
 | 
			
		||||
            # This does not imply geo restriction (e.g.
 | 
			
		||||
            # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
 | 
			
		||||
            if media_url == 'http://download.rai.it/video_no_available.mp4':
 | 
			
		||||
                self.raise_geo_restricted()
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            ext = determine_ext(media_url)
 | 
			
		||||
            if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
 | 
			
		||||
@@ -53,215 +88,333 @@ class RaiBaseIE(InfoExtractor):
 | 
			
		||||
                    'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
        return formats
 | 
			
		||||
        if not formats and geoprotection is True:
 | 
			
		||||
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 | 
			
		||||
 | 
			
		||||
    def _extract_from_content_id(self, content_id, base_url):
 | 
			
		||||
        return dict((k, v) for k, v in {
 | 
			
		||||
            'is_live': is_live,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }.items() if v is not None)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _extract_subtitles(url, subtitle_url):
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        if subtitle_url and isinstance(subtitle_url, compat_str):
 | 
			
		||||
            subtitle_url = urljoin(url, subtitle_url)
 | 
			
		||||
            STL_EXT = '.stl'
 | 
			
		||||
            SRT_EXT = '.srt'
 | 
			
		||||
            subtitles['it'] = [{
 | 
			
		||||
                'ext': 'stl',
 | 
			
		||||
                'url': subtitle_url,
 | 
			
		||||
            }]
 | 
			
		||||
            if subtitle_url.endswith(STL_EXT):
 | 
			
		||||
                srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
 | 
			
		||||
                subtitles['it'].append({
 | 
			
		||||
                    'ext': 'srt',
 | 
			
		||||
                    'url': srt_url,
 | 
			
		||||
                })
 | 
			
		||||
        return subtitles
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RaiPlayIE(RaiBaseIE):
 | 
			
		||||
    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
 | 
			
		||||
        'md5': '340aa3b7afb54bfd14a8c11786450d76',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'La Casa Bianca',
 | 
			
		||||
            'alt_title': 'S2016 - Puntata del 23/10/2016',
 | 
			
		||||
            'description': 'md5:a09d45890850458077d1f68bb036e0a5',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'uploader': 'Rai 3',
 | 
			
		||||
            'creator': 'Rai 3',
 | 
			
		||||
            'duration': 3278,
 | 
			
		||||
            'timestamp': 1477764300,
 | 
			
		||||
            'upload_date': '20161029',
 | 
			
		||||
            'series': 'La Casa Bianca',
 | 
			
		||||
            'season': '2016',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
 | 
			
		||||
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Report del 07/04/2014',
 | 
			
		||||
            'alt_title': 'S2013/14 - Puntata del 07/04/2014',
 | 
			
		||||
            'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'uploader': 'Rai 5',
 | 
			
		||||
            'creator': 'Rai 5',
 | 
			
		||||
            'duration': 6160,
 | 
			
		||||
            'series': 'Report',
 | 
			
		||||
            'season_number': 5,
 | 
			
		||||
            'season': '2013/14',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        url, video_id = mobj.group('url', 'id')
 | 
			
		||||
 | 
			
		||||
        media = self._download_json(
 | 
			
		||||
            '%s?json' % url, video_id, 'Downloading video JSON')
 | 
			
		||||
 | 
			
		||||
        title = media['name']
 | 
			
		||||
 | 
			
		||||
        video = media['video']
 | 
			
		||||
 | 
			
		||||
        relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
 | 
			
		||||
        self._sort_formats(relinker_info['formats'])
 | 
			
		||||
 | 
			
		||||
        thumbnails = []
 | 
			
		||||
        if 'images' in media:
 | 
			
		||||
            for _, value in media.get('images').items():
 | 
			
		||||
                if value:
 | 
			
		||||
                    thumbnails.append({
 | 
			
		||||
                        'url': value.replace('[RESOLUTION]', '600x400')
 | 
			
		||||
                    })
 | 
			
		||||
 | 
			
		||||
        timestamp = unified_timestamp(try_get(
 | 
			
		||||
            media, lambda x: x['availabilities'][0]['start'], compat_str))
 | 
			
		||||
 | 
			
		||||
        subtitles = self._extract_subtitles(url, video.get('subtitles'))
 | 
			
		||||
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'alt_title': media.get('subtitle'),
 | 
			
		||||
            'description': media.get('description'),
 | 
			
		||||
            'uploader': media.get('channel'),
 | 
			
		||||
            'creator': media.get('editor'),
 | 
			
		||||
            'duration': parse_duration(video.get('duration')),
 | 
			
		||||
            'timestamp': timestamp,
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'series': try_get(
 | 
			
		||||
                media, lambda x: x['isPartOf']['name'], compat_str),
 | 
			
		||||
            'season_number': int_or_none(try_get(
 | 
			
		||||
                media, lambda x: x['isPartOf']['numeroStagioni'])),
 | 
			
		||||
            'season': media.get('stagione') or None,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        info.update(relinker_info)
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RaiIE(RaiBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        # var uniquename = "ContentItem-..."
 | 
			
		||||
        # data-id="ContentItem-..."
 | 
			
		||||
        'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'TG PRIMO TEMPO',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'duration': 1758,
 | 
			
		||||
            'upload_date': '20140612',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # with ContentItem in many metas
 | 
			
		||||
        'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1632c009-c843-4836-bb65-80c33084a64b',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
 | 
			
		||||
            'description': 'I film in uscita questa settimana.',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.png$',
 | 
			
		||||
            'duration': 833,
 | 
			
		||||
            'upload_date': '20161103',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # with ContentItem in og:url
 | 
			
		||||
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
 | 
			
		||||
        'md5': '11959b4e44fa74de47011b5799490adf',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'TG1 ore 20:00 del 03/11/2016',
 | 
			
		||||
            'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'duration': 2214,
 | 
			
		||||
            'upload_date': '20161103',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        # drawMediaRaiTV(...)
 | 
			
		||||
        'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
 | 
			
		||||
        'md5': '2dd727e61114e1ee9c47f0da6914e178',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Il pacco',
 | 
			
		||||
            'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'upload_date': '20141221',
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # initEdizione('ContentItem-...'
 | 
			
		||||
        'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
 | 
			
		||||
            'duration': 2274,
 | 
			
		||||
            'upload_date': '20170401',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Changes daily',
 | 
			
		||||
    }, {
 | 
			
		||||
        # HDS live stream with only relinker URL
 | 
			
		||||
        'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'title': 'EuroNews',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # HLS live stream with ContentItem in og:url
 | 
			
		||||
        'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'La diretta di Rainews24',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _extract_from_content_id(self, content_id, url):
 | 
			
		||||
        media = self._download_json(
 | 
			
		||||
            'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
 | 
			
		||||
            content_id, 'Downloading video JSON')
 | 
			
		||||
 | 
			
		||||
        title = media['name'].strip()
 | 
			
		||||
 | 
			
		||||
        media_type = media['type']
 | 
			
		||||
        if 'Audio' in media_type:
 | 
			
		||||
            relinker_info = {
 | 
			
		||||
                'formats': {
 | 
			
		||||
                    'format_id': media.get('formatoAudio'),
 | 
			
		||||
                    'url': media['audioUrl'],
 | 
			
		||||
                    'ext': media.get('formatoAudio'),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        elif 'Video' in media_type:
 | 
			
		||||
            relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError('not a media file')
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(relinker_info['formats'])
 | 
			
		||||
 | 
			
		||||
        thumbnails = []
 | 
			
		||||
        for image_type in ('image', 'image_medium', 'image_300'):
 | 
			
		||||
            thumbnail_url = media.get(image_type)
 | 
			
		||||
            if thumbnail_url:
 | 
			
		||||
                thumbnails.append({
 | 
			
		||||
                    'url': compat_urlparse.urljoin(base_url, thumbnail_url),
 | 
			
		||||
                    'url': compat_urlparse.urljoin(url, thumbnail_url),
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        media_type = media['type']
 | 
			
		||||
        if 'Audio' in media_type:
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'format_id': media.get('formatoAudio'),
 | 
			
		||||
                'url': media['audioUrl'],
 | 
			
		||||
                'ext': media.get('formatoAudio'),
 | 
			
		||||
            })
 | 
			
		||||
        elif 'Video' in media_type:
 | 
			
		||||
            formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError('not a media file')
 | 
			
		||||
        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        captions = media.get('subtitlesUrl')
 | 
			
		||||
        if captions:
 | 
			
		||||
            STL_EXT = '.stl'
 | 
			
		||||
            SRT_EXT = '.srt'
 | 
			
		||||
            if captions.endswith(STL_EXT):
 | 
			
		||||
                captions = captions[:-len(STL_EXT)] + SRT_EXT
 | 
			
		||||
            subtitles['it'] = [{
 | 
			
		||||
                'ext': 'srt',
 | 
			
		||||
                'url': captions,
 | 
			
		||||
            }]
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': content_id,
 | 
			
		||||
            'title': media['name'],
 | 
			
		||||
            'description': media.get('desc'),
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': strip_or_none(media.get('desc')),
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'uploader': media.get('author'),
 | 
			
		||||
            'upload_date': unified_strdate(media.get('date')),
 | 
			
		||||
            'duration': parse_duration(media.get('length')),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        info.update(relinker_info)
 | 
			
		||||
 | 
			
		||||
class RaiTVIE(RaiBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
 | 
			
		||||
            'md5': '8970abf8caf8aef4696e7b1f2adfc696',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Report del 07/04/2014',
 | 
			
		||||
                'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
 | 
			
		||||
                'upload_date': '20140407',
 | 
			
		||||
                'duration': 6160,
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # no m3u8 stream
 | 
			
		||||
            'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
 | 
			
		||||
            # HDS download, MD5 is unstable
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'TG PRIMO TEMPO',
 | 
			
		||||
                'upload_date': '20140612',
 | 
			
		||||
                'duration': 1758,
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Geo-restricted to Italy',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
 | 
			
		||||
            'md5': '35cf7c229f22eeef43e48b5cf923bef0',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'State of the Net, Antonella La Carpia: regole virali',
 | 
			
		||||
                'description': 'md5:b0ba04a324126903e3da7763272ae63c',
 | 
			
		||||
                'upload_date': '20140613',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Error 404',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Alluvione in Sardegna e dissesto idrogeologico',
 | 
			
		||||
                'description': 'Edizione delle ore 20:30 ',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'invalid urls',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
 | 
			
		||||
            'md5': 'e57493e1cb8bc7c564663f363b171847',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
 | 
			
		||||
                'description': 'md5:364b604f7db50594678f483353164fb8',
 | 
			
		||||
                'upload_date': '20140923',
 | 
			
		||||
                'duration': 386,
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
        return info
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        return self._extract_from_content_id(video_id, url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RaiIE(RaiBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
 | 
			
		||||
            'md5': '2dd727e61114e1ee9c47f0da6914e178',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Il pacco',
 | 
			
		||||
                'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
 | 
			
		||||
                'upload_date': '20141221',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # Direct relinker URL
 | 
			
		||||
            'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
 | 
			
		||||
            # HDS live stream, MD5 is unstable
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'EuroNews',
 | 
			
		||||
            },
 | 
			
		||||
            'skip': 'Geo-restricted to Italy',
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            # Embedded content item ID
 | 
			
		||||
            'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
 | 
			
		||||
            'md5': '84c1135ce960e8822ae63cec34441d63',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'TG1 ore 20:00 del 02/07/2016',
 | 
			
		||||
                'upload_date': '20160702',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
 | 
			
		||||
            # HDS live stream, MD5 is unstable
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'title': 'La diretta di Rainews24',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        iframe_url = self._search_regex(
 | 
			
		||||
            [r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
 | 
			
		||||
             r'drawMediaRaiTV\(["\'](.+?)["\']'],
 | 
			
		||||
            webpage, 'iframe', default=None)
 | 
			
		||||
        if iframe_url:
 | 
			
		||||
            if not iframe_url.startswith('http'):
 | 
			
		||||
                iframe_url = compat_urlparse.urljoin(url, iframe_url)
 | 
			
		||||
            return self.url_result(iframe_url)
 | 
			
		||||
        content_item_id = None
 | 
			
		||||
 | 
			
		||||
        content_item_id = self._search_regex(
 | 
			
		||||
            r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
 | 
			
		||||
            webpage, 'content item ID', group='content_id', default=None)
 | 
			
		||||
        content_item_url = self._html_search_meta(
 | 
			
		||||
            ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
 | 
			
		||||
             'twitter:player', 'jsonlink'), webpage, default=None)
 | 
			
		||||
        if content_item_url:
 | 
			
		||||
            content_item_id = self._search_regex(
 | 
			
		||||
                r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
 | 
			
		||||
                'content item id', default=None)
 | 
			
		||||
 | 
			
		||||
        if not content_item_id:
 | 
			
		||||
            content_item_id = self._search_regex(
 | 
			
		||||
                r'''(?x)
 | 
			
		||||
                    (?:
 | 
			
		||||
                        (?:initEdizione|drawMediaRaiTV)\(|
 | 
			
		||||
                        <(?:[^>]+\bdata-id|var\s+uniquename)=
 | 
			
		||||
                    )
 | 
			
		||||
                    (["\'])
 | 
			
		||||
                    (?:(?!\1).)*\bContentItem-(?P<id>%s)
 | 
			
		||||
                ''' % self._UUID_RE,
 | 
			
		||||
                webpage, 'content item id', default=None, group='id')
 | 
			
		||||
 | 
			
		||||
        content_item_ids = set()
 | 
			
		||||
        if content_item_id:
 | 
			
		||||
            return self._extract_from_content_id(content_item_id, url)
 | 
			
		||||
            content_item_ids.add(content_item_id)
 | 
			
		||||
        if video_id not in content_item_ids:
 | 
			
		||||
            content_item_ids.add(video_id)
 | 
			
		||||
 | 
			
		||||
        relinker_url = compat_urlparse.urljoin(url, self._search_regex(
 | 
			
		||||
            r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
 | 
			
		||||
            webpage, 'relinker URL', group='url'))
 | 
			
		||||
        formats = self._extract_relinker_formats(relinker_url, video_id)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
        for content_item_id in content_item_ids:
 | 
			
		||||
            try:
 | 
			
		||||
                return self._extract_from_content_id(content_item_id, url)
 | 
			
		||||
            except GeoRestrictedError:
 | 
			
		||||
                raise
 | 
			
		||||
            except ExtractorError:
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
        relinker_url = self._search_regex(
 | 
			
		||||
            r'''(?x)
 | 
			
		||||
                (?:
 | 
			
		||||
                    var\s+videoURL|
 | 
			
		||||
                    mediaInfo\.mediaUri
 | 
			
		||||
                )\s*=\s*
 | 
			
		||||
                ([\'"])
 | 
			
		||||
                (?P<url>
 | 
			
		||||
                    (?:https?:)?
 | 
			
		||||
                    //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
 | 
			
		||||
                    (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
 | 
			
		||||
            ''',
 | 
			
		||||
            webpage, 'relinker URL', group='url')
 | 
			
		||||
 | 
			
		||||
        relinker_info = self._extract_relinker_info(
 | 
			
		||||
            urljoin(url, relinker_url), video_id)
 | 
			
		||||
        self._sort_formats(relinker_info['formats'])
 | 
			
		||||
 | 
			
		||||
        title = self._search_regex(
 | 
			
		||||
            r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
 | 
			
		||||
            webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
 | 
			
		||||
            webpage, 'title', group='title',
 | 
			
		||||
            default=None) or self._og_search_title(webpage)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        info.update(relinker_info)
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
 
 | 
			
		||||
@@ -13,15 +13,15 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RBMARadioIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
 | 
			
		||||
        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'ford-lopatin-live-at-primavera-sound-2011',
 | 
			
		||||
            'ext': 'mp3',
 | 
			
		||||
            'title': 'Main Stage - Ford & Lopatin',
 | 
			
		||||
            'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
 | 
			
		||||
            'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
 | 
			
		||||
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg',
 | 
			
		||||
            'duration': 2452,
 | 
			
		||||
            'timestamp': 1307103164,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,13 +1,26 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import int_or_none
 | 
			
		||||
from ..aes import aes_cbc_decrypt
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_ord,
 | 
			
		||||
    compat_str,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    bytes_to_intlist,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    intlist_to_bytes,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    strip_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RTL2IE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'rtl2'
 | 
			
		||||
    _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
 | 
			
		||||
@@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
 | 
			
		||||
            'duration': int_or_none(video_info.get('duration')),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RTL2YouBaseIE(InfoExtractor):
 | 
			
		||||
    _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RTL2YouIE(RTL2YouBaseIE):
 | 
			
		||||
    IE_NAME = 'rtl2:you'
 | 
			
		||||
    _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '15740',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
 | 
			
		||||
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
 | 
			
		||||
            'age_limit': 12,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
 | 
			
		||||
    _GEO_COUNTRIES = ['DE']
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        stream_data = self._download_json(
 | 
			
		||||
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
 | 
			
		||||
 | 
			
		||||
        data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
 | 
			
		||||
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
 | 
			
		||||
            bytes_to_intlist(base64.b64decode(data)),
 | 
			
		||||
            bytes_to_intlist(self._AES_KEY),
 | 
			
		||||
            bytes_to_intlist(base64.b64decode(iv))
 | 
			
		||||
        ))
 | 
			
		||||
        if b'rtl2_you_video_not_found' in stream_url:
 | 
			
		||||
            raise ExtractorError('video not found', expected=True)
 | 
			
		||||
 | 
			
		||||
        formats = self._extract_m3u8_formats(
 | 
			
		||||
            stream_url[:-compat_ord(stream_url[-1])].decode(),
 | 
			
		||||
            video_id, 'mp4', 'm3u8_native')
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        video_data = self._download_json(
 | 
			
		||||
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
 | 
			
		||||
 | 
			
		||||
        series = video_data.get('formatTitle')
 | 
			
		||||
        title = episode = video_data.get('title') or series
 | 
			
		||||
        if series and series != title:
 | 
			
		||||
            title = '%s - %s' % (series, title)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'description': strip_or_none(video_data.get('description')),
 | 
			
		||||
            'thumbnail': video_data.get('image'),
 | 
			
		||||
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
 | 
			
		||||
            'series': series,
 | 
			
		||||
            'episode': episode,
 | 
			
		||||
            'age_limit': int_or_none(video_data.get('minimumAge')),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RTL2YouSeriesIE(RTL2YouBaseIE):
 | 
			
		||||
    IE_NAME = 'rtl2:you:series'
 | 
			
		||||
    _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '115',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 5,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        series_id = self._match_id(url)
 | 
			
		||||
        stream_data = self._download_json(
 | 
			
		||||
            self._BACKWERK_BASE_URL + 'videos',
 | 
			
		||||
            series_id, query={
 | 
			
		||||
                'formatId': series_id,
 | 
			
		||||
                'limit': 1000000000,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for video in stream_data.get('videos', []):
 | 
			
		||||
            video_id = compat_str(video['videoId'])
 | 
			
		||||
            if not video_id:
 | 
			
		||||
                continue
 | 
			
		||||
            entries.append(self.url_result(
 | 
			
		||||
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
 | 
			
		||||
                'RTL2You', video_id))
 | 
			
		||||
        return self.playlist_result(entries, series_id)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										32
									
								
								youtube_dl/extractor/thesun.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								youtube_dl/extractor/thesun.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TheSunIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '2261604',
 | 
			
		||||
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_count': 2,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        article_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(url, article_id)
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for ooyala_id in re.findall(
 | 
			
		||||
                r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
 | 
			
		||||
                webpage):
 | 
			
		||||
            entries.append(OoyalaIE._build_url_result(ooyala_id))
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(
 | 
			
		||||
            entries, article_id, self._og_search_title(webpage, fatal=False))
 | 
			
		||||
							
								
								
									
										79
									
								
								youtube_dl/extractor/tv5mondeplus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								youtube_dl/extractor/tv5mondeplus.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,79 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
    get_element_by_class,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TV5MondePlusIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'TV5MONDE+'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
 | 
			
		||||
        'md5': '12130fc199f020673138a83466542ec6',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Tdah, mon amour - Enfants',
 | 
			
		||||
            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
 | 
			
		||||
            'upload_date': '20170401',
 | 
			
		||||
            'timestamp': 1491022860,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    _GEO_BYPASS = False
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        display_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
 | 
			
		||||
            self.raise_geo_restricted(countries=['FR'])
 | 
			
		||||
 | 
			
		||||
        series = get_element_by_class('video-detail__title', webpage)
 | 
			
		||||
        title = episode = get_element_by_class(
 | 
			
		||||
            'video-detail__subtitle', webpage) or series
 | 
			
		||||
        if series and series != title:
 | 
			
		||||
            title = '%s - %s' % (series, title)
 | 
			
		||||
        vpl_data = extract_attributes(self._search_regex(
 | 
			
		||||
            r'(<[^>]+class="video_player_loader"[^>]+>)',
 | 
			
		||||
            webpage, 'video player loader'))
 | 
			
		||||
 | 
			
		||||
        video_files = self._parse_json(
 | 
			
		||||
            vpl_data['data-broadcast'], display_id).get('files', [])
 | 
			
		||||
        formats = []
 | 
			
		||||
        for video_file in video_files:
 | 
			
		||||
            v_url = video_file.get('url')
 | 
			
		||||
            if not v_url:
 | 
			
		||||
                continue
 | 
			
		||||
            video_format = video_file.get('format') or determine_ext(v_url)
 | 
			
		||||
            if video_format == 'm3u8':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    v_url, display_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                    m3u8_id='hls', fatal=False))
 | 
			
		||||
            else:
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': v_url,
 | 
			
		||||
                    'format_id': video_format,
 | 
			
		||||
                })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': display_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': clean_html(get_element_by_class('video-detail__description', webpage)),
 | 
			
		||||
            'thumbnail': vpl_data.get('data-image'),
 | 
			
		||||
            'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
 | 
			
		||||
            'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'episode': episode,
 | 
			
		||||
            'series': series,
 | 
			
		||||
        }
 | 
			
		||||
@@ -31,9 +31,8 @@ class TVNoeIE(InfoExtractor):
 | 
			
		||||
            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
 | 
			
		||||
 | 
			
		||||
        ifs_page = self._download_webpage(iframe_url, video_id)
 | 
			
		||||
        jwplayer_data = self._parse_json(
 | 
			
		||||
            self._find_jwplayer_data(ifs_page),
 | 
			
		||||
            video_id, transform_source=js_to_json)
 | 
			
		||||
        jwplayer_data = self._find_jwplayer_data(
 | 
			
		||||
            ifs_page, video_id, transform_source=js_to_json)
 | 
			
		||||
        info_dict = self._parse_jwplayer_data(
 | 
			
		||||
            jwplayer_data, video_id, require_title=False, base_url=iframe_url)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -225,7 +225,11 @@ class TVPlayIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        geo_country = self._search_regex(
 | 
			
		||||
            r'https?://[^/]+\.([a-z]{2})', url,
 | 
			
		||||
            'geo country', default=None)
 | 
			
		||||
        if geo_country:
 | 
			
		||||
            self._initialize_geo_bypass([geo_country.upper()])
 | 
			
		||||
        video = self._download_json(
 | 
			
		||||
            'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,7 @@ from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
class VierIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'vier'
 | 
			
		||||
    IE_DESC = 'vier.be and vijf.be'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
 | 
			
		||||
 
 | 
			
		||||
@@ -44,7 +44,7 @@ class ViuBaseIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ViuIE(ViuBaseIE):
 | 
			
		||||
    _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
@@ -69,6 +69,9 @@ class ViuIE(ViuBaseIE):
 | 
			
		||||
            'skip_download': 'm3u8 download',
 | 
			
		||||
        },
 | 
			
		||||
        'skip': 'Geo-restricted to Indonesia',
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://india.viu.com/en/media/1126286865',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -432,8 +432,7 @@ class VKIE(VKBaseIE):
 | 
			
		||||
                })
 | 
			
		||||
            elif format_id == 'hls':
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    format_url, video_id, 'mp4',
 | 
			
		||||
                    entry_protocol='m3u8' if is_live else 'm3u8_native',
 | 
			
		||||
                    format_url, video_id, 'mp4', 'm3u8_native',
 | 
			
		||||
                    m3u8_id=format_id, fatal=False, live=is_live))
 | 
			
		||||
            elif format_id == 'rtmp':
 | 
			
		||||
                formats.append({
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VRTIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
 | 
			
		||||
    _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        # deredactie.be
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										191
									
								
								youtube_dl/extractor/vrv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								youtube_dl/extractor/vrv.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,191 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
import json
 | 
			
		||||
import hashlib
 | 
			
		||||
import hmac
 | 
			
		||||
import random
 | 
			
		||||
import string
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlencode,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VRVBaseIE(InfoExtractor):
 | 
			
		||||
    _API_DOMAIN = None
 | 
			
		||||
    _API_PARAMS = {}
 | 
			
		||||
    _CMS_SIGNING = {}
 | 
			
		||||
 | 
			
		||||
    def _call_api(self, path, video_id, note, data=None):
 | 
			
		||||
        base_url = self._API_DOMAIN + '/core/' + path
 | 
			
		||||
        encoded_query = compat_urllib_parse_urlencode({
 | 
			
		||||
            'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
 | 
			
		||||
            'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
 | 
			
		||||
            'oauth_signature_method': 'HMAC-SHA1',
 | 
			
		||||
            'oauth_timestamp': int(time.time()),
 | 
			
		||||
            'oauth_version': '1.0',
 | 
			
		||||
        })
 | 
			
		||||
        headers = self.geo_verification_headers()
 | 
			
		||||
        if data:
 | 
			
		||||
            data = json.dumps(data).encode()
 | 
			
		||||
            headers['Content-Type'] = 'application/json'
 | 
			
		||||
        method = 'POST' if data else 'GET'
 | 
			
		||||
        base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
 | 
			
		||||
        oauth_signature = base64.b64encode(hmac.new(
 | 
			
		||||
            (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
 | 
			
		||||
            base_string.encode(), hashlib.sha1).digest()).decode()
 | 
			
		||||
        encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
 | 
			
		||||
        return self._download_json(
 | 
			
		||||
            '?'.join([base_url, encoded_query]), video_id,
 | 
			
		||||
            note='Downloading %s JSON metadata' % note, headers=headers, data=data)
 | 
			
		||||
 | 
			
		||||
    def _call_cms(self, path, video_id, note):
 | 
			
		||||
        if not self._CMS_SIGNING:
 | 
			
		||||
            self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
 | 
			
		||||
        return self._download_json(
 | 
			
		||||
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
 | 
			
		||||
            note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
 | 
			
		||||
 | 
			
		||||
    def _set_api_params(self, webpage, video_id):
 | 
			
		||||
        if not self._API_PARAMS:
 | 
			
		||||
            self._API_PARAMS = self._parse_json(self._search_regex(
 | 
			
		||||
                r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
 | 
			
		||||
                webpage, 'api config'), video_id)['cxApiParams']
 | 
			
		||||
            self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
 | 
			
		||||
 | 
			
		||||
    def _get_cms_resource(self, resource_key, video_id):
 | 
			
		||||
        return self._call_api(
 | 
			
		||||
            'cms_resource', video_id, 'resource path', data={
 | 
			
		||||
                'resource_key': resource_key,
 | 
			
		||||
            })['__links__']['cms_resource']['href']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VRVIE(VRVBaseIE):
 | 
			
		||||
    IE_NAME = 'vrv'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'GR9PNZ396',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
 | 
			
		||||
            'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
 | 
			
		||||
            'uploader_id': 'seeso',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            url, video_id,
 | 
			
		||||
            headers=self.geo_verification_headers())
 | 
			
		||||
        media_resource = self._parse_json(self._search_regex(
 | 
			
		||||
            r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
 | 
			
		||||
            webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
 | 
			
		||||
 | 
			
		||||
        video_data = media_resource.get('json')
 | 
			
		||||
        if not video_data:
 | 
			
		||||
            self._set_api_params(webpage, video_id)
 | 
			
		||||
            episode_path = self._get_cms_resource(
 | 
			
		||||
                'cms:/episodes/' + video_id, video_id)
 | 
			
		||||
            video_data = self._call_cms(episode_path, video_id, 'video')
 | 
			
		||||
        title = video_data['title']
 | 
			
		||||
 | 
			
		||||
        streams_json = media_resource.get('streams', {}).get('json', {})
 | 
			
		||||
        if not streams_json:
 | 
			
		||||
            self._set_api_params(webpage, video_id)
 | 
			
		||||
            streams_path = video_data['__links__']['streams']['href']
 | 
			
		||||
            streams_json = self._call_cms(streams_path, video_id, 'streams')
 | 
			
		||||
 | 
			
		||||
        audio_locale = streams_json.get('audio_locale')
 | 
			
		||||
        formats = []
 | 
			
		||||
        for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
 | 
			
		||||
            stream_url = stream.get('url')
 | 
			
		||||
            if not stream_url:
 | 
			
		||||
                continue
 | 
			
		||||
            stream_id = stream_id or audio_locale
 | 
			
		||||
            m3u8_formats = self._extract_m3u8_formats(
 | 
			
		||||
                stream_url, video_id, 'mp4', m3u8_id=stream_id,
 | 
			
		||||
                note='Downloading %s m3u8 information' % stream_id,
 | 
			
		||||
                fatal=False)
 | 
			
		||||
            if audio_locale:
 | 
			
		||||
                for f in m3u8_formats:
 | 
			
		||||
                    f['language'] = audio_locale
 | 
			
		||||
            formats.extend(m3u8_formats)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        thumbnails = []
 | 
			
		||||
        for thumbnail in video_data.get('images', {}).get('thumbnails', []):
 | 
			
		||||
            thumbnail_url = thumbnail.get('source')
 | 
			
		||||
            if not thumbnail_url:
 | 
			
		||||
                continue
 | 
			
		||||
            thumbnails.append({
 | 
			
		||||
                'url': thumbnail_url,
 | 
			
		||||
                'width': int_or_none(thumbnail.get('width')),
 | 
			
		||||
                'height': int_or_none(thumbnail.get('height')),
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'description': video_data.get('description'),
 | 
			
		||||
            'duration': float_or_none(video_data.get('duration_ms'), 1000),
 | 
			
		||||
            'uploader_id': video_data.get('channel_id'),
 | 
			
		||||
            'series': video_data.get('series_title'),
 | 
			
		||||
            'season': video_data.get('season_title'),
 | 
			
		||||
            'season_number': int_or_none(video_data.get('season_number')),
 | 
			
		||||
            'season_id': video_data.get('season_id'),
 | 
			
		||||
            'episode': title,
 | 
			
		||||
            'episode_number': int_or_none(video_data.get('episode_number')),
 | 
			
		||||
            'episode_id': video_data.get('production_episode_id'),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VRVSeriesIE(VRVBaseIE):
 | 
			
		||||
    IE_NAME = 'vrv:series'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'G68VXG3G6',
 | 
			
		||||
        },
 | 
			
		||||
        'playlist_mincount': 11,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        series_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            url, series_id,
 | 
			
		||||
            headers=self.geo_verification_headers())
 | 
			
		||||
 | 
			
		||||
        self._set_api_params(webpage, series_id)
 | 
			
		||||
        seasons_path = self._get_cms_resource(
 | 
			
		||||
            'cms:/seasons?series_id=' + series_id, series_id)
 | 
			
		||||
        seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
 | 
			
		||||
 | 
			
		||||
        entries = []
 | 
			
		||||
        for season in seasons_data.get('items', []):
 | 
			
		||||
            episodes_path = season['__links__']['season/episodes']['href']
 | 
			
		||||
            episodes = self._call_cms(episodes_path, series_id, 'episodes')
 | 
			
		||||
            for episode in episodes.get('items', []):
 | 
			
		||||
                episode_id = episode['id']
 | 
			
		||||
                entries.append(self.url_result(
 | 
			
		||||
                    'https://vrv.co/watch/' + episode_id,
 | 
			
		||||
                    'VRV', episode_id, episode.get('title')))
 | 
			
		||||
 | 
			
		||||
        return self.playlist_result(entries, series_id)
 | 
			
		||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/vshare.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/vshare.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class VShareIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://vshare.io/d/0f64ce6',
 | 
			
		||||
        'md5': '16d7b8fef58846db47419199ff1ab3e7',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '0f64ce6',
 | 
			
		||||
            'title': 'vl14062007715967',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        webpage = self._download_webpage(
 | 
			
		||||
            'https://vshare.io/d/%s' % video_id, video_id)
 | 
			
		||||
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
 | 
			
		||||
        video_url = self._search_regex(
 | 
			
		||||
            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
 | 
			
		||||
            webpage, 'video url', group='url')
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
        }
 | 
			
		||||
@@ -1,12 +1,10 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WorldStarHipHopIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
 | 
			
		||||
        'md5': '9d04de741161603bf7071bbf4e883186',
 | 
			
		||||
@@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
 | 
			
		||||
        }
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
 | 
			
		||||
        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'wshh6a7q1ny0G34ZwuIO',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
 | 
			
		||||
        }
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        m_vevo_id = re.search(r'videoId=(.*?)&?', webpage)
 | 
			
		||||
        if m_vevo_id is not None:
 | 
			
		||||
            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
 | 
			
		||||
        entries = self._parse_html5_media_entries(url, webpage, video_id)
 | 
			
		||||
 | 
			
		||||
        video_url = self._search_regex(
 | 
			
		||||
            [r'so\.addVariable\("file","(.*?)"\)',
 | 
			
		||||
             r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
 | 
			
		||||
            webpage, 'video URL')
 | 
			
		||||
        if not entries:
 | 
			
		||||
            return self.url_result(url, 'Generic')
 | 
			
		||||
 | 
			
		||||
        if 'youtube' in video_url:
 | 
			
		||||
            return self.url_result(video_url, ie='Youtube')
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
        title = self._html_search_regex(
 | 
			
		||||
            [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
 | 
			
		||||
             r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
 | 
			
		||||
            webpage, 'title')
 | 
			
		||||
 | 
			
		||||
        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
 | 
			
		||||
        thumbnail = self._html_search_regex(
 | 
			
		||||
            r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
 | 
			
		||||
            default=None)
 | 
			
		||||
        if not thumbnail:
 | 
			
		||||
            _title = r'candytitles.*>(.*)</span>'
 | 
			
		||||
            mobj = re.search(_title, webpage)
 | 
			
		||||
            if mobj is not None:
 | 
			
		||||
                video_title = mobj.group(1)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info = entries[0]
 | 
			
		||||
        info.update({
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
        }
 | 
			
		||||
            'title': title,
 | 
			
		||||
        })
 | 
			
		||||
        return info
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@ import re
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    decode_packed_codes,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    NO_DEFAULT,
 | 
			
		||||
@@ -26,6 +27,9 @@ class XFileShareIE(InfoExtractor):
 | 
			
		||||
        ('vidto.me', 'Vidto'),
 | 
			
		||||
        ('streamin.to', 'Streamin.To'),
 | 
			
		||||
        ('xvidstage.com', 'XVIDSTAGE'),
 | 
			
		||||
        ('vidabc.com', 'Vid ABC'),
 | 
			
		||||
        ('vidbom.com', 'VidBom'),
 | 
			
		||||
        ('vidlo.us', 'vidlo'),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
 | 
			
		||||
@@ -95,6 +99,16 @@ class XFileShareIE(InfoExtractor):
 | 
			
		||||
        # removed by administrator
 | 
			
		||||
        'url': 'http://xvidstage.com/amfy7atlkx25',
 | 
			
		||||
        'only_matching': True,
 | 
			
		||||
    }, {
 | 
			
		||||
        'url': 'http://vidabc.com/i8ybqscrphfv',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'i8ybqscrphfv',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 're:Beauty and the Beast 2017',
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
@@ -133,31 +147,45 @@ class XFileShareIE(InfoExtractor):
 | 
			
		||||
            webpage, 'title', default=None) or self._og_search_title(
 | 
			
		||||
            webpage, default=None) or video_id).strip()
 | 
			
		||||
 | 
			
		||||
        def extract_video_url(default=NO_DEFAULT):
 | 
			
		||||
            return self._search_regex(
 | 
			
		||||
                (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
 | 
			
		||||
                 r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
 | 
			
		||||
                 r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
 | 
			
		||||
                 r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
 | 
			
		||||
                webpage, 'file url', default=default, group='url')
 | 
			
		||||
        def extract_formats(default=NO_DEFAULT):
 | 
			
		||||
            urls = []
 | 
			
		||||
            for regex in (
 | 
			
		||||
                    r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
 | 
			
		||||
                    r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
 | 
			
		||||
                    r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
 | 
			
		||||
                    r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
 | 
			
		||||
                for mobj in re.finditer(regex, webpage):
 | 
			
		||||
                    video_url = mobj.group('url')
 | 
			
		||||
                    if video_url not in urls:
 | 
			
		||||
                        urls.append(video_url)
 | 
			
		||||
            formats = []
 | 
			
		||||
            for video_url in urls:
 | 
			
		||||
                if determine_ext(video_url) == 'm3u8':
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        video_url, video_id, 'mp4',
 | 
			
		||||
                        entry_protocol='m3u8_native', m3u8_id='hls',
 | 
			
		||||
                        fatal=False))
 | 
			
		||||
                else:
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'url': video_url,
 | 
			
		||||
                        'format_id': 'sd',
 | 
			
		||||
                    })
 | 
			
		||||
            if not formats and default is not NO_DEFAULT:
 | 
			
		||||
                return default
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
            return formats
 | 
			
		||||
 | 
			
		||||
        video_url = extract_video_url(default=None)
 | 
			
		||||
        formats = extract_formats(default=None)
 | 
			
		||||
 | 
			
		||||
        if not video_url:
 | 
			
		||||
        if not formats:
 | 
			
		||||
            webpage = decode_packed_codes(self._search_regex(
 | 
			
		||||
                r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
 | 
			
		||||
                webpage, 'packed code'))
 | 
			
		||||
            video_url = extract_video_url()
 | 
			
		||||
            formats = extract_formats()
 | 
			
		||||
 | 
			
		||||
        thumbnail = self._search_regex(
 | 
			
		||||
            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
 | 
			
		||||
 | 
			
		||||
        formats = [{
 | 
			
		||||
            'format_id': 'sd',
 | 
			
		||||
            'url': video_url,
 | 
			
		||||
            'quality': 1,
 | 
			
		||||
        }]
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
 
 | 
			
		||||
@@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
			
		||||
    # If True it will raise an error if no login info is provided
 | 
			
		||||
    _LOGIN_REQUIRED = False
 | 
			
		||||
 | 
			
		||||
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
 | 
			
		||||
 | 
			
		||||
    def _set_language(self):
 | 
			
		||||
        self._set_cookie(
 | 
			
		||||
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
 | 
			
		||||
@@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                         )
 | 
			
		||||
                     )?                                                       # all until now is optional -> you can pass the naked ID
 | 
			
		||||
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 | 
			
		||||
                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE
 | 
			
		||||
                     (?!.*?\blist=
 | 
			
		||||
                        (?:
 | 
			
		||||
                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 | 
			
		||||
                            WL                                                # WL are handled by the watch later IE
 | 
			
		||||
                        )
 | 
			
		||||
                     )
 | 
			
		||||
                     (?(1).+)?                                                # if we found the ID, everything can follow
 | 
			
		||||
                     $"""
 | 
			
		||||
                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 | 
			
		||||
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 | 
			
		||||
    _formats = {
 | 
			
		||||
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 | 
			
		||||
@@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            'url': 'sJL6WA-aGkQ',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
@@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
 | 
			
		||||
                        )
 | 
			
		||||
                        .*
 | 
			
		||||
                     |
 | 
			
		||||
                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
 | 
			
		||||
                     )"""
 | 
			
		||||
                        (%(playlist_id)s)
 | 
			
		||||
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 | 
			
		||||
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
 | 
			
		||||
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
 | 
			
		||||
    IE_NAME = 'youtube:playlist'
 | 
			
		||||
 
 | 
			
		||||
@@ -459,11 +459,11 @@ def parseOpts(overrideArguments=None):
 | 
			
		||||
    downloader.add_option(
 | 
			
		||||
        '--fragment-retries',
 | 
			
		||||
        dest='fragment_retries', metavar='RETRIES', default=10,
 | 
			
		||||
        help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
 | 
			
		||||
        help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
 | 
			
		||||
    downloader.add_option(
 | 
			
		||||
        '--skip-unavailable-fragments',
 | 
			
		||||
        action='store_true', dest='skip_unavailable_fragments', default=True,
 | 
			
		||||
        help='Skip unavailable fragments (DASH and hlsnative only)')
 | 
			
		||||
        help='Skip unavailable fragments (DASH, hlsnative and ISM)')
 | 
			
		||||
    downloader.add_option(
 | 
			
		||||
        '--abort-on-unavailable-fragment',
 | 
			
		||||
        action='store_false', dest='skip_unavailable_fragments',
 | 
			
		||||
 
 | 
			
		||||
@@ -39,6 +39,7 @@ from .compat import (
 | 
			
		||||
    compat_basestring,
 | 
			
		||||
    compat_chr,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_html_entities,
 | 
			
		||||
    compat_html_entities_html5,
 | 
			
		||||
    compat_http_client,
 | 
			
		||||
@@ -539,6 +540,11 @@ def sanitized_Request(url, *args, **kwargs):
 | 
			
		||||
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def expand_path(s):
 | 
			
		||||
    """Expand shell variables and ~"""
 | 
			
		||||
    return os.path.expandvars(compat_expanduser(s))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def orderedSet(iterable):
 | 
			
		||||
    """ Remove all duplicates from the input iterable """
 | 
			
		||||
    res = []
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,3 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
__version__ = '2017.03.22'
 | 
			
		||||
__version__ = '2017.04.11'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user