Compare commits


164 Commits

Author SHA1 Message Date
Philipp Hagemeister
ba7a92b0ce release 2015.11.24 2015-11-24 07:46:38 +01:00
Philipp Hagemeister
4c7d816dd7 [jsinterp] Adapt to updated YouTube code generation (Fixes #7623, fixes #7624, fixes #7625, fixes #7626) 2015-11-24 07:45:38 +01:00
Philipp Hagemeister
032f2f260f README: Document which other programs may be helpful (Fixes #7621) 2015-11-24 03:38:46 +01:00
Philipp Hagemeister
20e98bf6c0 release 2015.11.23 2015-11-23 18:07:58 +01:00
Sergey M․
5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[moevideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00
Sergey M․
67dda51722 Rename compat_urllib_request_Request to sanitized_Request and move to utils 2015-11-23 21:55:15 +06:00
Sergey M․
e4c4bcf36f [vimeo] Use compat_urllib_request_Request 2015-11-23 21:55:14 +06:00
Sergey M․
82d8a8b6e2 [YoutubeDL] Wrap plain-text URL requests in compat_urllib_request_Request 2015-11-23 21:55:13 +06:00
Sergey M․
13a10d5aa3 [compat] Add compat_urllib_request_Request
This is actually not a compatibility routine but rather a workaround for URLs without a specified protocol. A protocol-less URL is treated as HTTP, since that is the most probable scenario, and it will most likely redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request preprocessing that may be added in the future.
2015-11-23 21:55:12 +06:00
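
To make the workaround concrete, here is a minimal sketch of such a wrapper (a simplified illustration, not the exact helper added to youtube_dl/utils.py): a URL arriving without a scheme is defaulted to HTTP before the Request object is built.

try:
    import urllib.request as compat_urllib_request  # Python 3
except ImportError:
    import urllib2 as compat_urllib_request  # Python 2


def sanitize_url(url):
    # A protocol-less URL ('//example.com/video') is treated as HTTP,
    # the most probable scenario; the server will redirect to HTTPS
    # if HTTPS was actually expected.
    return 'http:%s' % url if url.startswith('//') else url


def sanitized_Request(url, *args, **kwargs):
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
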
Sergey M․
9022726446 [youtube] Fix test 2015-11-23 21:37:21 +06:00
Sergey M․
94bfcd23b7 [youtube] Fix test 2015-11-23 21:35:23 +06:00
Sergey M․
526b3b0716 [youtube] Clarify ytplayer.config extraction rationale 2015-11-23 21:14:03 +06:00
Sergey M․
61f92af1cf [youtube] Add test with '};' in tags 2015-11-23 21:02:37 +06:00
Sergey M․
a72778d364 [youtube] Improve ytplayer.config extraction 2015-11-23 21:00:06 +06:00
Sergey M
5ae17037a3 Merge pull request #7599 from lalinsky/fix-youtube
[youtube] More explicit player config JSON extraction (fixes #7468)
2015-11-23 20:52:23 +06:00
Sergey M․
02f0da20b0 [pluralsight] Add support for alternative webpage layout (Closes #7607) 2015-11-23 03:08:38 +06:00
Lukáš Lalinský
b41631c4e6 [youtube] Send the list of patterns directly to _search_regex 2015-11-22 13:53:26 +01:00
Lukáš Lalinský
0e49d9a6b0 [youtube] Fall back to the original regex for ytplayer.config 2015-11-22 13:49:33 +01:00
Sergey M․
4a7d108ab3 [rutube] Remove unnecessary print 2015-11-22 18:24:17 +06:00
Lukáš Lalinský
3cfd000849 [youtube] More explicit player config JSON extraction (fixes #7468) 2015-11-22 13:14:35 +01:00
Sergey M․
1b38185361 [pornhd] Fix title extraction (Closes #7596) 2015-11-22 18:08:30 +06:00
Sergey M․
9cb9a5df77 [utils] Check ext with trailing slash against the list of known extensions 2015-11-22 17:27:13 +06:00
Sergey M․
5035536e3f [test_utils] Add tests for determine_ext 2015-11-22 06:33:52 +06:00
Sergey M․
3e12bc583a [utils] Improve determine_ext (Closes #7593) 2015-11-22 06:29:39 +06:00
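
Together with the test_determine_ext cases added in test/test_utils.py (see the diff further below), these commits pin down roughly the following behavior. A sketch, assuming a trimmed-down known-extensions list (the real list in youtube_dl/utils.py is longer): a candidate extension that only failed the plain match because of trailing slashes is accepted when the stripped value is a known media extension.

import re

# Illustrative subset; an assumption for this sketch.
KNOWN_EXTENSIONS = ('mp4', 'm4a', 'webm', 'flv', 'f4m', 'm3u8', 'mpd')


def determine_ext(url, default_ext='unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # A guess like 'mp4/' (from 'http://example.com/foo/bar.mp4/?download')
    # only counts if the slash-stripped value is a known extension.
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    return default_ext
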
Sergey M․
e568c2233e [youtube] Add test for multi page list of playlists 2015-11-22 05:03:23 +06:00
Sergey M․
061a75edd6 [youtube] Extract base for entry list extractors and support multi page lists of playlists 2015-11-22 05:01:01 +06:00
Philipp Hagemeister
82c4d7b0ce release 2015.11.21 2015-11-21 23:36:27 +01:00
Sergey M․
136dadde95 [youtube:show] Rework in terms of playlists base extractor 2015-11-22 04:18:20 +06:00
Sergey M․
0c14841585 [youtube:user:playlists] Add extractor (Closes #3817) 2015-11-22 04:17:07 +06:00
Sergey M․
0eebf34d9d [pluralsight] Rephrase 2015-11-22 00:58:25 +06:00
Sergey M․
cf186b77a7 [pluralsight] Clarify allowed qualities guessing rationale 2015-11-22 00:56:40 +06:00
Sergey M․
a3372437bf [soundcloud] Remove unused variable 2015-11-22 00:49:58 +06:00
Sergey M․
4c57b4853d [pluralsight] Request only a single format unless listing formats 2015-11-22 00:42:58 +06:00
Sergey M․
38eb2968ab [pluralsight] Clarify and randomize ViewClip sleep interval 2015-11-22 00:07:09 +06:00
Andrzej Lichnerowicz
bea56c9569 [pluralsight] prevent error 429 when sensing video formats 2015-11-21 23:49:58 +06:00
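
The 429 fix amounts to client-side rate limiting: space out the per-format ViewClip requests with a randomized sleep so a burst of probes does not trip the server's request limiter. A schematic sketch (the function name and interval bounds are illustrative, not the extractor's exact code):

import random
import time


def probe_formats(clip_urls, fetch):
    results = []
    for i, url in enumerate(clip_urls):
        if i > 0:
            # Sleep a random interval between consecutive probes to avoid
            # HTTP 429 (Too Many Requests); the bounds are an assumption.
            time.sleep(random.uniform(2, 5))
        results.append(fetch(url))
    return results
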
Sergey M․
7e508ff2cf [pluralsight] Improve login detection 2015-11-21 21:49:37 +06:00
Sergey M․
563772eda4 [pluralsight] Extract base class 2015-11-21 21:37:29 +06:00
Sergey M․
0533915aad [pluralsight] Update some more URLs 2015-11-21 21:35:08 +06:00
Sergey M․
c3a227d1c4 [pluralsight] Update _LOGIN_URL 2015-11-21 21:25:48 +06:00
Sergey M․
f6c903e708 [soundcloud:search] Simplify (Closes #7213) 2015-11-21 21:21:21 +06:00
Sergey M․
7dc011c063 [soundcloud:search] Remove no track results message 2015-11-21 21:00:42 +06:00
Sergey M․
4e3b303016 [soundcloud:search] Fix non-ASCII searches 2015-11-21 20:55:48 +06:00
Sergey M․
7e1f5447e7 [utils] Improve encode_dict 2015-11-21 20:46:33 +06:00
Sergey M․
7e3472758b [soundcloud:search] PEP 8 2015-11-21 20:04:35 +06:00
reiv
328a22e175 [soundcloud] Remove limit on search results 2015-11-21 19:41:36 +06:00
reiv
417b453699 [soundcloud] Use correct error message conventions 2015-11-21 19:41:31 +06:00
reiv
6ea7190a3e Rewrite as list comprehension. 2015-11-21 19:41:26 +06:00
reiv
b54b08c91b Simplify with itertools.islice(). 2015-11-21 19:41:19 +06:00
reiv
c30943b1c0 Fix some compatibility issues, cleanup. 2015-11-21 19:41:15 +06:00
reiv
2abf7cab80 [soundcloud] Add Soundcloud search extractor 2015-11-21 19:41:08 +06:00
Sergey M․
4137196899 [rutube] Extract all formats 2015-11-21 18:02:52 +06:00
Sergey M․
019839faaa [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction 2015-11-21 18:01:39 +06:00
Sergey M․
f52183a878 [rutube:embed] Extend _VALID_URL (Closes #7588) 2015-11-21 17:39:24 +06:00
Yen Chi Hsuan
750b9ff032 [generic] Extract M3U8 formats (closes #7582) 2015-11-21 16:43:01 +08:00
Yen Chi Hsuan
28602e747c [generic] Refactor 2015-11-21 16:08:54 +08:00
Yen Chi Hsuan
6cc37c69e2 [generic] Unescape URLs from JWPlayer (#7582) 2015-11-21 14:12:34 +08:00
Sergey M․
a5cd0eb8a4 [pluralsight:course] Improve _VALID_URL 2015-11-21 08:32:48 +06:00
Sergey M․
c23e266427 [pluralsight] Do not require pluralsight account
Looks like some courses are available without a Pluralsight account
2015-11-21 08:25:52 +06:00
Sergey M․
651acffbe5 [pluralsight] Update ViewClip URL 2015-11-21 08:21:33 +06:00
Sergey M․
71bd93b89c [pluralsight] Do not rely on argument order in query (Closes #7583) 2015-11-21 08:08:34 +06:00
Sergey M․
6da620de58 [kaltura] Add test for referrer protected video (#7409) 2015-11-21 01:40:28 +06:00
Sergey M․
bdceea7afd [kaltura] Clean description 2015-11-21 01:39:29 +06:00
Sergey M․
d80a39cec8 [kaltura] Improve 2015-11-21 01:38:08 +06:00
Sergey M․
5b5fae5f20 [generic] Use referrer from source kaltura embed URLs (#7409) 2015-11-21 01:35:58 +06:00
Sergey M․
01b06aedcf [kaltura] Add support for referrer protected videos (#7409) 2015-11-21 01:34:02 +06:00
Sergey M
c711383811 Merge pull request #7579 from ashutosh-mishra/typo_fix
Typo fix, found while going through the code.
2015-11-20 23:24:54 +06:00
ashutosh-mishra
17cc153435 Typo fix, found while going through the code. 2015-11-20 22:51:46 +05:30
Sergey M․
67446fd49b [instagram] Improve _VALID_URL (Closes #7568) 2015-11-20 04:07:39 +06:00
Sergey M․
325bb615a7 [theplatform] Style 2015-11-19 22:58:43 +06:00
Sergey M․
ee5cd8418e [theplatform] Handle protocolless feed URLs (Closes #7532) 2015-11-19 22:58:29 +06:00
Sergey M․
342609a1b4 [bloomberg] Relax _VALID_URL (Closes #7546) 2015-11-19 22:55:06 +06:00
Sergey M
f270cf1a26 Merge pull request #7519 from barlik/master
Clarify that automatic subtitles are generated.
2015-11-19 22:44:08 +06:00
hedii
371c3b796c [YoutubeDL] Add playlist finished downloading message (Closes #7517)
Conflicts:
	youtube_dl/YoutubeDL.py
2015-11-19 22:39:02 +06:00
Sergey M․
6b7ceee1b9 [vimeo] Add test for #7552 2015-11-19 22:31:16 +06:00
Sergey M․
fdb20a27a3 [vimeo:group] Improve _VALID_URL (Closes #7552) 2015-11-19 22:30:58 +06:00
Sergey M․
2c94198eb6 [vimeo] Improve playlists extraction 2015-11-19 21:29:32 +06:00
Philipp Hagemeister
e8110b8125 release 2015.11.19 2015-11-19 15:35:13 +01:00
Yen Chi Hsuan
c39fd7b1ca [UDNEmbed] Fix generic UDN pages
Closes #7547
2015-11-19 22:32:56 +08:00
Sergey M․
a9c09a7c62 [pbs] Update API URL (Closes #7565) 2015-11-19 20:25:28 +06:00
Philipp Hagemeister
82beaabb41 release 2015.11.18 2015-11-18 19:23:04 +01:00
Jaime Marquínez Ferrándiz
63b4295d20 [youtube:playlist] fix title extraction (fixes #7544 and #7545) 2015-11-18 18:28:05 +01:00
Sergey M․
312a3f389b [pbs] Extend _VALID_URL 2015-11-18 00:46:41 +06:00
Jaime Marquínez Ferrándiz
609af1ae1c [dplay] Add 'encoding: utf-8' line 2015-11-17 17:58:16 +01:00
Jaime Marquínez Ferrándiz
4cd759f73d [dplay] Add extractor (closes #7515)
Since I haven't figured out how to download the hds stream, we use the hls one instead.
2015-11-17 17:52:29 +01:00
Jaime Marquínez Ferrándiz
e156e70281 [rtve] Remove unused import 2015-11-17 16:23:29 +01:00
Sergey M․
9b464929fe [rtve.es:alacarta] Fix extraction 2015-11-17 21:11:42 +06:00
Sergey M
0c176d7bde Merge pull request #7514 from ping/patch-7301
[neteasemusic] Fixes #7301
2015-11-16 14:25:29 +00:00
Sergey M․
7a3f0c00ad [utils] Style 2015-11-16 20:24:09 +06:00
Sergey M․
7aefc49c40 [utils] Skip invalid/non HTML entities (Closes #7518) 2015-11-16 20:20:16 +06:00
Rastislav Barlik
741dd8ea65 Clarify that automatic subtitles are generated.
It wasn't clear what the word "automatic" meant.
2015-11-16 14:15:25 +00:00
ping
76adc82068 [neteasemusic] Fixes #7301 2015-11-16 11:39:18 +08:00
Philipp Hagemeister
bd1512d196 release 2015.11.15 2015-11-15 22:16:08 +01:00
Sergey M․
9a4acbfaf5 [theplatform] Add test for #7385 2015-11-16 00:28:04 +06:00
Sergey M․
ad1f4e7902 [theplatform] Handle explicitly specified SMIL (#7385) 2015-11-15 23:43:23 +06:00
Sergey M
b328295910 Merge pull request #7436 from davidbz/add_proxy_to_update_procedure
Add proxy support for update_self
2015-11-15 11:13:22 +00:00
David Ben Zakai
828b2a5cd9 Removing an unnecessary import 2015-11-15 09:40:32 +02:00
Sergey M․
2ff7cbeaaa [nowtv:list] Add extractor (Closes #7147) 2015-11-15 08:30:13 +06:00
Sergey M․
b2f7738830 [dumpert] Use original protocol 2015-11-15 02:25:00 +06:00
Sergey M․
dc0279532a [dumpert] Disable SSL (Closes #7504) 2015-11-15 02:21:24 +06:00
Sergey M․
0c59d02bdc [periscope] Relax _VALID_URL (Closes #7503) 2015-11-15 00:20:17 +06:00
Jaime Marquínez Ferrándiz
0f72beb515 [periscope] Remove unused imports 2015-11-14 18:31:33 +01:00
Sergey M․
d781e29316 [bbc] Allow selectionunavailable errors (Closes #7502) 2015-11-14 23:08:13 +06:00
Sergey M․
3b3e8ed332 [quickscope] Remove extractor (2) 2015-11-14 22:34:30 +06:00
Sergey M․
dcdfeb33d2 [quickscope] Remove extractor 2015-11-14 22:32:54 +06:00
Sergey M․
0d85c3a732 [lynda] Style 2015-11-14 16:44:24 +06:00
Sergey M․
903d136942 [lynda] Logout only when login info present (Closes #7500) 2015-11-14 16:43:58 +06:00
Yen Chi Hsuan
9d584da7d0 [xfileshare] Correct _VALID_URL 2015-11-14 17:27:32 +08:00
Yen Chi Hsuan
31752f76f7 [twitter:card] Add add_ie for the external test 2015-11-14 17:03:26 +08:00
Yen Chi Hsuan
5f1b2aea80 [twitter:card] Support vine.co embeds (closes #7496) 2015-11-14 17:02:07 +08:00
Sergey M․
4479600d57 [instagram] Add test for #7497 2015-11-14 07:21:20 +06:00
Sergey M․
a90189c3ad [instagram] Relax _VALID_URL (Closes #7497) 2015-11-14 07:20:33 +06:00
Sergey M․
d8a1caf04f [brightcove:new] Style 2015-11-14 06:22:12 +06:00
Sergey M․
cb33d389ed [brightcove:new] Add test with rtmp streams 2015-11-14 06:20:09 +06:00
Sergey M․
967e0955f0 Merge branch 'remitamine-brightcove_in_page_embed' 2015-11-14 06:11:49 +06:00
Sergey M․
e01b432ad3 [brightcove:new] Fix test 2015-11-14 06:11:17 +06:00
Sergey M․
fd91257c40 [brightcove] Order imports alphabetically 2015-11-14 06:08:36 +06:00
Sergey M․
c7b959ce38 [utils] Remove unused function 2015-11-14 06:07:44 +06:00
Sergey M․
75eac8961e [brightcove] Remove unused import 2015-11-14 06:07:24 +06:00
Sergey M․
3b7d9aa487 Rename all references to legacy studio Brightcove extractor 2015-11-14 06:05:46 +06:00
Sergey M․
1f4b722b00 [generic] Clarify Brightcove Legacy Studio comment 2015-11-14 06:03:32 +06:00
Sergey M․
f6519f89b0 [generic] Extract Brightcove New Studio embeds 2015-11-14 06:03:07 +06:00
Sergey M․
24af85298e [brightcove] Fix _extract_urls 2015-11-14 06:01:56 +06:00
Sergey M․
e721d857c2 [brightcove] Clarify IE_NAMEs 2015-11-14 05:56:51 +06:00
Sergey M․
5c17f0a67a [brightcove:embedinpage] Rename extractor to brightcove new
It's not actually embed_in_page but "New Studio", and it allows both iframe and embed_in_page embeds
2015-11-14 05:55:59 +06:00
Sergey M․
4fcaa4f4a5 [brightcove] Rename extractor to brightcove legacy
Old embedding approaches are now "Legacy Studio"
2015-11-14 05:54:16 +06:00
Sergey M․
536f819eda [brightcove] Improve extraction of new embeds 2015-11-14 05:51:05 +06:00
Sergey M․
a662489877 [brightcove:embedinpage] Make more robust and extract rtmp streams 2015-11-14 05:09:50 +06:00
Sergey M․
a2973eb597 Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube-dl into remitamine-brightcove_in_page_embed 2015-11-14 01:23:15 +06:00
Sergey M․
4e21b3a94f [cbs] Use android UA for higher quality streams (Closes #7490) 2015-11-14 00:25:28 +06:00
Jaime Marquínez Ferrándiz
b703ebeeaf [twitter] Don't fail if the description doesn't contain an URL (fixes #7489) 2015-11-13 19:09:42 +01:00
Jaime Marquínez Ferrándiz
b84a5f0337 [twitter] Update tests checksums 2015-11-13 18:55:07 +01:00
Philipp Hagemeister
a1ec9a7553 release 2015.11.13 2015-11-13 11:07:30 +01:00
Sergey M․
91d644b5ba [ruutu] Relax formats extraction 2015-11-13 02:43:27 +06:00
Sergey M․
5d6c3d6a66 [ruutu] Skip NOT-USED URLs (Closes #7478) 2015-11-13 02:41:38 +06:00
Jaime Marquínez Ferrándiz
1ebb4717df [cbsnews] Fix construction of 'play_path' in some videos (fixes #7394) 2015-11-12 21:02:56 +01:00
Yen Chi Hsuan
cf5881fc4d Credit @ferama
For providing the idea for vidto.me (#7167) and extending nowvideo support (#6760)
2015-11-12 21:33:46 +08:00
Sergey M․
fcd817a326 [vimeo] Fix extraction (Closes #7460) 2015-11-12 03:56:11 +06:00
Sergey M․
031ec536f0 [gorillavid] Rename to xfileshare 2015-11-11 23:00:53 +06:00
Sergey M․
668db403f9 [gorillavid] Add test for vidto.me and strip title 2015-11-11 22:47:28 +06:00
Sergey M․
b9ad101926 [gorillavid] Add support for vidto.me 2015-11-11 22:44:03 +06:00
Sergey M․
435911029f [vidto] Remove extractor 2015-11-11 22:43:17 +06:00
Sergey M․
699ed30cee [novamov] Modernize 2015-11-11 22:34:49 +06:00
Sergey M․
9eab37dca0 [vimeo] Simplify set cookie 2015-11-11 22:32:13 +06:00
Sergey M․
9a8a12b7d8 [vimeo] Append cookies instead of overriding 2015-11-11 22:23:23 +06:00
Yen Chi Hsuan
a4c2ab35c1 Merge remote-tracking branch 'upstream/master' 2015-11-12 00:08:42 +08:00
Sergey M․
3d9c4bf09a [vimeo] Fix password protected videos (Closes #7451) 2015-11-11 21:21:21 +06:00
Yen Chi Hsuan
8b8a39e279 [vidto] Several simplifications and improvements
1. Use InfoExtractor._hidden_inputs
2. Fetch title from <title> tag
3. Cookies are preserved automatically
4. Use single quotes everywhere
5. Do not declare variables for one-time use only
2015-11-11 23:17:59 +08:00
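
Points 1-3 describe a common extractor pattern; a schematic sketch against a placeholder site (SomeSiteIE, the URL and regexes are hypothetical): _hidden_inputs collects the hidden form fields in one call, the title comes from the page markup, and cookies set on the first request are replayed automatically by the shared cookie jar when the form is re-posted.

from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request


class SomeSiteIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?somesite\.example/(?P<id>[0-9a-zA-Z]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # 1. Collect all hidden <input> fields instead of scraping each one
        fields = self._hidden_inputs(webpage)

        # 2. Fetch the title from the <title> tag
        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')

        # 3. Cookies from the first request are preserved automatically by
        # the downloader's cookie jar, so re-posting the form just works
        req = sanitized_Request(
            url, compat_urllib_parse.urlencode(fields).encode('utf-8'))
        video_page = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r'file\s*:\s*["\'](http[^"\']+)', video_page, 'video URL')

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
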
Sergey M․
82393e2bb2 [novamov] Follow continue-to-the-video button if any (Closes #7330) 2015-11-11 21:02:05 +06:00
Sergey M․
2eb99a4b98 [nowvideo] Replace main host with a resolvable one 2015-11-11 21:00:23 +06:00
Yen Chi Hsuan
6abce58a12 Credit @sieben for fixing wsj extractor 2015-11-11 20:16:18 +08:00
Yen Chi Hsuan
990e6e8fa3 [vidto] Minor fixes
1. import order
2. fatal is already True in helper functions
2015-11-11 20:13:03 +08:00
Yen Chi Hsuan
bfd88516eb Merge pull request #7454 from sieben/duplicate_keys
Remove duplicate key
2015-11-11 20:00:13 +08:00
Rémy Léone
d8b7e80d29 Remove duplicate key 2015-11-11 12:00:31 +01:00
Yen Chi Hsuan
37120974dc [vidto] PEP8 2015-11-11 02:02:46 +08:00
Marco Ferragina
42fc93c709 vidto extractor: code cleanup 2015-11-11 01:58:47 +08:00
Marco Ferragina
a625e56543 [vidto] Add extractor 2015-11-11 01:52:43 +08:00
Sergey M․
9b738b2caa [funnyordie] Fix extraction and extract m3u8 formats 2015-11-10 21:32:54 +06:00
David Ben Zakai
90bb5667bf Using internal opener 2015-11-10 17:15:23 +02:00
David Ben Zakai
d3d3e2e3aa Adding proxy to update procedure 2015-11-10 16:31:42 +02:00
remitamine
9550ca506f [utils] change extract_attributes to work in python 2 2015-10-31 19:36:04 +01:00
remitamine
c01e1a96aa [brightcove] fix test and fields extraction 2015-09-30 11:20:43 +01:00
remitamine
53407e3f38 [brightcove] fix streaming_src extraction 2015-09-23 14:02:13 +01:00
remitamine
ed1269000f [brightcove] add support for brightcove in page embed(fixes #6824) 2015-09-11 04:46:21 +01:00
remitamine
689fb748ee [utils] add extract_attributes for extracting html tags attributes 2015-09-11 04:44:17 +01:00
132 changed files with 1386 additions and 724 deletions

AUTHORS (View File)

@@ -144,3 +144,5 @@ Lee Jenkins
 Anssi Hannula
 Lukáš Lalinský
 Qijiang Fan
+Rémy Léone
+Marco Ferragina

README.md (View File)

@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.
 ## Subtitle Options:
     --write-sub                      Write subtitle file
-    --write-auto-sub                 Write automatic subtitle file (YouTube
-                                     only)
+    --write-auto-sub                 Write automatically generated subtitle file
+                                     (YouTube only)
     --all-subs                       Download all the available subtitles of the
                                      video
     --list-subs                      List all available subtitles for the video
@@ -534,6 +534,12 @@ Most people asking this question are not aware that youtube-dl now defaults to d
 Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
+
+### Do I need any other programs?
+
+youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
+
+Some videos or video formats can also be only downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed.
 ### I have downloaded a video but how can I play it?
 Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).

docs/supportedsites.md (View File)

@@ -67,7 +67,8 @@
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
 - **Break**
-- **Brightcove**
+- **brightcove:legacy**
+- **brightcove:new**
 - **bt:article**: Bergens Tidende Articles
 - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
 - **BuzzFeed**
@@ -128,6 +129,7 @@
 - **Discovery**
 - **Dotsub**
 - **DouyuTV**: 斗鱼
+- **DPlay**
 - **dramafever**
 - **dramafever:series**
 - **DRBonanza**
@@ -200,7 +202,6 @@
 - **GodTube**
 - **GoldenMoustache**
 - **Golem**
-- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
 - **Goshgay**
 - **Groupon**
 - **Hark**
@@ -367,6 +368,7 @@
 - **nowness:playlist**
 - **nowness:series**
 - **NowTV**
+- **NowTVList**
 - **nowvideo**: NowVideo
 - **npo**: npo.nl and ntr.nl
 - **npo.nl:live**
@@ -426,7 +428,6 @@
 - **qqmusic:playlist**: QQ音乐 - 歌单
 - **qqmusic:singer**: QQ音乐 - 歌手
 - **qqmusic:toplist**: QQ音乐 - 排行榜
-- **Quickscope**: Quick Scope
 - **QuickVid**
 - **R7**
 - **radio.de**
@@ -493,6 +494,7 @@
 - **soompi:show**
 - **soundcloud**
 - **soundcloud:playlist**
+- **soundcloud:search**: Soundcloud search
 - **soundcloud:set**
 - **soundcloud:user**
 - **soundgasm**
@@ -671,6 +673,7 @@
 - **WSJ**: Wall Street Journal
 - **XBef**
 - **XboxClips**
+- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
 - **XHamster**
 - **XHamsterEmbed**
 - **XMinus**
@@ -705,6 +708,7 @@
 - **youtube:show**: YouTube.com (multi-season) shows
 - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
 - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
+- **youtube:user:playlists**: YouTube.com user playlists
 - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
 - **Zapiks**
 - **ZDF**

test/test_utils.py (View File)

@@ -21,6 +21,7 @@ from youtube_dl.utils import (
     clean_html,
     DateRange,
     detect_exe_version,
+    determine_ext,
     encodeFilename,
     escape_rfc3986,
     escape_url,
@@ -210,8 +211,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
         self.assertEqual(unescapeHTML('&#x2F;'), '/')
         self.assertEqual(unescapeHTML('&#47;'), '/')
-        self.assertEqual(
-            unescapeHTML('&eacute;'), 'é')
+        self.assertEqual(unescapeHTML('&eacute;'), 'é')
+        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')

     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")
@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('25-09-2014'), '20140925')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)

+    def test_determine_ext(self):
+        self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
+        self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
+        self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
+        self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
+        self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
+
     def test_find_xpath_attr(self):
         testxml = '''<root>
             <node/>

youtube_dl/YoutubeDL.py (View File)

@@ -28,6 +28,7 @@ if os.name == 'nt':
     import ctypes

 from .compat import (
+    compat_basestring,
     compat_cookiejar,
     compat_expanduser,
     compat_get_terminal_size,
@@ -63,6 +64,7 @@ from .utils import (
     SameFileError,
     sanitize_filename,
     sanitize_path,
+    sanitized_Request,
     std_headers,
     subtitles_filename,
     UnavailableVideoError,
@@ -156,7 +158,7 @@ class YoutubeDL(object):
     writethumbnail:    Write the thumbnail image to a file
     write_all_thumbnails:  Write all thumbnail formats to files
     writesubtitles:    Write the video subtitles to a file
-    writeautomaticsub: Write the automatic subtitles to a file
+    writeautomaticsub: Write the automatically generated subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
@@ -833,6 +835,7 @@ class YoutubeDL(object):
                     extra_info=extra)
                 playlist_results.append(entry_result)
             ie_result['entries'] = playlist_results
+            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
             return ie_result
         elif result_type == 'compat_list':
             self.report_warning(
@@ -937,7 +940,7 @@ class YoutubeDL(object):
                 filter_parts.append(string)

         def _remove_unused_ops(tokens):
-            # Remove operators that we don't use and join them with the sourrounding strings
+            # Remove operators that we don't use and join them with the surrounding strings
             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
             ALLOWED_OPS = ('/', '+', ',', '(', ')')
             last_string, last_start, last_end, last_line = None, None, None, None
@@ -1186,7 +1189,7 @@ class YoutubeDL(object):
         return res

     def _calc_cookies(self, info_dict):
-        pr = compat_urllib_request.Request(info_dict['url'])
+        pr = sanitized_Request(info_dict['url'])
         self.cookiejar.add_cookie_header(pr)
         return pr.get_header('Cookie')

@@ -1870,6 +1873,8 @@ class YoutubeDL(object):
     def urlopen(self, req):
         """ Start an HTTP download """
+        if isinstance(req, compat_basestring):
+            req = sanitized_Request(req)
         return self._opener.open(req, timeout=self._socket_timeout)

     def print_debug_header(self):

youtube_dl/__init__.py (View File)

@@ -377,7 +377,7 @@ def _real_main(argv=None):
     with YoutubeDL(ydl_opts) as ydl:
         # Update version
         if opts.update_self:
-            update_self(ydl.to_screen, opts.verbose)
+            update_self(ydl.to_screen, opts.verbose, ydl._opener)

         # Remove cache dir
         if opts.rm_cachedir:

youtube_dl/downloader/common.py (View File)

@@ -42,7 +42,7 @@ class FileDownloader(object):
     min_filesize:       Skip files smaller than this size
     max_filesize:       Skip files larger than this size
     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
-                        (experimenatal)
+                        (experimental)
     external_downloader_args:  A list of additional command-line arguments for the
                         external downloader.

youtube_dl/downloader/dash.py (View File)

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import FileDownloader
-from ..compat import compat_urllib_request
+from ..utils import sanitized_Request


 class DashSegmentsFD(FileDownloader):
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
         def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
             self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
-            req = compat_urllib_request.Request(target_url)
+            req = sanitized_Request(target_url)
             if remaining_bytes is not None:
                 req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

youtube_dl/downloader/http.py (View File)

@@ -7,14 +7,12 @@ import time
 import re

 from .common import FileDownloader
-from ..compat import (
-    compat_urllib_request,
-    compat_urllib_error,
-)
+from ..compat import compat_urllib_error
 from ..utils import (
     ContentTooShortError,
     encodeFilename,
     sanitize_open,
+    sanitized_Request,
 )


@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
-        basic_request = compat_urllib_request.Request(url, None, headers)
-        request = compat_urllib_request.Request(url, None, headers)
+        basic_request = sanitized_Request(url, None, headers)
+        request = sanitized_Request(url, None, headers)

         is_test = self.params.get('test', False)

youtube_dl/downloader/rtmp.py (View File)

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
                 return False

         # Download using rtmpdump. rtmpdump returns exit code 2 when
-        # the connection was interrumpted and resuming appears to be
+        # the connection was interrupted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
         basic_args = [
             'rtmpdump', '--verbose', '-r', url,

youtube_dl/extractor/__init__.py (View File)

@@ -60,7 +60,10 @@ from .bloomberg import BloombergIE
 from .bpb import BpbIE
 from .br import BRIE
 from .breakcom import BreakIE
-from .brightcove import BrightcoveIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
@@ -129,6 +132,7 @@ from .dfb import DFBIE
 from .dhm import DHMIE
 from .dotsub import DotsubIE
 from .douyutv import DouyuTVIE
+from .dplay import DPlayIE
 from .dramafever import (
     DramaFeverIE,
     DramaFeverSeriesIE,
@@ -221,7 +225,6 @@ from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
-from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .groupon import GrouponIE
 from .hark import HarkIE
@@ -418,7 +421,10 @@ from .nowness import (
     NownessPlaylistIE,
     NownessSeriesIE,
 )
-from .nowtv import NowTVIE
+from .nowtv import (
+    NowTVIE,
+    NowTVListIE,
+)
 from .nowvideo import NowVideoIE
 from .npo import (
     NPOIE,
@@ -456,10 +462,7 @@ from .orf import (
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
-from .periscope import (
-    PeriscopeIE,
-    QuickscopeIE,
-)
+from .periscope import PeriscopeIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
@@ -573,7 +576,8 @@ from .soundcloud import (
     SoundcloudIE,
     SoundcloudSetIE,
     SoundcloudUserIE,
-    SoundcloudPlaylistIE
+    SoundcloudPlaylistIE,
+    SoundcloudSearchIE
 )
 from .soundgasm import (
     SoundgasmIE,
@@ -786,6 +790,7 @@ from .wrzuta import WrzutaIE
 from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
 from .xhamster import (
     XHamsterIE,
     XHamsterEmbedIE,
@@ -829,6 +834,7 @@ from .youtube import (
     YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
     YoutubeUserIE,
+    YoutubeUserPlaylistsIE,
     YoutubeWatchLaterIE,
 )
 from .zapiks import ZapiksIE

youtube_dl/extractor/aljazeera.py (View File)

@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
             'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
             'uploader': 'Al Jazeera English',
         },
-        'add_ie': ['Brightcove'],
+        'add_ie': ['BrightcoveLegacy'],
         'skip': 'Not accessible from Travis CI server',
     }

@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
                 'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
                 '&%40videoPlayer={0}'.format(brightcove_id)
             ),
-            'ie_key': 'Brightcove',
+            'ie_key': 'BrightcoveLegacy',
         }

youtube_dl/extractor/atresplayer.py (View File)

@@ -7,11 +7,11 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
-    compat_urllib_request,
 )
 from ..utils import (
     int_or_none,
     float_or_none,
+    sanitized_Request,
     xpath_text,
     ExtractorError,
 )
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
             'j_password': password,
         }

-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
         formats = []
         for fmt in ['windows', 'android_tablet']:
-            request = compat_urllib_request.Request(
+            request = sanitized_Request(
                 self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
             request.add_header('User-Agent', self._USER_AGENT)

youtube_dl/extractor/bambuser.py (View File)

@@ -6,13 +6,13 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_request,
     compat_str,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
+    sanitized_Request,
 )
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
             'pass': password,
         }

-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
                 '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                 '&method=broadcast&format=json&vid_older_than={last}'
             ).format(user=user, count=self._STEP, last=last_id)
-            req = compat_urllib_request.Request(req_url)
+            req = sanitized_Request(req_url)
             # Without setting this header, we wouldn't get any result
             req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
             data = self._download_json(

youtube_dl/extractor/bbc.py (View File)

@@ -27,7 +27,7 @@ class BBCCoUkIE(InfoExtractor):
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
-        # http://www.bbc.co.uk/programmes/b06bp7lf)
+        # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
     ]
@@ -334,7 +334,7 @@ class BBCCoUkIE(InfoExtractor):
                 return self._download_media_selector_url(
                     mediaselector_url % programme_id, programme_id)
             except BBCCoUkIE.MediaSelectionError as e:
-                if e.id in ('notukerror', 'geolocation'):
+                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                     last_exception = e
                     continue
                 self._raise_extractor_error(e)
@@ -345,7 +345,7 @@ class BBCCoUkIE(InfoExtractor):
             media_selection = self._download_xml(
                 url, programme_id, 'Downloading media selection XML')
         except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
                 media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
             else:
                 raise

youtube_dl/extractor/bliptv.py (View File)

@@ -4,14 +4,12 @@ import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     clean_html,
     int_or_none,
     parse_iso8601,
+    sanitized_Request,
     unescapeHTML,
     xpath_text,
     xpath_with_ns,
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
         for lang, url in subtitles_urls.items():
             # For some weird reason, blip.tv serves a video instead of subtitles
             # when we request with a common UA
-            req = compat_urllib_request.Request(url)
+            req = sanitized_Request(url)
             req.add_header('User-Agent', 'youtube-dl')
             subtitles[lang] = [{
                 # The extension is 'srt' but it's actually an 'ass' file

youtube_dl/extractor/bloomberg.py (View File)

@@ -6,9 +6,9 @@ from .common import InfoExtractor

 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
         # The md5 checksum changes
         'info_dict': {
@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
             'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
             'description': 'md5:a8ba0302912d03d246979735c17d2761',
         },
-    }
+    }, {
+        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         name = self._match_id(url)

youtube_dl/extractor/brightcove.py (View File)

@@ -11,7 +11,6 @@ from ..compat import (
     compat_str,
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
     compat_urlparse,
     compat_xml_parse_error,
 )
@@ -20,12 +19,18 @@ from ..utils import (
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
+    float_or_none,
+    js_to_json,
+    int_or_none,
+    parse_iso8601,
+    sanitized_Request,
     unescapeHTML,
     unsmuggle_url,
 )

-class BrightcoveIE(InfoExtractor):
+class BrightcoveLegacyIE(InfoExtractor):
+    IE_NAME = 'brightcove:legacy'
+
     _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -245,7 +250,7 @@ class BrightcoveIE(InfoExtractor):
     def _get_video_info(self, video_id, query_str, query, referer=None):
         request_url = self._FEDERATED_URL_TEMPLATE % query_str
-        req = compat_urllib_request.Request(request_url)
+        req = sanitized_Request(request_url)
         linkBase = query.get('linkBaseURL')
         if linkBase is not None:
             referer = linkBase[0]
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
         if 'url' not in info and not info.get('formats'):
             raise ExtractorError('Unable to extract video url for %s' % info['id'])
         return info
+
+
+class BrightcoveNewIE(InfoExtractor):
+    IE_NAME = 'brightcove:new'
+    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+        'md5': 'c8100925723840d4b0d243f7025703be',
+        'info_dict': {
+            'id': '4463358922001',
+            'ext': 'mp4',
+            'title': 'Meet the man behind Popcorn Time',
+            'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+            'duration': 165.768,
+            'timestamp': 1441391203,
+            'upload_date': '20150904',
+            'uploader_id': '929656772001',
+            'formats': 'mincount:22',
+        },
+    }, {
+        # with rtmp streams
+        'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+        'info_dict': {
+            'id': '4279049078001',
+            'ext': 'mp4',
+            'title': 'Titansgrave: Chapter 0',
+            'description': 'Titansgrave: Chapter 0',
+            'duration': 1242.058,
+            'timestamp': 1433556729,
+            'upload_date': '20150606',
+            'uploader_id': '4036320279001',
+            'formats': 'mincount:41',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        # Reference:
+        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
+        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
+
+        entries = []
+
+        # Look for iframe embeds [1]
+        for _, url in re.findall(
+                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url)
+
+        # Look for embed_in_page embeds [2]
+        for video_id, account_id, player_id, embed in re.findall(
+                # According to examples from [3] it's unclear whether video id
+                # may be optional and what to do when it is
+                r'''(?sx)
+                    <video[^>]+
+                        data-video-id=["\'](\d+)["\'][^>]*>.*?
+                    </video>.*?
+                    <script[^>]+
+                        src=["\'](?:https?:)?//players\.brightcove\.net/
+                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
+                ''', webpage):
+            entries.append(
+                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                % (account_id, player_id, embed, video_id))
+
+        return entries
+
+    def _real_extract(self, url):
+        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+
+        webpage = self._download_webpage(
+            'http://players.brightcove.net/%s/%s_%s/index.min.js'
+            % (account_id, player_id, embed), video_id)
+
+        policy_key = None
+
+        catalog = self._search_regex(
+            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+        if catalog:
+            catalog = self._parse_json(
+                js_to_json(catalog), video_id, fatal=False)
+            if catalog:
+                policy_key = catalog.get('policyKey')
+
+        if not policy_key:
+            policy_key = self._search_regex(
+                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                webpage, 'policy key', group='pk')
+
+        req = sanitized_Request(
+            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
+            % (account_id, video_id),
+            headers={'Accept': 'application/json;pk=%s' % policy_key})
+        json_data = self._download_json(req, video_id)
+
+        title = json_data['name']
+
+        formats = []
+        for source in json_data.get('sources', []):
+            source_type = source.get('type')
+            src = source.get('src')
+            if source_type == 'application/x-mpegURL':
+                if not src:
+                    continue
+                m3u8_formats = self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
+            else:
+                streaming_src = source.get('streaming_src')
+                stream_name, app_name = source.get('stream_name'), source.get('app_name')
+                if not src and not streaming_src and (not stream_name or not app_name):
+                    continue
+                tbr = float_or_none(source.get('avg_bitrate'), 1000)
+                height = int_or_none(source.get('height'))
+                f = {
+                    'tbr': tbr,
+                    'width': int_or_none(source.get('width')),
+                    'height': height,
+                    'filesize': int_or_none(source.get('size')),
+                    'container': source.get('container'),
+                    'vcodec': source.get('codec'),
+                    'ext': source.get('container').lower(),
+                }
+
+                def build_format_id(kind):
+                    format_id = kind
+                    if tbr:
+                        format_id += '-%dk' % int(tbr)
+                    if height:
+                        format_id += '-%dp' % height
+                    return format_id
+
+                if src or streaming_src:
+                    f.update({
+                        'url': src or streaming_src,
+                        'format_id': build_format_id('http' if src else 'http-streaming'),
+                        'preference': 2 if src else 1,
+                    })
+                else:
+                    f.update({
+                        'url': app_name,
+                        'play_path': stream_name,
+                        'format_id': build_format_id('rtmp'),
+                    })
+                formats.append(f)
+        self._sort_formats(formats)
+
+        description = json_data.get('description')
+        thumbnail = json_data.get('thumbnail')
+        timestamp = parse_iso8601(json_data.get('published_at'))
+        duration = float_or_none(json_data.get('duration'), 1000)
+        tags = json_data.get('tags', [])
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': account_id,
+            'formats': formats,
+            'tags': tags,
+        }

youtube_dl/extractor/cbs.py (View File)

@@ -1,6 +1,10 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..utils import (
+    sanitized_Request,
+    smuggle_url,
+)


 class CBSIE(InfoExtractor):
@@ -46,13 +50,19 @@ class CBSIE(InfoExtractor):

     def _real_extract(self, url):
         display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
+        request = sanitized_Request(url)
+        # Android UA is served with higher quality (720p) streams (see
+        # https://github.com/rg3/youtube-dl/issues/7490)
+        request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
+        webpage = self._download_webpage(request, display_id)
         real_id = self._search_regex(
             [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
             webpage, 'real video ID')
         return {
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',
-            'url': 'theplatform:%s' % real_id,
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
+                {'force_smil_url': True}),
             'display_id': display_id,
         }

youtube_dl/extractor/cbsnews.py (View File)

@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
                         'format_id': format_id,
                     }
                     if uri.startswith('rtmp'):
+                        play_path = re.sub(
+                            r'{slistFilePath}', '',
+                            uri.split('<break>')[-1].split('{break}')[-1])
                         fmt.update({
                             'app': 'ondemand?auth=cbs',
-                            'play_path': 'mp4:' + uri.split('<break>')[-1],
+                            'play_path': 'mp4:' + play_path,
                             'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
                             'page_url': 'http://www.cbsnews.com',
                             'ext': 'flv',

youtube_dl/extractor/ceskatelevize.py (View File)

@@ -5,7 +5,6 @@ import re
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_request,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
@@ -13,6 +12,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     float_or_none,
+    sanitized_Request,
 )
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
             'requestSource': 'iVysilani',
         }

-        req = compat_urllib_request.Request(
+        req = sanitized_Request(
             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
             data=compat_urllib_parse.urlencode(data))

@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
         if playlist_url == 'error_region':
             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

-        req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
+        req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
         req.add_header('Referer', url)

         playlist_title = self._og_search_title(webpage)

youtube_dl/extractor/collegerama.py (View File)

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
 import json

 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     float_or_none,
     int_or_none,
+    sanitized_Request,
 )
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
             }
         }

-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
             json.dumps(player_options_request))
         request.add_header('Content-Type', 'application/json')

youtube_dl/extractor/common.py (View File)

@@ -19,7 +19,6 @@ from ..compat import (
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
     compat_urlparse,
     compat_str,
     compat_etree_fromstring,
@@ -37,6 +36,7 @@ from ..utils import (
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
+    sanitized_Request,
     unescapeHTML,
     unified_strdate,
     url_basename,
@@ -891,6 +891,11 @@ class InfoExtractor(object):
         if not media_nodes:
             manifest_version = '2.0'
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+        base_url = xpath_text(
+            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
+            'base URL', default=None)
+        if base_url:
+            base_url = base_url.strip()
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
                 media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@ -898,7 +903,7 @@ class InfoExtractor(object):
                     continue
                 manifest_url = (
                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
-                    else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                 # If media_url is itself a f4m manifest do the recursive extraction
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
@@ -1280,7 +1285,7 @@ class InfoExtractor(object):
     def _get_cookies(self, url):
         """ Return a compat_cookies.SimpleCookie with the cookies for the url """
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
         return compat_cookies.SimpleCookie(req.get_header('Cookie'))
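The f4m hunk above lets relative media URLs resolve against an explicit <baseURL> element when the manifest carries one, falling back to the manifest URL's own directory as before. A standalone sketch of that resolution rule (hosts are made up):

    def resolve_media_url(manifest_url, media_url, base_url=None):
        # Absolute media URLs pass through untouched.
        if media_url.startswith('http://') or media_url.startswith('https://'):
            return media_url
        # Otherwise prefer the manifest's <baseURL>, else its directory.
        prefix = base_url or '/'.join(manifest_url.split('/')[:-1])
        return prefix + '/' + media_url

    print(resolve_media_url(
        'http://cdn.example.com/vod/manifest.f4m', 'media_1200.f4m',
        base_url='http://mirror.example.com/vod'))
    # http://mirror.example.com/vod/media_1200.f4m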

View File

@@ -23,6 +23,7 @@ from ..utils import (
     int_or_none,
     lowercase_escape,
     remove_end,
+    sanitized_Request,
     unified_strdate,
     urlencode_postdata,
     xpath_text,
@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
             'name': username,
             'password': password,
         })
-        login_request = compat_urllib_request.Request(login_url, data)
+        login_request = sanitized_Request(login_url, data)
         login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self._download_webpage(login_request, None, False, 'Wrong login info')
@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
-                   else compat_urllib_request.Request(url_or_request))
+                   else sanitized_Request(url_or_request))
         # Accept-Language must be set explicitly to accept any language to avoid issues
         # similar to https://github.com/rg3/youtube-dl/issues/6797.
         # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'video_uploader', fatal=False)
         playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
-        playerdata_req = compat_urllib_request.Request(playerdata_url)
+        playerdata_req = sanitized_Request(playerdata_url)
         playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
         playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
         playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
-            streamdata_req = compat_urllib_request.Request(
+            streamdata_req = sanitized_Request(
                 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
                 % (stream_id, stream_format, stream_quality),
                 compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

View File

@@ -7,15 +7,13 @@ import itertools
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_request,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
     parse_iso8601,
+    sanitized_Request,
     str_to_int,
     unescapeHTML,
 )
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
     @staticmethod
     def _build_request(url):
         """Build a request with the family filter disabled"""
-        request = compat_urllib_request.Request(url)
+        request = sanitized_Request(url)
         request.add_header('Cookie', 'family_filter=off; ff=off')
         return request

View File

@@ -2,13 +2,11 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     int_or_none,
     parse_iso8601,
+    sanitized_Request,
 )
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
             headers={'Origin': 'http://www.dcndigital.ae'})

View File

@@ -0,0 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import time
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DPlayIE(InfoExtractor):
+    _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
+
+    _TEST = {
+        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
+        'info_dict': {
+            'id': '3172',
+            'ext': 'mp4',
+            'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
+            'title': 'Svensken lär sig njuta av livet',
+            'duration': 2650,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'data-video-id="(\d+)"', webpage, 'video id')
+        info = self._download_json(
+            'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
+            video_id)['data'][0]
+        self._set_cookie(
+            'secure.dplay.se', 'dsc-geo',
+            '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
+        # TODO: consider adding support for 'stream_type=hds', it seems to
+        # require setting some cookies
+        manifest_url = self._download_json(
+            'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
+            video_id, 'Getting manifest url for hls stream')['hls']
+        formats = self._extract_m3u8_formats(
+            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': info['title'],
+            'formats': formats,
+            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
+        }
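The dsc-geo cookie set above is a small JSON blob whose expiry field is a millisecond Unix timestamp twenty minutes ahead, presumably consumed by the geo check on the authorization endpoint. A standalone sketch of the same computation (the helper name is ours, not the extractor's):

    import json
    import time

    def make_dsc_geo(country_code='NL', ttl_seconds=20 * 60):
        # expiry is a Unix timestamp in milliseconds, ttl_seconds ahead.
        return json.dumps({
            'countryCode': country_code,
            'expiry': int((time.time() + ttl_seconds) * 1000),
        })

    print(make_dsc_geo())  # e.g. {"countryCode": "NL", "expiry": 1448323200000}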

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
-    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
@@ -16,6 +15,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
+    sanitized_Request,
 )
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
             'password': password,
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         response = self._download_webpage(
             request, None, 'Logging in as %s' % username)

View File

@@ -2,14 +2,17 @@
 from __future__ import unicode_literals
 import base64
+import re
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
-from ..utils import qualities
+from ..utils import (
+    qualities,
+    sanitized_Request,
+)
 class DumpertIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+    _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -26,10 +29,12 @@ class DumpertIE(InfoExtractor):
     }]
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        protocol = mobj.group('protocol')
-        url = 'https://www.dumpert.nl/mediabase/' + video_id
-        req = compat_urllib_request.Request(url)
+        url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
+        req = sanitized_Request(url)
         req.add_header('Cookie', 'nsfw=1; cpc=10')
         webpage = self._download_webpage(req, video_id)

View File

@@ -2,11 +2,11 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     float_or_none,
     int_or_none,
     parse_iso8601,
+    sanitized_Request,
 )
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
         hls_url = media.get('HLS_SURL')
         if hls_url:
-            request = compat_urllib_request.Request(
+            request = sanitized_Request(
                 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                 headers={'Referer': url})
             token_data = self._download_json(

View File

@@ -3,13 +3,12 @@ from __future__ import unicode_literals
 import json
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     determine_ext,
     clean_html,
     int_or_none,
     float_or_none,
+    sanitized_Request,
 )
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
         video_id = ims_video['videoID']
         key = ims_video['hash']
-        config_req = compat_urllib_request.Request(
+        config_req = sanitized_Request(
             'http://www.escapistmagazine.com/videos/'
             'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
         config_req.add_header('Referer', url)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
         playlist_id = mobj.group('id')
         pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
-        pllist_req = compat_urllib_request.Request(pllist_url)
+        pllist_req = sanitized_Request(pllist_url)
         pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
         playlist_list = self._download_json(
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
             raise ExtractorError('Playlist id not found')
         pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
-        pl_req = compat_urllib_request.Request(pl_url)
+        pl_req = sanitized_Request(pl_url)
         pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
         playlist = self._download_json(
             pl_req, playlist_id, note='Downloading playlist info')

View File

@@ -3,9 +3,9 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     int_or_none,
+    sanitized_Request,
     str_to_int,
 )
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

View File

@@ -10,11 +10,11 @@ from ..compat import (
     compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
-    compat_urllib_request,
 )
 from ..utils import (
     ExtractorError,
     limit_length,
+    sanitized_Request,
     urlencode_postdata,
     get_element_by_id,
     clean_html,
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
         if useremail is None:
             return
-        login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
+        login_page_req = sanitized_Request(self._LOGIN_URL)
         login_page_req.add_header('Cookie', 'locale=en_US')
         login_page = self._download_webpage(login_page_req, None,
                                             note='Downloading login page',
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
             'timezone': '-60',
             'trynum': '1',
         }
-        request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
+        request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         try:
             login_results = self._download_webpage(request, None,
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
                     r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
                 'name_action_selected': 'dont_save',
             }
-            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
+            check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             check_response = self._download_webpage(check_req, None,
                                                     note='Confirming login')

View File

@@ -12,6 +12,7 @@ from ..compat import (
 from ..utils import (
     encode_dict,
     ExtractorError,
+    sanitized_Request,
 )
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
         }
         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
         login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
             return False
         # this is also needed
-        login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
+        login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
         self._download_webpage(
             login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     ExtractorError,
     find_xpath_attr,
+    sanitized_Request,
 )
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
         video_id = mobj.group('id')
         video_uploader_id = mobj.group('uploader_id')
         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
-        req = compat_urllib_request.Request(webpage_url)
+        req = sanitized_Request(webpage_url)
         req.add_header(
             'User-Agent',
             # it needs a more recent version

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     parse_duration,
     parse_iso8601,
+    sanitized_Request,
     str_to_int,
 )
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
             b'Content-Type': b'application/x-www-form-urlencoded',
             b'Origin': b'http://www.4tube.com',
         }
-        token_req = compat_urllib_request.Request(token_url, b'{}', headers)
+        token_req = sanitized_Request(token_url, b'{}', headers)
         tokens = self._download_json(token_req, video_id)
         formats = [{
             'url': tokens[format]['token'],

View File

@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
         links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
-        bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
-        bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
-        bitrates.sort()
+        m3u8_url = self._search_regex(
+            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
+            webpage, 'm3u8 url', default=None, group='url')
         formats = []
+
+        m3u8_formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+        if m3u8_formats:
+            formats.extend(m3u8_formats)
+
+        bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
+        bitrates.sort()
+
         for bitrate in bitrates:
             for link in links:
                 formats.append({

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     remove_end,
     HEADRequest,
+    sanitized_Request,
 )
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
             'password': password,
         }
-        request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
+        request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self._download_webpage(request, display_id, 'Logging in')
         start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')

View File

@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
 from ..compat import (
     compat_etree_fromstring,
     compat_urllib_parse_unquote,
-    compat_urllib_request,
     compat_urlparse,
     compat_xml_parse_error,
 )
@@ -22,6 +21,7 @@ from ..utils import (
     HEADRequest,
     is_html,
     orderedSet,
+    sanitized_Request,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -30,7 +30,10 @@ from ..utils import (
     url_basename,
     xpath_text,
 )
-from .brightcove import BrightcoveIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
 from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
@@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):
         # it also tests brightcove videos that need to set the 'Referer' in the
         # http requests
         {
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
            'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
             'info_dict': {
                 'id': '2765128793001',
@@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):
                 'uploader': 'thestar.com',
                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
             },
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
         },
         {
             'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):
         },
         {
             # https://github.com/rg3/youtube-dl/issues/3541
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
             'info_dict': {
                 'id': '3866516442001',
@@ -820,6 +823,19 @@ class GenericIE(InfoExtractor):
                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
             },
         },
+        # Kaltura embed protected with referrer
+        {
+            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
+            'info_dict': {
+                'id': '1_g4fbemnq',
+                'ext': 'mp4',
+                'title': 'Violetta - Achter De Schermen - Ruggero',
+                'description': 'Achter de schermen met Ruggero',
+                'timestamp': 1435133761,
+                'upload_date': '20150624',
+                'uploader_id': 'echojecka',
+            },
+        },
         # Eagle.Platform embed (generic URL)
         {
             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1031,6 +1047,31 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'cinemasnob',
             },
+        },
+        # BrightcoveInPageEmbed embed
+        {
+            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+            'info_dict': {
+                'id': '4238694884001',
+                'ext': 'flv',
+                'title': 'Tabletop: Dread, Last Thoughts',
+                'description': 'Tabletop: Dread, Last Thoughts',
+                'duration': 51690,
+            },
+        },
+        # JWPlayer with M3U8
+        {
+            'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+            'info_dict': {
+                'id': 'playlist',
+                'ext': 'mp4',
+                'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+                'uploader': 'ren.tv',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
         }
     ]
@@ -1174,7 +1215,7 @@ class GenericIE(InfoExtractor):
         full_response = None
         if head_response is False:
-            request = compat_urllib_request.Request(url)
+            request = sanitized_Request(url)
             request.add_header('Accept-Encoding', '*')
             full_response = self._request_webpage(request, video_id)
             head_response = full_response
@@ -1203,7 +1244,7 @@ class GenericIE(InfoExtractor):
                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
         if not full_response:
-            request = compat_urllib_request.Request(url)
+            request = sanitized_Request(url)
             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
             # making it impossible to download only chunk of the file (yet we need only 512kB to
            # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1290,14 +1331,14 @@ class GenericIE(InfoExtractor):
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
-        # Look for BrightCove:
-        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+        # Look for Brightcove Legacy Studio embeds
+        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
             self.to_screen('Brightcove video detected.')
             entries = [{
                 '_type': 'url',
                 'url': smuggle_url(bc_url, {'Referer': url}),
-                'ie_key': 'Brightcove'
+                'ie_key': 'BrightcoveLegacy'
             } for bc_url in bc_urls]
             return {
@@ -1307,6 +1348,11 @@ class GenericIE(InfoExtractor):
             'entries': entries,
         }
+        # Look for Brightcove New Studio embeds
+        bc_urls = BrightcoveNewIE._extract_urls(webpage)
+        if bc_urls:
+            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+
         # Look for embedded rtl.nl player
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1675,7 +1721,9 @@ class GenericIE(InfoExtractor):
         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
         if mobj is not None:
-            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+            return self.url_result(smuggle_url(
+                'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
+                {'source_url': url}), 'Kaltura')
         # Look for Eagle.Platform embeds
         mobj = re.search(
@@ -1720,7 +1768,7 @@ class GenericIE(InfoExtractor):
         # Look for UDN embeds
         mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
         if mobj is not None:
             return self.url_result(
                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -1840,6 +1888,7 @@ class GenericIE(InfoExtractor):
         entries = []
         for video_url in found:
+            video_url = video_url.replace('\\/', '/')
             video_url = compat_urlparse.urljoin(url, video_url)
             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1851,25 +1900,24 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
+            entry_info_dict = {
+                'id': video_id,
+                'uploader': video_uploader,
+                'title': video_title,
+                'age_limit': age_limit,
+            }
+
             ext = determine_ext(video_url)
+
             if ext == 'smil':
-                entries.append({
-                    'id': video_id,
-                    'formats': self._extract_smil_formats(video_url, video_id),
-                    'uploader': video_uploader,
-                    'title': video_title,
-                    'age_limit': age_limit,
-                })
+                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
             elif ext == 'xspf':
                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+            elif ext == 'm3u8':
+                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
             else:
-                entries.append({
-                    'id': video_id,
-                    'url': video_url,
-                    'uploader': video_uploader,
-                    'title': video_title,
-                    'age_limit': age_limit,
-                })
+                entry_info_dict['url'] = video_url
+
+            entries.append(entry_info_dict)
         if len(entries) == 1:
             return entries[0]
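Several hunks above thread the embedding page through to a delegated extractor with smuggle_url: the Brightcove Legacy entries carry it as a Referer, and the Kaltura result now carries it as source_url. A minimal round trip using youtube-dl's own helpers (the kaltura IDs are made up):

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    url = smuggle_url('kaltura:123456:1_abcdefgh',
                      {'source_url': 'http://example.com/page'})
    plain, data = unsmuggle_url(url, {})
    print(plain)  # kaltura:123456:1_abcdefgh
    print(data)   # {'source_url': 'http://example.com/page'}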

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     HEADRequest,
+    sanitized_Request,
     str_to_int,
     urlencode_postdata,
     urlhandle_detect_ext,
 )
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
             r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
         payload = urlencode_postdata({'tracks[]': track_id})
-        req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
+        req = sanitized_Request(self._PLAYLIST_URL, payload)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         track = self._download_json(req, track_id, 'Downloading playlist')[0]

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
 import base64
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     HEADRequest,
+    sanitized_Request,
 )
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
             ('mediaType', 's'),
             ('mediaId', video_id),
         ])
-        r = compat_urllib_request.Request(
+        r = sanitized_Request(
             'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
         r.add_header('Content-Type', 'application/x-www-form-urlencoded')
         mkd = self._download_json(

View File

@@ -4,12 +4,10 @@ import json
 import time
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
         data = {'ax': 1, 'ts': time.time()}
         data_encoded = compat_urllib_parse.urlencode(data)
         complete_url = url + "?" + data_encoded
-        request = compat_urllib_request.Request(complete_url)
+        request = sanitized_Request(complete_url)
         response, urlh = self._download_webpage_handle(
             request, track_id, 'Downloading webpage with the url')
         cookie = urlh.headers.get('Set-Cookie', '')
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
         title = track['song']
         serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             serve_url, '', {'Content-Type': 'application/json'})
         request.add_header('cookie', cookie)
         song_data = self._download_json(request, track_id, 'Downloading metadata')

View File

@@ -10,8 +10,8 @@ from ..utils import (
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
         'info_dict': {
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
         }
-    }
+    }, {
+        'url': 'https://instagram.com/p/-Cmh1cukG2/',
+        'only_matching': True,
+    }]
     def _real_extract(self, url):
         video_id = self._match_id(url)
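The loosened pattern above accepts plain http, an optional www, and shortcode IDs containing characters such as '-'. A quick check of what the new regex captures:

    import re

    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
    for u in ('https://instagram.com/p/aye83DjauH/?foo=bar#abc',
              'https://instagram.com/p/-Cmh1cukG2/',
              'http://www.instagram.com/p/aye83DjauH/'):
        print(re.match(_VALID_URL, u).group('id'))
    # aye83DjauH, -Cmh1cukG2, aye83DjauH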

View File

@@ -6,12 +6,10 @@ from random import random
 from math import floor
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     ExtractorError,
     remove_end,
+    sanitized_Request,
 )
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
             (floor(random() * 1073741824), floor(random() * 1073741824))
         )
-        req = compat_urllib_request.Request(player_url)
+        req = sanitized_Request(player_url)
         req.add_header('Referer', url)
         playerpage = self._download_webpage(req, video_id)

View File

@@ -5,11 +5,9 @@ import re
 import json
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
             ]
         }
-        request = compat_urllib_request.Request(api_url, json.dumps(data))
+        request = sanitized_Request(api_url, json.dumps(data))
         video_json_page = self._download_webpage(
             request, video_id, 'Downloading video JSON')

View File

@@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 import re
+import base64
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import (
+    compat_urllib_parse,
+    compat_urlparse,
+)
 from ..utils import (
+    clean_html,
     ExtractorError,
     int_or_none,
+    unsmuggle_url,
 )
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
             video_id, actions, note='Downloading video info JSON')
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
         mobj = re.match(self._VALID_URL, url)
         partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
         entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
         info, source_data = self._get_video_info(entry_id, partner_id)
-        formats = [{
-            'format_id': '%(fileExt)s-%(bitrate)s' % f,
-            'ext': f['fileExt'],
-            'tbr': f['bitrate'],
-            'fps': f.get('frameRate'),
-            'filesize_approx': int_or_none(f.get('size'), invscale=1024),
-            'container': f.get('containerFormat'),
-            'vcodec': f.get('videoCodecId'),
-            'height': f.get('height'),
-            'width': f.get('width'),
-            'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
-        } for f in source_data['flavorAssets']]
+        source_url = smuggled_data.get('source_url')
+        if source_url:
+            referrer = base64.b64encode(
+                '://'.join(compat_urlparse.urlparse(source_url)[:2])
+                .encode('utf-8')).decode('utf-8')
+        else:
+            referrer = None
+
+        formats = []
+        for f in source_data['flavorAssets']:
+            video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
+            if referrer:
+                video_url += '?referrer=%s' % referrer
+            formats.append({
+                'format_id': '%(fileExt)s-%(bitrate)s' % f,
+                'ext': f.get('fileExt'),
+                'tbr': int_or_none(f['bitrate']),
+                'fps': int_or_none(f.get('frameRate')),
+                'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+                'container': f.get('containerFormat'),
+                'vcodec': f.get('videoCodecId'),
+                'height': int_or_none(f.get('height')),
+                'width': int_or_none(f.get('width')),
+                'url': video_url,
+            })
+
+        self._check_formats(formats, entry_id)
         self._sort_formats(formats)
         return {
             'id': entry_id,
             'title': info['name'],
             'formats': formats,
-            'description': info.get('description'),
+            'description': clean_html(info.get('description')),
             'thumbnail': info.get('thumbnailUrl'),
             'duration': info.get('duration'),
             'timestamp': info.get('createdAt'),
View File

@@ -4,10 +4,8 @@ import os
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_urlparse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import sanitized_Request
 class KeezMoviesIE(InfoExtractor):
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

View File

@@ -8,13 +8,13 @@ import time
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_request,
     compat_ord,
 )
 from ..utils import (
     determine_ext,
     ExtractorError,
     parse_iso8601,
+    sanitized_Request,
     int_or_none,
     encode_data_uri,
 )
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
             'tkey': self.calc_time_key(int(time.time())),
             'domain': 'www.letv.com'
         }
-        play_json_req = compat_urllib_request.Request(
+        play_json_req = sanitized_Request(
             'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
         )
         cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')

View File

@@ -7,12 +7,12 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
-    compat_urllib_request,
 )
 from ..utils import (
     ExtractorError,
     clean_html,
     int_or_none,
+    sanitized_Request,
 )
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
         self._login()
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
             'remember': 'false',
             'stayPut': 'false'
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         login_page = self._download_webpage(
             request, None, 'Logging in as %s' % username)
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
                 'remember': 'false',
                 'stayPut': 'false',
             }
-            request = compat_urllib_request.Request(
+            request = sanitized_Request(
                 self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
             login_page = self._download_webpage(
                 request, None,
@@ -83,6 +83,10 @@ class LyndaBaseIE(InfoExtractor):
             raise ExtractorError('Unable to log in')
     def _logout(self):
+        username, _ = self._get_login_info()
+        if username is None:
+            return
+
         self._download_webpage(
             'http://www.lynda.com/ajax/logout.aspx', None,
             'Logging out', 'Unable to log out', fatal=False)

View File

@@ -7,12 +7,12 @@ from ..compat import (
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
-    compat_urllib_request,
 )
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
+    sanitized_Request,
 )
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
             'filters': '0',
             'submit': "Continue - I'm over 18",
         }
-        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
+        request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self.report_age_confirmation()
         self._download_webpage(request, None, False, 'Unable to confirm age')
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
             return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
         # Retrieve video webpage to extract further information
-        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
+        req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
         # AnyClip videos require the flashversion cookie so that we get the link
         # to the mp4 file

View File

@@ -2,14 +2,12 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_filesize,
+    sanitized_Request,
 )
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
             ('fileId', video_id),
             ('__RequestVerificationToken', token),
         ]
-        req = compat_urllib_request.Request(
+        req = sanitized_Request(
             'http://minhateca.com.br/action/License/Download',
             data=compat_urllib_parse.urlencode(token_data))
         req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import random
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
 from ..utils import (
     xpath_text,
     int_or_none,
     ExtractorError,
+    sanitized_Request,
 )
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
             'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
             video_id)
-        vid_config_request = compat_urllib_request.Request(
+        vid_config_request = sanitized_Request(
             'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
             headers=http_headers)

View File

@@ -5,13 +5,11 @@ import json
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
+    sanitized_Request,
 )
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
         ]
         r_json = json.dumps(r)
         post = compat_urllib_parse.urlencode({'r': r_json})
-        req = compat_urllib_request.Request(self._API_URL, post)
+        req = sanitized_Request(self._API_URL, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         response = self._download_json(req, video_id)

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
 )
+from ..utils import sanitized_Request
 class MofosexIE(InfoExtractor):
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
         video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

View File

@@ -5,13 +5,11 @@ import os.path
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     remove_start,
+    sanitized_Request,
 )
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
             orig_webpage, 'builtin URL', default=None, group='url')
         if builtin_url:
-            req = compat_urllib_request.Request(builtin_url)
+            req = sanitized_Request(builtin_url)
             req.add_header('Referer', url)
             webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
             title = self._og_search_title(orig_webpage).strip()
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
             headers = {
                 b'Content-Type': b'application/x-www-form-urlencoded',
             }
-            req = compat_urllib_request.Request(url, post, headers)
+            req = sanitized_Request(url, post, headers)
             webpage = self._download_webpage(
                 req, video_id, note='Downloading video page ...')

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
             'hash': hash_key,
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -2,9 +2,7 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
+from ..utils import sanitized_Request
 class MovieClipsIE(InfoExtractor):
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         # it doesn't work if it thinks the browser it's too old
         req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
         webpage = self._download_webpage(req, display_id)

View File

@@ -5,7 +5,6 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_request,
     compat_str,
 )
 from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
     find_xpath_attr,
     fix_xml_ampersands,
     HEADRequest,
+    sanitized_Request,
     unescapeHTML,
     url_basename,
     RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
     def _extract_mobile_video_formats(self, mtvn_id):
         webpage_url = self._MOBILE_TEMPLATE % mtvn_id
-        req = compat_urllib_request.Request(webpage_url)
+        req = sanitized_Request(webpage_url)
         # Otherwise we get a webpage that would execute some javascript
         req.add_header('User-Agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,

View File

@@ -11,10 +11,10 @@ from ..compat import (
     compat_ord,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
-    compat_urllib_request,
 )
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
         mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
         if mobj is not None:
-            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
+            request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
             response = self._download_webpage(request, video_id,
                                               'Downloading video info')
             info = json.loads(base64.b64decode(response).decode('utf-8'))


@@ -8,11 +8,11 @@ import re
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_request,
     compat_urllib_parse,
     compat_str,
     compat_itertools_count,
 )
+from ..utils import sanitized_Request
 class NetEaseMusicBaseIE(InfoExtractor):
@@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
             if not details:
                 continue
             formats.append({
-                'url': 'http://m1.music.126.net/%s/%s.%s' %
+                'url': 'http://m5.music.126.net/%s/%s.%s' %
                        (cls._encrypt(details['dfsId']), details['dfsId'],
                         details['extension']),
                 'ext': details.get('extension'),
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
         return int(round(ms / 1000.0))
     def query_api(self, endpoint, video_id, note):
-        req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
+        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
         req.add_header('Referer', self._API_BASE)
         return self._download_json(req, video_id, note)


@@ -1,10 +1,8 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse
+from ..utils import sanitized_Request
 class NFBIE(InfoExtractor):
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
         uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
                                            page, 'director name', fatal=False)
-        request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
-                                                compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
+        request = sanitized_Request(
+            'https://www.nfb.ca/film/%s/player_config' % video_id,
+            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')


@@ -8,7 +8,6 @@ import datetime
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
+    sanitized_Request,
     xpath_text,
     determine_ext,
 )
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
             'password': password,
         }
         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'https://secure.nicovideo.jp/secure/login', login_data)
         login_results = self._download_webpage(
             request, None, note='Logging in', errnote='Unable to log in')
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
             'k': thumb_play_key,
             'v': video_id
         })
-        flv_info_request = compat_urllib_request.Request(
+        flv_info_request = sanitized_Request(
             'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
             {'Content-Type': 'application/x-www-form-urlencoded'})
         flv_info_webpage = self._download_webpage(


@@ -9,7 +9,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
-    compat_urllib_request,
 )
 from ..utils import (
     clean_html,
@@ -17,6 +16,7 @@ from ..utils import (
     int_or_none,
     float_or_none,
     parse_iso8601,
+    sanitized_Request,
 )
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
             'username': username,
             'password': password,
         }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
         login = self._download_json(request, None, 'Logging in as %s' % username)


@@ -4,11 +4,9 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
     urlencode_postdata,
     xpath_text,
     xpath_with_ns,
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
             'op': 'download1',
             'method_free': 'Continue to Video',
         }
-        req = compat_urllib_request.Request(url, urlencode_postdata(fields))
+        req = sanitized_Request(url, urlencode_postdata(fields))
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         webpage = self._download_webpage(req, video_id,
                                          'Downloading download page')


@@ -3,11 +3,13 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
+    NO_DEFAULT,
+    encode_dict,
+    sanitized_Request,
+    urlencode_postdata,
 )
@@ -38,19 +40,40 @@ class NovaMovIE(InfoExtractor):
     }
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        page = self._download_webpage(
-            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
-        if re.search(self._FILE_DELETED_REGEX, page) is not None:
+        video_id = self._match_id(url)
+        url = 'http://%s/video/%s' % (self._HOST, video_id)
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading video page')
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
-        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
-        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
+        def extract_filekey(default=NO_DEFAULT):
+            return self._search_regex(
+                self._FILEKEY_REGEX, webpage, 'filekey', default=default)
+        filekey = extract_filekey(default=None)
+        if not filekey:
+            fields = self._hidden_inputs(webpage)
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
+                'post url', default=url, group='url')
+            if not post_url.startswith('http'):
+                post_url = compat_urlparse.urljoin(url, post_url)
+            request = sanitized_Request(
+                post_url, urlencode_postdata(encode_dict(fields)))
+            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            request.add_header('Referer', post_url)
+            webpage = self._download_webpage(
+                request, video_id, 'Downloading continue to the video page')
+            filekey = extract_filekey()
+        title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
         api_response = self._download_webpage(
             'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
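
The new NovaMov code path handles hosts that interpose a "continue to video" form: when the filekey is missing from the first response, it re-posts the page's hidden inputs and retries. A self-contained sketch of that flow using only the stdlib (the helper names and the simple attribute-order regex are illustrative assumptions, not youtube-dl code):

    # Rough sketch of the continue-to-video fallback.
    import re
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    def parse_hidden_inputs(html):
        # Collect <input type="hidden" name=... value=...> pairs; real pages
        # vary attribute order, so a production regex would be looser.
        return dict(re.findall(
            r'<input[^>]+type=["\']hidden["\'][^>]+name=["\']([^"\']+)["\']'
            r'[^>]+value=["\']([^"\']*)["\']', html))

    def fetch_with_continue(url):
        html = urlopen(url).read().decode('utf-8', 'replace')
        if 'filekey' in html:  # first response already carries the key
            return html
        form = parse_hidden_inputs(html)
        req = Request(url, urlencode(form).encode('utf-8'))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.add_header('Referer', url)
        return urlopen(req).read().decode('utf-8', 'replace')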


@@ -1,12 +1,12 @@
 # encoding: utf-8
 from __future__ import unicode_literals
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from .common import InfoExtractor
-from ..utils import ExtractorError
-from ..compat import (
-    compat_str,
-    compat_urllib_request,
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    sanitized_Request,
 )
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
                 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
                 note='Downloading player JavaScript',
                 errnote='Unable to download player JavaScript')
-            bc_url = BrightcoveIE._extract_brightcove_url(player_code)
+            bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
             if bc_url is None:
                 raise ExtractorError('Could not find player definition')
-            return self.url_result(bc_url, 'Brightcove')
+            return self.url_result(bc_url, 'BrightcoveLegacy')
         elif source == 'vimeo':
             return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
         elif source == 'youtube':
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
     def _api_request(self, url, request_path):
         display_id = self._match_id(url)
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'http://api.nowness.com/api/' + request_path % display_id,
             headers={
                 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',


@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
+import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -13,8 +15,63 @@ from ..utils import (
 )
-class NowTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
+class NowTVBaseIE(InfoExtractor):
+    _VIDEO_FIELDS = (
+        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+        'broadcastStartDate', 'seoUrl', 'duration', 'files',
+        'format.defaultImage169Format', 'format.defaultImage169Logo')
+    def _extract_video(self, info, display_id=None):
+        video_id = compat_str(info['id'])
+        files = info['files']
+        if not files:
+            if info.get('geoblocked', False):
+                raise ExtractorError(
+                    'Video %s is not available from your location due to geo restriction' % video_id,
+                    expected=True)
+            if not info.get('free', True):
+                raise ExtractorError(
+                    'Video %s is not available for free' % video_id, expected=True)
+        formats = []
+        for item in files['items']:
+            if determine_ext(item['path']) != 'f4v':
+                continue
+            app, play_path = remove_start(item['path'], '/').split('/', 1)
+            formats.append({
+                'url': 'rtmpe://fms.rtl.de',
+                'app': app,
+                'play_path': 'mp4:%s' % play_path,
+                'ext': 'flv',
+                'page_url': 'http://rtlnow.rtl.de',
+                'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
+                'tbr': int_or_none(item.get('bitrate')),
+            })
+        self._sort_formats(formats)
+        title = info['title']
+        description = info.get('articleLong') or info.get('articleShort')
+        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+        duration = parse_duration(info.get('duration'))
+        f = info.get('format', {})
+        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+        return {
+            'id': video_id,
+            'display_id': display_id or info.get('seoUrl'),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'formats': formats,
+        }
+class NowTVIE(NowTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
     _TESTS = [{
         # rtl
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
             'id': '203519',
             'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
             'ext': 'flv',
-            'title': 'Die neuen Bauern und eine Hochzeit',
+            'title': 'Inka Bause stellt die neuen Bauern vor',
             'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
             'thumbnail': 're:^https?://.*\.jpg$',
             'timestamp': 1432580700,
@@ -136,58 +193,65 @@ class NowTVIE(InfoExtractor):
     }]
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        display_id_split = display_id.split('/')
-        if len(display_id) > 2:
-            display_id = '/'.join((display_id_split[0], display_id_split[-1]))
+        mobj = re.match(self._VALID_URL, url)
+        display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
         info = self._download_json(
-            'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
-            display_id)
-        video_id = compat_str(info['id'])
-        files = info['files']
-        if not files:
-            if info.get('geoblocked', False):
-                raise ExtractorError(
-                    'Video %s is not available from your location due to geo restriction' % video_id,
-                    expected=True)
-            if not info.get('free', True):
-                raise ExtractorError(
-                    'Video %s is not available for free' % video_id, expected=True)
-        formats = []
-        for item in files['items']:
-            if determine_ext(item['path']) != 'f4v':
-                continue
-            app, play_path = remove_start(item['path'], '/').split('/', 1)
-            formats.append({
-                'url': 'rtmpe://fms.rtl.de',
-                'app': app,
-                'play_path': 'mp4:%s' % play_path,
-                'ext': 'flv',
-                'page_url': 'http://rtlnow.rtl.de',
-                'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
-                'tbr': int_or_none(item.get('bitrate')),
-            })
-        self._sort_formats(formats)
-        title = info['title']
-        description = info.get('articleLong') or info.get('articleShort')
-        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
-        duration = parse_duration(info.get('duration'))
-        f = info.get('format', {})
-        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'formats': formats,
-        }
+            'https://api.nowtv.de/v3/movies/%s?fields=%s'
+            % (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
+        return self._extract_video(info, display_id)
+class NowTVListIE(NowTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
+    _SHOW_FIELDS = ('title', )
+    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
+    _TESTS = [{
+        'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
+        'info_dict': {
+            'id': '17006',
+            'title': 'stern TV - Aktuell',
+        },
+        'playlist_count': 1,
+    }, {
+        'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
+        'info_dict': {
+            'id': '20716',
+            'title': 'Das Supertalent - FREE Staffel 8',
+        },
+        'playlist_count': 14,
+    }]
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('show_id')
+        season_id = mobj.group('id')
+        fields = []
+        fields.extend(self._SHOW_FIELDS)
+        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
+        fields.extend(
+            'formatTabs.formatTabPages.container.movies.%s' % field
+            for field in self._VIDEO_FIELDS)
+        list_info = self._download_json(
+            'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
+            % (','.join(fields), show_id),
+            season_id)
+        season = next(
+            season for season in list_info['formatTabs']['items']
+            if season.get('seoheadline') == season_id)
+        title = '%s - %s' % (list_info['title'], season['headline'])
+        entries = []
+        for container in season['formatTabPages']['items']:
+            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
+                entries.append(self._extract_video(info))
+        return self.playlist_result(
+            entries, compat_str(season.get('id') or season_id), title)
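
The list extractor builds a single long fields= query by prefixing each video field with its position in the response tree, so one request fetches show, season, and video metadata at once. A standalone rendition of that expansion (field names taken from the diff above; the show name in the final line is just a sample value):

    SHOW_FIELDS = ('title',)
    SEASON_FIELDS = ('id', 'headline', 'seoheadline')
    VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'seoUrl', 'duration', 'files',
        'format.defaultImage169Format', 'format.defaultImage169Logo')

    fields = []
    fields.extend(SHOW_FIELDS)
    fields.extend('formatTabs.%s' % f for f in SEASON_FIELDS)
    fields.extend(
        'formatTabs.formatTabPages.container.movies.%s' % f
        for f in VIDEO_FIELDS)

    print('https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
          % (','.join(fields), 'das-supertalent'))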


@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
     IE_NAME = 'nowvideo'
     IE_DESC = 'NowVideo'
-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
-    _HOST = 'www.nowvideo.ch'
+    _HOST = 'www.nowvideo.to'
     _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
     _FILEKEY_REGEX = r'var fkzd="([^"]+)";'


@@ -3,11 +3,9 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     parse_duration,
+    sanitized_Request,
     unified_strdate,
 )
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
         formats = []
         for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
-            request = compat_urllib_request.Request(
+            request = sanitized_Request(
                 'http://m.nuvid.com/play/%s' % video_id)
             request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
             webpage = self._download_webpage(


@@ -2,9 +2,7 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import (
-    js_to_json,
-)
+from ..utils import js_to_json
 class PatreonIE(InfoExtractor):
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
             'password': password,
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'https://www.patreon.com/processLogin',
             compat_urllib_parse.urlencode(login_form).encode('utf-8')
         )


@@ -22,7 +22,7 @@ class PBSIE(InfoExtractor):
            # Article with embedded player (or direct video)
            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
-           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
+           (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
         )
     '''
@@ -170,6 +170,10 @@ class PBSIE(InfoExtractor):
             'params': {
                 'skip_download': True,  # requires ffmpeg
             },
+        },
+        {
+            'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
+            'only_matching': True,
         }
     ]
     _ERRORS = {
@@ -259,7 +263,7 @@ class PBSIE(InfoExtractor):
             return self.playlist_result(entries, display_id)
         info = self._download_json(
-            'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+            'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
             display_id)
         formats = []


@@ -2,16 +2,12 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
 from ..utils import parse_iso8601
 class PeriscopeIE(InfoExtractor):
     IE_DESC = 'Periscope'
-    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
     # Alive example URLs can be found here http://onperiscope.com/
     _TESTS = [{
         'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -29,6 +25,9 @@ class PeriscopeIE(InfoExtractor):
     }, {
         'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
         'only_matching': True,
+    }, {
+        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+        'only_matching': True,
     }]
     def _call_api(self, method, value):
@@ -81,24 +80,3 @@ class PeriscopeIE(InfoExtractor):
             'thumbnails': thumbnails,
             'formats': formats,
         }
-class QuickscopeIE(InfoExtractor):
-    IE_DESC = 'Quick Scope'
-    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://watchonperiscope.com/broadcast/56180087',
-        'only_matching': True,
-    }
-    def _real_extract(self, url):
-        broadcast_id = self._match_id(url)
-        request = compat_urllib_request.Request(
-            'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({
-                'broadcast_id': broadcast_id,
-                'entry_ticket': '',
-                'from_push': 'false',
-                'uses_sessions': 'true',
-            }).encode('utf-8'))
-        return self.url_result(
-            self._download_json(request, broadcast_id)['share_url'], 'Periscope')


@@ -5,12 +5,10 @@ import re
 import os.path
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
         }
-        req = compat_urllib_request.Request(url, post, headers)
+        req = sanitized_Request(url, post, headers)
         webpage = self._download_webpage(
             req, video_id, note='Downloading video page ...')


@@ -1,29 +1,35 @@
 from __future__ import unicode_literals
-import re
 import json
+import random
+import collections
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
-    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     parse_duration,
+    sanitized_Request,
 )
-class PluralsightIE(InfoExtractor):
+class PluralsightBaseIE(InfoExtractor):
+    _API_BASE = 'http://app.pluralsight.com'
+class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
-    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
-    _LOGIN_URL = 'https://www.pluralsight.com/id/'
+    _LOGIN_URL = 'https://app.pluralsight.com/id/'
     _NETRC_MACHINE = 'pluralsight'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
         'md5': '4d458cf5cf4c593788672419a8dd4cf8',
         'info_dict': {
@@ -33,7 +39,14 @@ class PluralsightIE(InfoExtractor):
             'duration': 338,
         },
         'skip': 'Requires pluralsight account credentials',
-    }
+    }, {
+        'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
+        'only_matching': True,
+    }, {
+        # available without pluralsight account
+        'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
+        'only_matching': True,
+    }]
     def _real_initialize(self):
         self._login()
@@ -41,7 +54,7 @@ class PluralsightIE(InfoExtractor):
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
-            self.raise_login_required('Pluralsight account is required')
+            return
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
@@ -60,7 +73,7 @@ class PluralsightIE(InfoExtractor):
         if not post_url.startswith('http'):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -73,30 +86,47 @@ class PluralsightIE(InfoExtractor):
         if error:
             raise ExtractorError('Unable to login: %s' % error, expected=True)
+        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+            raise ExtractorError('Unable to log in')
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        author = mobj.group('author')
-        name = mobj.group('name')
-        clip_id = mobj.group('clip')
-        course = mobj.group('course')
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        author = qs.get('author', [None])[0]
+        name = qs.get('name', [None])[0]
+        clip_id = qs.get('clip', [None])[0]
+        course = qs.get('course', [None])[0]
+        if any(not f for f in (author, name, clip_id, course,)):
+            raise ExtractorError('Invalid URL', expected=True)
         display_id = '%s-%s' % (name, clip_id)
         webpage = self._download_webpage(url, display_id)
-        collection = self._parse_json(
-            self._search_regex(
-                r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
-                webpage, 'modules'),
-            display_id)
+        modules = self._search_regex(
+            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
+            webpage, 'modules', default=None)
+        if modules:
+            collection = self._parse_json(modules, display_id)
+        else:
+            # Webpage may be served in different layout (see
+            # https://github.com/rg3/youtube-dl/issues/7607)
+            collection = self._parse_json(
+                self._search_regex(
+                    r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
+                display_id)['course']['modules']
         module, clip = None, None
         for module_ in collection:
-            if module_.get('moduleName') == name:
+            if name in (module_.get('moduleName'), module_.get('name')):
                 module = module_
                 for clip_ in module_.get('clips', []):
                     clip_index = clip_.get('clipIndex')
+                    if clip_index is None:
+                        clip_index = clip_.get('index')
                     if clip_index is None:
                         continue
                     if compat_str(clip_index) == clip_id:
@@ -112,13 +142,33 @@ class PluralsightIE(InfoExtractor):
             'high': {'width': 1024, 'height': 768},
         }
+        AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
         ALLOWED_QUALITIES = (
-            ('webm', ('high',)),
-            ('mp4', ('low', 'medium', 'high',)),
+            AllowedQuality('webm', ('high',)),
+            AllowedQuality('mp4', ('low', 'medium', 'high',)),
         )
+        # In order to minimize the number of calls to ViewClip API and reduce
+        # the probability of being throttled or banned by Pluralsight we will request
+        # only single format until formats listing was explicitly requested.
+        if self._downloader.params.get('listformats', False):
+            allowed_qualities = ALLOWED_QUALITIES
+        else:
+            def guess_allowed_qualities():
+                req_format = self._downloader.params.get('format') or 'best'
+                req_format_split = req_format.split('-')
+                if len(req_format_split) > 1:
+                    req_ext, req_quality = req_format_split
+                    for allowed_quality in ALLOWED_QUALITIES:
+                        if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
+                            return (AllowedQuality(req_ext, (req_quality, )), )
+                req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
+                return (AllowedQuality(req_ext, ('high', )), )
+            allowed_qualities = guess_allowed_qualities()
         formats = []
-        for ext, qualities in ALLOWED_QUALITIES:
+        for ext, qualities in allowed_qualities:
            for quality in qualities:
                f = QUALITIES[quality].copy()
                clip_post = {
@@ -131,13 +181,24 @@ class PluralsightIE(InfoExtractor):
                     'mt': ext,
                     'q': '%dx%d' % (f['width'], f['height']),
                 }
-                request = compat_urllib_request.Request(
-                    'http://www.pluralsight.com/training/Player/ViewClip',
+                request = sanitized_Request(
+                    '%s/training/Player/ViewClip' % self._API_BASE,
                     json.dumps(clip_post).encode('utf-8'))
                 request.add_header('Content-Type', 'application/json;charset=utf-8')
                 format_id = '%s-%s' % (ext, quality)
                 clip_url = self._download_webpage(
                     request, display_id, 'Downloading %s URL' % format_id, fatal=False)
+                # Pluralsight tracks multiple sequential calls to ViewClip API and start
+                # to return 429 HTTP errors after some time (see
+                # https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
+                # to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
+                # To somewhat reduce the probability of these consequences
+                # we will sleep random amount of time before each call to ViewClip.
+                self._sleep(
+                    random.randint(2, 5), display_id,
+                    '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
                 if not clip_url:
                     continue
                 f.update({
@@ -163,10 +224,10 @@ class PluralsightIE(InfoExtractor):
         }
-class PluralsightCourseIE(InfoExtractor):
+class PluralsightCourseIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight:course'
-    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
-    _TEST = {
+    _TESTS = [{
         # Free course from Pluralsight Starter Subscription for Microsoft TechNet
         # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
         'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
@@ -176,7 +237,14 @@ class PluralsightCourseIE(InfoExtractor):
             'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
         },
         'playlist_count': 31,
-    }
+    }, {
+        # available without pluralsight account
+        'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
+        'only_matching': True,
+    }, {
+        'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
+        'only_matching': True,
+    }]
     def _real_extract(self, url):
         course_id = self._match_id(url)
@@ -184,14 +252,14 @@ class PluralsightCourseIE(InfoExtractor):
         # TODO: PSM cookie
         course = self._download_json(
-            'http://www.pluralsight.com/data/course/%s' % course_id,
+            '%s/data/course/%s' % (self._API_BASE, course_id),
             course_id, 'Downloading course JSON')
         title = course['title']
         description = course.get('description') or course.get('shortDescription')
         course_data = self._download_json(
-            'http://www.pluralsight.com/data/course/content/%s' % course_id,
+            '%s/data/course/content/%s' % (self._API_BASE, course_id),
             course_id, 'Downloading course data JSON')
         entries = []
@@ -201,7 +269,7 @@ class PluralsightCourseIE(InfoExtractor):
                 if not player_parameters:
                     continue
                 entries.append(self.url_result(
-                    'http://www.pluralsight.com/training/player?%s' % player_parameters,
+                    '%s/training/player?%s' % (self._API_BASE, player_parameters),
                     'Pluralsight'))
         return self.playlist_result(entries, course_id, title, description)
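
The ViewClip changes encode a client-side rate-limit policy: request only one format by default instead of probing all of them, and jitter the delay between consecutive API calls. Stripped of the extractor plumbing, the jittered-delay part looks roughly like this (fetch_clip_url is a hypothetical stand-in for the real request):

    import random
    import time

    def fetch_all(clip_posts, fetch_clip_url):
        urls = []
        for post in clip_posts:
            urls.append(fetch_clip_url(post))
            # Sleeping 2-5 seconds between calls makes the client look less
            # like a scraper and lowers the chance of 429 responses or bans.
            time.sleep(random.randint(2, 5))
        return urls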


@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id or video_id)
         title = self._html_search_regex(
-            r'<title>(.+) porn HD.+?</title>', webpage, 'title')
+            [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
+             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
         description = self._html_search_regex(
             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
         view_count = int_or_none(self._html_search_regex(


@@ -8,10 +8,10 @@ from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
 )
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
     str_to_int,
 )
 from ..aes import (
@@ -53,7 +53,7 @@ class PornHubIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = compat_urllib_request.Request(
+        req = sanitized_Request(
             'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)


@@ -3,11 +3,9 @@ from __future__ import unicode_literals
 import json
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     int_or_none,
+    sanitized_Request,
 )
@@ -46,7 +44,7 @@ class PornotubeIE(InfoExtractor):
             'authenticationSpaceKey': originAuthenticationSpaceKey,
             'credentials': 'Clip Application',
         }
-        token_req = compat_urllib_request.Request(
+        token_req = sanitized_Request(
             'https://api.aebn.net/auth/v1/token/primal',
             data=json.dumps(token_req_data).encode('utf-8'))
         token_req.add_header('Content-Type', 'application/json')
@@ -56,7 +54,7 @@ class PornotubeIE(InfoExtractor):
         token = token_answer['tokenKey']
         # Get video URL
-        delivery_req = compat_urllib_request.Request(
+        delivery_req = sanitized_Request(
             'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
         delivery_req.add_header('Authorization', token)
         delivery_info = self._download_json(
@@ -64,7 +62,7 @@ class PornotubeIE(InfoExtractor):
         video_url = delivery_info['mediaUrl']
         # Get additional info (title etc.)
-        info_req = compat_urllib_request.Request(
+        info_req = sanitized_Request(
             'https://api.aebn.net/content/v1/clips/%s?expand='
             'title,description,primaryImageNumber,startSecond,endSecond,'
             'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'


@@ -1,11 +1,11 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
-from ..utils import ExtractorError
+from ..compat import compat_urllib_parse
+from ..utils import (
+    ExtractorError,
+    sanitized_Request,
+)
 class PrimeShareTVIE(InfoExtractor):
@@ -41,7 +41,7 @@ class PrimeShareTVIE(InfoExtractor):
             webpage, 'wait time', default=7)) + 1
         self._sleep(wait_time, video_id)
-        req = compat_urllib_request.Request(
+        req = sanitized_Request(
             url, compat_urllib_parse.urlencode(fields), headers)
         video_page = self._download_webpage(
             req, video_id, 'Downloading video page')


@@ -4,13 +4,11 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     determine_ext,
     ExtractorError,
+    sanitized_Request,
 )
@@ -37,7 +35,7 @@ class PromptFileIE(InfoExtractor):
         fields = self._hidden_inputs(webpage)
         post = compat_urllib_parse.urlencode(fields)
-        req = compat_urllib_request.Request(url, post)
+        req = sanitized_Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         webpage = self._download_webpage(
             req, video_id, 'Downloading video page')


@@ -7,11 +7,11 @@ import re
 from .common import InfoExtractor
 from ..utils import (
+    sanitized_Request,
     strip_jsonp,
     unescapeHTML,
     clean_html,
 )
-from ..compat import compat_urllib_request
 class QQMusicIE(InfoExtractor):
@@ -201,7 +201,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
         singer_desc = None
         if singer_id:
-            req = compat_urllib_request.Request(
+            req = sanitized_Request(
                 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
             req.add_header(
                 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')


@@ -6,11 +6,11 @@ import re
 import time
 from .common import InfoExtractor
-from ..compat import compat_urllib_request, compat_urlparse
 from ..utils import (
     ExtractorError,
     float_or_none,
     remove_end,
+    sanitized_Request,
     std_headers,
     struct_unpack,
 )
@@ -102,20 +102,14 @@ class RTVEALaCartaIE(InfoExtractor):
         if info['state'] == 'DESPU':
             raise ExtractorError('The video is no longer available', expected=True)
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
-        png_request = compat_urllib_request.Request(png_url)
+        png_request = sanitized_Request(png_url)
         png_request.add_header('Referer', url)
         png = self._download_webpage(png_request, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
         if not video_url.endswith('.f4m'):
-            auth_url = video_url.replace(
+            video_url = video_url.replace(
                 'resources/', 'auth/resources/'
             ).replace('.net.rtve', '.multimedia.cdn.rtve')
-            video_path = self._download_webpage(
-                auth_url, video_id, 'Getting video url')
-            # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
-            # the right Content-Length header and the mp4 format
-            video_url = compat_urlparse.urljoin(
-                'http://mvod1.akcdn.rtve.es/', video_path)
         subtitles = None
         if info.get('sbtFile') is not None:


@@ -9,7 +9,7 @@ from ..compat import (
     compat_str,
 )
 from ..utils import (
-    ExtractorError,
+    determine_ext,
     unified_strdate,
 )
@@ -51,10 +51,25 @@ class RutubeIE(InfoExtractor):
             'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
             video_id, 'Downloading options JSON')
-        m3u8_url = options['video_balancer'].get('m3u8')
-        if m3u8_url is None:
-            raise ExtractorError('Couldn\'t find m3u8 manifest url')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        formats = []
+        for format_id, format_url in options['video_balancer'].items():
+            ext = determine_ext(format_url)
+            if ext == 'm3u8':
+                m3u8_formats = self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
+            elif ext == 'f4m':
+                f4m_formats = self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_id, fatal=False)
+                if f4m_formats:
+                    formats.extend(f4m_formats)
+            else:
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
         return {
             'id': video['id'],
@@ -74,9 +89,9 @@ class RutubeIE(InfoExtractor):
 class RutubeEmbedIE(InfoExtractor):
     IE_NAME = 'rutube:embed'
     IE_DESC = 'Rutube embedded videos'
-    _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
+    _VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
         'info_dict': {
             'id': 'a10e53b86e8f349080f718582ce4c661',
@@ -90,7 +105,10 @@ class RutubeEmbedIE(InfoExtractor):
         'params': {
             'skip_download': 'Requires ffmpeg',
         },
-    }
+    }, {
+        'url': 'http://rutube.ru/play/embed/8083783',
+        'only_matching': True,
+    }]
     def _real_extract(self, url):
         embed_id = self._match_id(url)
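
Both this Rutube change and the Ruutu one below use the same pattern: inspect the URL's apparent extension, route it to the matching manifest parser, and pass fatal=False so one bad manifest does not abort the whole extraction. A freestanding sketch of the dispatch, with stub parsers standing in for the real HLS/HDS helpers (everything here is illustrative, not youtube-dl code):

    from urllib.parse import urlparse

    def determine_ext(url):
        # Extension of the URL path, ignoring any query string.
        path = urlparse(url).path
        return path.rpartition('.')[2].lower() if '.' in path else ''

    def parse_m3u8(url):
        # Stub: a real helper would download and parse the HLS playlist.
        return [{'url': url, 'format_id': 'hls'}]

    def parse_f4m(url):
        # Stub: a real helper would download and parse the HDS manifest.
        return [{'url': url, 'format_id': 'hds'}]

    def collect_formats(balancer):
        # balancer maps format ids to URLs, like Rutube's video_balancer.
        formats = []
        for format_id, format_url in balancer.items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(parse_m3u8(format_url) or [])
            elif ext == 'f4m':
                formats.extend(parse_f4m(format_url) or [])
            else:
                formats.append({'url': format_url, 'format_id': format_id})
        return formats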


@@ -57,16 +57,21 @@ class RuutuIE(InfoExtractor):
                     extract_formats(child)
                 elif child.tag.endswith('File'):
                     video_url = child.text
-                    if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
+                    if (not video_url or video_url in processed_urls or
+                            any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
                         return
                     processed_urls.append(video_url)
                     ext = determine_ext(video_url)
                     if ext == 'm3u8':
-                        formats.extend(self._extract_m3u8_formats(
-                            video_url, video_id, 'mp4', m3u8_id='hls'))
+                        m3u8_formats = self._extract_m3u8_formats(
+                            video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                        if m3u8_formats:
+                            formats.extend(m3u8_formats)
                     elif ext == 'f4m':
-                        formats.extend(self._extract_f4m_formats(
-                            video_url, video_id, f4m_id='hds'))
+                        f4m_formats = self._extract_f4m_formats(
+                            video_url, video_id, f4m_id='hds', fatal=False)
+                        if f4m_formats:
+                            formats.extend(f4m_formats)
                     else:
                         proto = compat_urllib_parse_urlparse(video_url).scheme
                         if not child.tag.startswith('HTTP') and proto != 'rtmp':


@@ -4,14 +4,12 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
     smuggle_url,
     std_headers,
 )
@@ -58,7 +56,7 @@ class SafariBaseIE(InfoExtractor):
             'next': '',
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
         login_page = self._download_webpage(
             request, None, 'Logging in as %s' % username)
@@ -112,11 +110,11 @@ class SafariIE(SafariBaseIE):
             '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
             part)
-        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
         if not bc_url:
             raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
-        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
 class SafariCourseIE(SafariBaseIE):


@@ -6,14 +6,12 @@ import json
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urlparse,
-)
+from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
+    sanitized_Request,
     unified_strdate,
 )
@@ -37,7 +35,7 @@ class SandiaIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request(url)
         req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
         webpage = self._download_webpage(req, video_id)


@@ -3,13 +3,11 @@ from __future__ import unicode_literals
 import base64
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
+    sanitized_Request,
 )
@@ -46,7 +44,7 @@ class SharedIE(InfoExtractor):
                 'Video %s does not exist' % video_id, expected=True)
         download_form = self._hidden_inputs(webpage)
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             url, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')


@@ -4,12 +4,10 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     parse_duration,
+    sanitized_Request,
 )
@@ -50,7 +48,7 @@ class ShareSixIE(InfoExtractor):
             'method_free': 'Free'
         }
         post = compat_urllib_parse.urlencode(fields)
-        req = compat_urllib_request.Request(url, post)
+        req = sanitized_Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         webpage = self._download_webpage(req, video_id,


@@ -4,10 +4,8 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse
+from ..utils import sanitized_Request
 class SinaIE(InfoExtractor):
@@ -61,7 +59,7 @@ class SinaIE(InfoExtractor):
         if mobj.group('token') is not None:
             # The video id is in the redirected url
             self.to_screen('Getting video id')
-            request = compat_urllib_request.Request(url)
+            request = sanitized_Request(url)
             request.get_method = lambda: 'HEAD'
             (_, urlh) = self._download_webpage_handle(request, 'NA', False)
             return self._real_extract(urlh.geturl())
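
The Sina path keeps a small but useful urllib trick: overriding get_method turns an ordinary Request into a HEAD request, so the extractor can follow the redirect and read the video id out of the final URL without downloading the body. On Python 3 the same idea can be written directly (a sketch, not youtube-dl code):

    from urllib.request import Request, urlopen

    def resolve_redirect(url):
        # method='HEAD' asks the server for headers only; urlopen still
        # follows redirects, so geturl() reports the final location.
        req = Request(url, method='HEAD')
        with urlopen(req) as resp:
            return resp.geturl()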


@@ -7,13 +7,11 @@ import hashlib
 import uuid
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
 from ..utils import (
     ExtractorError,
     int_or_none,
+    sanitized_Request,
     unified_strdate,
 )
@@ -176,7 +174,7 @@ class SmotriIE(InfoExtractor):
         if video_password:
             video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -339,7 +337,7 @@ class SmotriBroadcastIE(InfoExtractor):
             'password': password,
         }
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         broadcast_page = self._download_webpage(


@@ -6,11 +6,11 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_request,
     compat_urllib_parse,
 )
 from ..utils import (
     ExtractorError,
+    sanitized_Request,
 )
@@ -96,7 +96,7 @@ class SohuIE(InfoExtractor):
         else:
             base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
-        req = compat_urllib_request.Request(base_data_url + vid_id)
+        req = sanitized_Request(base_data_url + vid_id)
         cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
         if cn_verification_proxy:


@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re import re
import itertools import itertools
from .common import InfoExtractor from .common import (
InfoExtractor,
SearchInfoExtractor
)
from ..compat import ( from ..compat import (
compat_str, compat_str,
compat_urlparse, compat_urlparse,
compat_urllib_parse, compat_urllib_parse,
) )
from ..utils import ( from ..utils import (
encode_dict,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unified_strdate, unified_strdate,
@@ -469,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         'description': data.get('description'),
         'entries': entries,
     }
+
+
+class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
+    IE_NAME = 'soundcloud:search'
+    IE_DESC = 'Soundcloud search'
+    _MAX_RESULTS = float('inf')
+    _TESTS = [{
+        'url': 'scsearch15:post-avant jazzcore',
+        'info_dict': {
+            'title': 'post-avant jazzcore',
+        },
+        'playlist_count': 15,
+    }]
+
+    _SEARCH_KEY = 'scsearch'
+    _MAX_RESULTS_PER_PAGE = 200
+    _DEFAULT_RESULTS_PER_PAGE = 50
+    _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+    def _get_collection(self, endpoint, collection_id, **query):
+        limit = min(
+            query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
+            self._MAX_RESULTS_PER_PAGE)
+        query['limit'] = limit
+        query['client_id'] = self._CLIENT_ID
+        query['linked_partitioning'] = '1'
+        query['offset'] = 0
+        data = compat_urllib_parse.urlencode(encode_dict(query))
+        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
+
+        collected_results = 0
+
+        for i in itertools.count(1):
+            response = self._download_json(
+                next_url, collection_id, 'Downloading page {0}'.format(i),
+                'Unable to download API page')
+
+            collection = response.get('collection', [])
+            if not collection:
+                break
+
+            collection = list(filter(bool, collection))
+            collected_results += len(collection)
+
+            for item in collection:
+                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
+
+            if not collection or collected_results >= limit:
+                break
+
+            next_url = response.get('next_href')
+            if not next_url:
+                break
+
+    def _get_n_results(self, query, n):
+        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+        return self.playlist_result(tracks, playlist_title=query)
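For a query like scsearch15:post-avant jazzcore (the test case above), _get_n_results calls _get_collection with limit=15, and the first API request it builds looks like the following (a sketch for illustration; CLIENT_ID stands in for the extractor's real _CLIENT_ID):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    query = {
        'limit': 15,  # min(requested 15, _MAX_RESULTS_PER_PAGE 200)
        'client_id': 'CLIENT_ID',  # placeholder, not a real key
        'linked_partitioning': '1',
        'offset': 0,
        'q': 'post-avant jazzcore',
    }
    print('https://api-v2.soundcloud.com/search/tracks?' + urlencode(query))

Subsequent pages then follow each response's next_href until collected_results reaches the limit or the collection runs dry.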

youtube_dl/extractor/space.py View File

@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from ..utils import RegexNotFoundError, ExtractorError


 class SpaceIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
     _TEST = {
-        'add_ie': ['Brightcove'],
+        'add_ie': ['BrightcoveLegacy'],
         'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
         'info_dict': {
             'id': '2780937028001',
@@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):
             brightcove_url = self._og_search_video_url(webpage)
         except RegexNotFoundError:
             # Other videos works fine with the info from the object
-            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+            brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
         if brightcove_url is None:
             raise ExtractorError(
                 'The webpage does not contain a video', expected=True)
-        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
+        return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())

youtube_dl/extractor/spankwire.py View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
 )
 from ..utils import (
+    sanitized_Request,
     str_to_int,
     unified_strdate,
 )
@@ -51,7 +51,7 @@ class SpankwireIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-       req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
+       req = sanitized_Request('http://www.' + mobj.group('url'))
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

youtube_dl/extractor/sportdeutschland.py View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     parse_iso8601,
+    sanitized_Request,
 )
@@ -54,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
        api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
            sport_id, video_id)
-       req = compat_urllib_request.Request(api_url, headers={
+       req = sanitized_Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
            'Referer': url,
        })

youtube_dl/extractor/streamcloud.py View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
+from ..compat import compat_urllib_parse
+from ..utils import sanitized_Request


 class StreamcloudIE(InfoExtractor):
@@ -43,7 +41,7 @@ class StreamcloudIE(InfoExtractor):
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }

-       req = compat_urllib_request.Request(url, post, headers)
+       req = sanitized_Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note='Downloading video page ...')

youtube_dl/extractor/streamcz.py View File

@@ -5,11 +5,9 @@ import hashlib
 import time

 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     int_or_none,
+    sanitized_Request,
 )
@@ -54,7 +52,7 @@ class StreamCZIE(InfoExtractor):
        video_id = self._match_id(url)
        api_path = '/episode/%s' % video_id

-       req = compat_urllib_request.Request(self._API_URL + api_path)
+       req = sanitized_Request(self._API_URL + api_path)
        req.add_header('Api-Password', _get_api_key(api_path))
        data = self._download_json(req, video_id)

youtube_dl/extractor/tapely.py View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_request,
-)
 from ..utils import (
     clean_html,
     ExtractorError,
     float_or_none,
     parse_iso8601,
+    sanitized_Request,
 )
@@ -53,7 +51,7 @@ class TapelyIE(InfoExtractor):
        display_id = mobj.group('id')

        playlist_url = self._API_URL.format(display_id)
-       request = compat_urllib_request.Request(playlist_url)
+       request = sanitized_Request(playlist_url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        request.add_header('Accept', 'application/json')
        request.add_header('Referer', url)

youtube_dl/extractor/theplatform.py View File

@@ -139,6 +139,11 @@ class ThePlatformIE(ThePlatformBaseIE):
             'upload_date': '20150701',
             'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
         },
+    }, {
+        # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
+        # geo-restricted (US), HLS encrypted with AES-128
+        'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
+        'only_matching': True,
     }]

     @staticmethod
@@ -182,8 +187,12 @@ class ThePlatformIE(ThePlatformBaseIE):
            # Seems there's no pattern for the interested script filename, so
            # I try one by one
            for script in reversed(scripts):
-               feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
-               feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+               feed_script = self._download_webpage(
+                   self._proto_relative_url(script, 'http:'),
+                   video_id, 'Downloading feed script')
+               feed_id = self._search_regex(
+                   r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+                   'default feed id', default=None)
                if feed_id is not None:
                    break
            if feed_id is None:
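The functional change in this hunk is routing the script URL through _proto_relative_url before downloading it, so protocol-relative '//host/feed.js' references get an explicit scheme. A sketch of the helper's assumed behaviour (the real method is defined on InfoExtractor in extractor/common.py):

    def _proto_relative_url(url, scheme='http:'):
        # Prefix protocol-relative URLs with a scheme; leave others untouched.
        if url is None:
            return url
        return scheme + url if url.startswith('//') else url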
@@ -193,6 +202,15 @@ class ThePlatformIE(ThePlatformBaseIE):
        if smuggled_data.get('force_smil_url', False):
            smil_url = url
+       # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+       elif '/guid/' in url:
+           webpage = self._download_webpage(url, video_id)
+           smil_url = self._search_regex(
+               r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
+               webpage, 'smil url', group='url')
+           path = self._search_regex(
+               r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
+           smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
        elif mobj.group('config'):
            config_url = url + '&form=json'
            config_url = config_url.replace('swf/', 'config/')
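The new '/guid/' branch scrapes the player page for a <link ... type="application/smil+xml"> tag and derives the SMIL path from it. A self-contained illustration of what the two regexes extract, run against hypothetical sample markup (not taken from a real player page):

    import re

    webpage = ('<link rel="alternate" '
               'href="http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/2928790" '
               'type="application/smil+xml"/>')

    smil_url = re.search(
        r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
        webpage).group('url')
    path = re.search(
        r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url).group(1)

    print(smil_url)  # http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/2928790
    print(path)      # NnzsPC/media/guid/2410887629/2928790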

youtube_dl/extractor/tlc.py View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .brightcove import BrightcoveIE
+from .brightcove import BrightcoveLegacyIE
 from .discovery import DiscoveryIE
 from ..compat import compat_urlparse
@@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):
        return {
            '_type': 'url',
-           'url': BrightcoveIE._extract_brightcove_url(iframe),
-           'ie': BrightcoveIE.ie_key(),
+           'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
+           'ie': BrightcoveLegacyIE.ie_key(),
        }

Some files were not shown because too many files have changed in this diff.