Compare commits

...

164 Commits

Author SHA1 Message Date
Philipp Hagemeister
ba7a92b0ce release 2015.11.24 2015-11-24 07:46:38 +01:00
Philipp Hagemeister
4c7d816dd7 [jsinterp] Adapt to updated YouTube code generation (Fixes #7623, fixes #7624, fixes #7625, fixes #7626) 2015-11-24 07:45:38 +01:00
Philipp Hagemeister
032f2f260f README: Document which other programs may be helpful (Fixes #7621) 2015-11-24 03:38:46 +01:00
Philipp Hagemeister
20e98bf6c0 release 2015.11.23 2015-11-23 18:07:58 +01:00
Sergey M․
5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00
Sergey M․
67dda51722 Rename compat_urllib_request_Request to sanitized_Request and move to utils 2015-11-23 21:55:15 +06:00
Sergey M․
e4c4bcf36f [vimeo] Use compat_urllib_request_Request 2015-11-23 21:55:14 +06:00
Sergey M․
82d8a8b6e2 [YoutubeDL] Wrap plain-text URL requests in compat_urllib_request_Request 2015-11-23 21:55:13 +06:00
Sergey M․
13a10d5aa3 [compat] Add compat_urllib_request_Request
This is actually not a compatibility routine but rather a workaround for URLs without protocol specified.
The protocol-less URL is treated as HTTP one since it's most probable scenario and it will most likely to
redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request
preprocessing that may be added in future.
2015-11-23 21:55:12 +06:00
Sergey M․
9022726446 [youtube] Fix test 2015-11-23 21:37:21 +06:00
Sergey M․
94bfcd23b7 [youtube] Fix test 2015-11-23 21:35:23 +06:00
Sergey M․
526b3b0716 [youtube] Clarify ytplayer.config extraction rationale 2015-11-23 21:14:03 +06:00
Sergey M․
61f92af1cf [youtube] Add test with '};' in tags 2015-11-23 21:02:37 +06:00
Sergey M․
a72778d364 [youtube] Improve ytplayer.config extraction 2015-11-23 21:00:06 +06:00
Sergey M
5ae17037a3 Merge pull request #7599 from lalinsky/fix-youtube
[youtube] More explicit player config JSON extraction (fixes #7468)
2015-11-23 20:52:23 +06:00
Sergey M․
02f0da20b0 [pluralsight] Add support for alternative webpage layout (Closes #7607) 2015-11-23 03:08:38 +06:00
Lukáš Lalinský
b41631c4e6 [youtube] Send the list of patterns directly to _search_regex 2015-11-22 13:53:26 +01:00
Lukáš Lalinský
0e49d9a6b0 [youtube] Fall back to the original regex for ytplayer.config 2015-11-22 13:49:33 +01:00
Sergey M․
4a7d108ab3 [rutube] Remove unnecessary print 2015-11-22 18:24:17 +06:00
Lukáš Lalinský
3cfd000849 [youtube] More explicit player config JSON extraction (fixes #7468) 2015-11-22 13:14:35 +01:00
Sergey M․
1b38185361 [pornhd] Fix title extraction (Closes #7596) 2015-11-22 18:08:30 +06:00
Sergey M․
9cb9a5df77 [utils] Check ext with trailing slash against the list of known extensions 2015-11-22 17:27:13 +06:00
Sergey M․
5035536e3f [test_utils] Add tests for determine_ext 2015-11-22 06:33:52 +06:00
Sergey M․
3e12bc583a [utils] Improve determine_ext (Closes #7593) 2015-11-22 06:29:39 +06:00
Sergey M․
e568c2233e [youtube] Add test for multi page list of playlists 2015-11-22 05:03:23 +06:00
Sergey M․
061a75edd6 [youtube] Extract base for entry list extractors and support multi page lists of playlists 2015-11-22 05:01:01 +06:00
Philipp Hagemeister
82c4d7b0ce release 2015.11.21 2015-11-21 23:36:27 +01:00
Sergey M․
136dadde95 [youtube:show] Rework in terms of playlists base extractor 2015-11-22 04:18:20 +06:00
Sergey M․
0c14841585 [youtube:user:playlists] Add extractor (Closes #3817) 2015-11-22 04:17:07 +06:00
Sergey M․
0eebf34d9d [pluralsight] Rephrase 2015-11-22 00:58:25 +06:00
Sergey M․
cf186b77a7 [pluralsight] Clarify allowed qualities guessing rationale 2015-11-22 00:56:40 +06:00
Sergey M․
a3372437bf [soundcloud] Remove unused variable 2015-11-22 00:49:58 +06:00
Sergey M․
4c57b4853d [pluralsight] Until listing formats request only single format 2015-11-22 00:42:58 +06:00
Sergey M․
38eb2968ab [pluralsight] Clarify and randomize ViewClip sleep interval 2015-11-22 00:07:09 +06:00
Andrzej Lichnerowicz
bea56c9569 [pluralsight] prevent error 429 when sensing video formats 2015-11-21 23:49:58 +06:00
Sergey M․
7e508ff2cf [pluralsight] Improve login detection 2015-11-21 21:49:37 +06:00
Sergey M․
563772eda4 [pluralsight] Extract base class 2015-11-21 21:37:29 +06:00
Sergey M․
0533915aad [pluralsight] Update some more URLs 2015-11-21 21:35:08 +06:00
Sergey M․
c3a227d1c4 [pluralsight] Update _LOGIN_URL 2015-11-21 21:25:48 +06:00
Sergey M․
f6c903e708 [soundcloud:search] Simplify (Closes #7213) 2015-11-21 21:21:21 +06:00
Sergey M․
7dc011c063 [soundcloud:search] Remove no track results message 2015-11-21 21:00:42 +06:00
Sergey M․
4e3b303016 [soundcloud:search] Fix non-ASCII searches 2015-11-21 20:55:48 +06:00
Sergey M․
7e1f5447e7 [utils] Improve encode_dict 2015-11-21 20:46:33 +06:00
Sergey M․
7e3472758b [soundcloud:search] PEP 8 2015-11-21 20:04:35 +06:00
reiv
328a22e175 [soundcloud] Remove limit on search results 2015-11-21 19:41:36 +06:00
reiv
417b453699 [soundcloud] Use correct error message conventions 2015-11-21 19:41:31 +06:00
reiv
6ea7190a3e Rewrite as list comprehension. 2015-11-21 19:41:26 +06:00
reiv
b54b08c91b Simplify with itertools.islice(). 2015-11-21 19:41:19 +06:00
reiv
c30943b1c0 Fix some compatibility issues, cleanup. 2015-11-21 19:41:15 +06:00
reiv
2abf7cab80 [soundcloud] Add Soundcloud search extractor 2015-11-21 19:41:08 +06:00
Sergey M․
4137196899 [rutube] Extract all formats 2015-11-21 18:02:52 +06:00
Sergey M․
019839faaa [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction 2015-11-21 18:01:39 +06:00
Sergey M․
f52183a878 [rutube:embed] Extend _VALID_URL (Closes #7588) 2015-11-21 17:39:24 +06:00
Yen Chi Hsuan
750b9ff032 [generic] Extract M3U8 formats (closes #7582) 2015-11-21 16:43:01 +08:00
Yen Chi Hsuan
28602e747c [generic] Refactor 2015-11-21 16:08:54 +08:00
Yen Chi Hsuan
6cc37c69e2 [generic] Unescape URLs from JWPlayer (#7582) 2015-11-21 14:12:34 +08:00
Sergey M․
a5cd0eb8a4 [pluralsight:course] Improve _VALID_URL 2015-11-21 08:32:48 +06:00
Sergey M․
c23e266427 [pluralsight] Do not require pluralsight account
Looks like some courses are available without pluralsight account
2015-11-21 08:25:52 +06:00
Sergey M․
651acffbe5 [pluralsight] Update ViewClip URL 2015-11-21 08:21:33 +06:00
Sergey M․
71bd93b89c [pluralsight] Do not rely on argument order in query (Closes #7583) 2015-11-21 08:08:34 +06:00
Sergey M․
6da620de58 [kaltura] Add test for referrer protected video (#7409) 2015-11-21 01:40:28 +06:00
Sergey M․
bdceea7afd [kaltura] Clean description 2015-11-21 01:39:29 +06:00
Sergey M․
d80a39cec8 [kaltura] Improve 2015-11-21 01:38:08 +06:00
Sergey M․
5b5fae5f20 [generic] Use referrer from source kaltura embed URLs (#7409) 2015-11-21 01:35:58 +06:00
Sergey M․
01b06aedcf [kaltura] Add support for referrer protected videos (#7409) 2015-11-21 01:34:02 +06:00
Sergey M
c711383811 Merge pull request #7579 from ashutosh-mishra/typo_fix
Typo fix, found while going through the code.
2015-11-20 23:24:54 +06:00
ashutosh-mishra
17cc153435 Typo fix, found while going through the code. 2015-11-20 22:51:46 +05:30
Sergey M․
67446fd49b [instagram] Improve _VALID_URL (Closes #7568) 2015-11-20 04:07:39 +06:00
Sergey M․
325bb615a7 [theplatform] Style 2015-11-19 22:58:43 +06:00
Sergey M․
ee5cd8418e [theplatform] Handle protocolless feed URLs (Closes #7532) 2015-11-19 22:58:29 +06:00
Sergey M․
342609a1b4 [bloomberg] Reax _VALID_URL (Closes #7546) 2015-11-19 22:55:06 +06:00
Sergey M
f270cf1a26 Merge pull request #7519 from barlik/master
Clarify that automatic subtitles are generated.
2015-11-19 22:44:08 +06:00
hedii
371c3b796c [YoutubeDL] Add playlist finished downloading message (Closes #7517)
Conflicts:
	youtube_dl/YoutubeDL.py
2015-11-19 22:39:02 +06:00
Sergey M․
6b7ceee1b9 [vimeo] Add test for #7552 2015-11-19 22:31:16 +06:00
Sergey M․
fdb20a27a3 [vimeo:group] Improve _VALID_URL (Closes #7552) 2015-11-19 22:30:58 +06:00
Sergey M․
2c94198eb6 [vimeo] Improve playlists extraction 2015-11-19 21:29:32 +06:00
Philipp Hagemeister
e8110b8125 release 2015.11.19 2015-11-19 15:35:13 +01:00
Yen Chi Hsuan
c39fd7b1ca [UDNEmbed] Fix generic UDN pages
Closes #7547
2015-11-19 22:32:56 +08:00
Sergey M․
a9c09a7c62 [pbs] Update API URL (Closes #7565) 2015-11-19 20:25:28 +06:00
Philipp Hagemeister
82beaabb41 release 2015.11.18 2015-11-18 19:23:04 +01:00
Jaime Marquínez Ferrándiz
63b4295d20 [youtube:playlist] fix title extraction (fixes #7544 and #7545) 2015-11-18 18:28:05 +01:00
Sergey M․
312a3f389b [pbs] Extend _VALID_URL 2015-11-18 00:46:41 +06:00
Jaime Marquínez Ferrándiz
609af1ae1c [dplay] Add 'encoding: utf-8' line 2015-11-17 17:58:16 +01:00
Jaime Marquínez Ferrándiz
4cd759f73d [dplay] Add extractor (closes #7515)
Since I haven't figured out how to download the hds stream, we use the hls one instead.
2015-11-17 17:52:29 +01:00
Jaime Marquínez Ferrándiz
e156e70281 [rtve] Remove unused import 2015-11-17 16:23:29 +01:00
Sergey M․
9b464929fe [rtve.es:alacarta] Fix extraction 2015-11-17 21:11:42 +06:00
Sergey M
0c176d7bde Merge pull request #7514 from ping/patch-7301
[neteasemusic] Fixes #7301
2015-11-16 14:25:29 +00:00
Sergey M․
7a3f0c00ad [utils] Style 2015-11-16 20:24:09 +06:00
Sergey M․
7aefc49c40 [utils] Skip invalid/non HTML entities (Closes #7518) 2015-11-16 20:20:16 +06:00
Rastislav Barlik
741dd8ea65 Clarify that automatic subtitles are generated.
It wasn't clear what automatic word mean.
2015-11-16 14:15:25 +00:00
ping
76adc82068 [neteasemusic] Fixes #7301 2015-11-16 11:39:18 +08:00
Philipp Hagemeister
bd1512d196 release 2015.11.15 2015-11-15 22:16:08 +01:00
Sergey M․
9a4acbfaf5 [theplatform] Add test for #7385 2015-11-16 00:28:04 +06:00
Sergey M․
ad1f4e7902 [theplatform] Handle explicitly specified SMIL (#7385) 2015-11-15 23:43:23 +06:00
Sergey M
b328295910 Merge pull request #7436 from davidbz/add_proxy_to_update_procedure
Add proxy support for update_self
2015-11-15 11:13:22 +00:00
David Ben Zakai
828b2a5cd9 Removing an unnecessary import 2015-11-15 09:40:32 +02:00
Sergey M․
2ff7cbeaaa [nowtv:list] Add extrator (Closes #7147) 2015-11-15 08:30:13 +06:00
Sergey M․
b2f7738830 [dumpert] Use original protocol 2015-11-15 02:25:00 +06:00
Sergey M․
dc0279532a [dumpert] Disable SSL (Closes #7504) 2015-11-15 02:21:24 +06:00
Sergey M․
0c59d02bdc [periscope] Relax _VALID_URL (Closes #7503) 2015-11-15 00:20:17 +06:00
Jaime Marquínez Ferrándiz
0f72beb515 [periscope] Remove unused imports 2015-11-14 18:31:33 +01:00
Sergey M․
d781e29316 [bbc] Allow selectionunavailable errors (Closes #7502) 2015-11-14 23:08:13 +06:00
Sergey M․
3b3e8ed332 [quickscope] Remove extractor (2) 2015-11-14 22:34:30 +06:00
Sergey M․
dcdfeb33d2 [quickscope] Remove extractor 2015-11-14 22:32:54 +06:00
Sergey M․
0d85c3a732 [lynda] Style 2015-11-14 16:44:24 +06:00
Sergey M․
903d136942 [lynda] Logout only when login info present (Closes #7500) 2015-11-14 16:43:58 +06:00
Yen Chi Hsuan
9d584da7d0 [xfileshare] Correct _VALID_URL 2015-11-14 17:27:32 +08:00
Yen Chi Hsuan
31752f76f7 [twitter:card] Add add_ie for the external test 2015-11-14 17:03:26 +08:00
Yen Chi Hsuan
5f1b2aea80 [twitter:card] Support vine.co embeds (closes #7496) 2015-11-14 17:02:07 +08:00
Sergey M․
4479600d57 [instagram] Add test for #7497 2015-11-14 07:21:20 +06:00
Sergey M․
a90189c3ad [instagram] Relax _VALID_URL (Closes #7497) 2015-11-14 07:20:33 +06:00
Sergey M․
d8a1caf04f [brightcove:new] Style 2015-11-14 06:22:12 +06:00
Sergey M․
cb33d389ed [brightcove:new] Add test with rtmp streams 2015-11-14 06:20:09 +06:00
Sergey M․
967e0955f0 Merge branch 'remitamine-brightcove_in_page_embed' 2015-11-14 06:11:49 +06:00
Sergey M․
e01b432ad3 [brightcove:new] Fix test 2015-11-14 06:11:17 +06:00
Sergey M․
fd91257c40 [brightcove] Order imports alphabetically 2015-11-14 06:08:36 +06:00
Sergey M․
c7b959ce38 [utils] Remove unused function 2015-11-14 06:07:44 +06:00
Sergey M․
75eac8961e [brightcove] Remove unused import 2015-11-14 06:07:24 +06:00
Sergey M․
3b7d9aa487 Rename all references to legacy studio Brightcove extractor 2015-11-14 06:05:46 +06:00
Sergey M․
1f4b722b00 [generic] Clarify Brightcove Legacy Studio comment 2015-11-14 06:03:32 +06:00
Sergey M․
f6519f89b0 [generic] Extract Brightcove New Studio embeds 2015-11-14 06:03:07 +06:00
Sergey M․
24af85298e [brightcove] Fix _extract_urls 2015-11-14 06:01:56 +06:00
Sergey M․
e721d857c2 [brightcove] Clarify IE_NAMEs 2015-11-14 05:56:51 +06:00
Sergey M․
5c17f0a67a [brightcove:embedinpage] Rename extractor to brightcove new
It's not actually embed_in_page but "New Studio" and allows both iframe and embed_in_page embeds
2015-11-14 05:55:59 +06:00
Sergey M․
4fcaa4f4a5 [brightcove] Rename extractor to brightcove legacy
Old embedding approaches are now "Legacy Studio"
2015-11-14 05:54:16 +06:00
Sergey M․
536f819eda [brightcove] Imrove extraction of new embeds 2015-11-14 05:51:05 +06:00
Sergey M․
a662489877 [brightcove:embedinpage] Make more robust and extract rtmp streams 2015-11-14 05:09:50 +06:00
Sergey M․
a2973eb597 Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube-dl into remitamine-brightcove_in_page_embed 2015-11-14 01:23:15 +06:00
Sergey M․
4e21b3a94f [cbs] Use android UA for higher quality streams (Closes #7490) 2015-11-14 00:25:28 +06:00
Jaime Marquínez Ferrándiz
b703ebeeaf [twitter] Don't fail if the description doesn't contain an URL (fixes #7489) 2015-11-13 19:09:42 +01:00
Jaime Marquínez Ferrándiz
b84a5f0337 [twitter] Update tests checksums 2015-11-13 18:55:07 +01:00
Philipp Hagemeister
a1ec9a7553 release 2015.11.13 2015-11-13 11:07:30 +01:00
Sergey M․
91d644b5ba [ruutu] Relax formats extraction 2015-11-13 02:43:27 +06:00
Sergey M․
5d6c3d6a66 [ruutu] Skip NOT-USED URLs(Closes #7478) 2015-11-13 02:41:38 +06:00
Jaime Marquínez Ferrándiz
1ebb4717df [cbsnews] Fix construction of 'play_path' in some videos (fixes #7394) 2015-11-12 21:02:56 +01:00
Yen Chi Hsuan
cf5881fc4d Credit @ferama
For providing idea for vidto.me (#7167) and extending nowvideo support (#6760)
2015-11-12 21:33:46 +08:00
Sergey M․
fcd817a326 [vimeo] Fix extraction (Closes #7460) 2015-11-12 03:56:11 +06:00
Sergey M․
031ec536f0 [gorillavid] Rename to xfileshare 2015-11-11 23:00:53 +06:00
Sergey M․
668db403f9 [gorillavid] Add test for vidto.me and strip title 2015-11-11 22:47:28 +06:00
Sergey M․
b9ad101926 [gorillavid] Add support for vidto.me 2015-11-11 22:44:03 +06:00
Sergey M․
435911029f [vidto] Remove extractor 2015-11-11 22:43:17 +06:00
Sergey M․
699ed30cee [novamov] Modernize 2015-11-11 22:34:49 +06:00
Sergey M․
9eab37dca0 [vimeo] Simplify set cookie 2015-11-11 22:32:13 +06:00
Sergey M․
9a8a12b7d8 [vimeo] Append cookies instead of overriding 2015-11-11 22:23:23 +06:00
Yen Chi Hsuan
a4c2ab35c1 Merge remote-tracking branch 'upstream/master' 2015-11-12 00:08:42 +08:00
Sergey M․
3d9c4bf09a [vimeo] Fix password protected videos (Closes #7451) 2015-11-11 21:21:21 +06:00
Yen Chi Hsuan
8b8a39e279 [vidto] Several simplifications and improvements
1. Use InfoExtractor._hidden_inputs
2. Fetch title from <title> tag
3. Cookies are preserved automatically
4. Use single quotes everywhere
5. Do not declare variables for one-time use only
2015-11-11 23:17:59 +08:00
Sergey M․
82393e2bb2 [novamov] Follow continue-to-the-video button if any (Closes #7330) 2015-11-11 21:02:05 +06:00
Sergey M․
2eb99a4b98 [nowvideo] Replace main host to resolvable one 2015-11-11 21:00:23 +06:00
Yen Chi Hsuan
6abce58a12 Credit @sieben for fixing wsj extractor 2015-11-11 20:16:18 +08:00
Yen Chi Hsuan
990e6e8fa3 [vidto] Minor fixes
1. import order
2. fatal is already True in helper functions
2015-11-11 20:13:03 +08:00
Yen Chi Hsuan
bfd88516eb Merge pull request #7454 from sieben/duplicate_keys
Remove duplicate key
2015-11-11 20:00:13 +08:00
Rémy Léone
d8b7e80d29 Remove duplicate key 2015-11-11 12:00:31 +01:00
Yen Chi Hsuan
37120974dc [vidto] PEP8 2015-11-11 02:02:46 +08:00
Marco Ferragina
42fc93c709 vidto extractor: code cleanup 2015-11-11 01:58:47 +08:00
Marco Ferragina
a625e56543 [vidto] Add extractor 2015-11-11 01:52:43 +08:00
Sergey M․
9b738b2caa [funnyordie] Fix extraction and extract m3u8 formats 2015-11-10 21:32:54 +06:00
David Ben Zakai
90bb5667bf Using internal opener 2015-11-10 17:15:23 +02:00
David Ben Zakai
d3d3e2e3aa Adding proxy to update procedure 2015-11-10 16:31:42 +02:00
remitamine
9550ca506f [utils] change extract_attributes to work in python 2 2015-10-31 19:36:04 +01:00
remitamine
c01e1a96aa [brightcove] fix test and fields extraction 2015-09-30 11:20:43 +01:00
remitamine
53407e3f38 [brightcove] fix streaming_src extraction 2015-09-23 14:02:13 +01:00
remitamine
ed1269000f [brightcove] add support for brightcove in page embed(fixes #6824) 2015-09-11 04:46:21 +01:00
remitamine
689fb748ee [utlis] add extract_attributes for extracting html tags attributes 2015-09-11 04:44:17 +01:00
132 changed files with 1386 additions and 724 deletions

View File

@@ -144,3 +144,5 @@ Lee Jenkins
Anssi Hannula
Lukáš Lalinský
Qijiang Fan
Rémy Léone
Marco Ferragina

View File

@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.
## Subtitle Options:
--write-sub Write subtitle file
--write-auto-sub Write automatic subtitle file (YouTube
only)
--write-auto-sub Write automatically generated subtitle file
(YouTube only)
--all-subs Download all the available subtitles of the
video
--list-subs List all available subtitles for the video
@@ -534,6 +534,12 @@ Most people asking this question are not aware that youtube-dl now defaults to d
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the YouTube URL, solving the CAPTCHA, and restarting youtube-dl.
### Do I need any other programs?
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
Some videos or video formats can also be only downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed.
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).

View File

@@ -67,7 +67,8 @@
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **Break**
- **Brightcove**
- **brightcove:legacy**
- **brightcove:new**
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
@@ -128,6 +129,7 @@
- **Discovery**
- **Dotsub**
- **DouyuTV**: 斗鱼
- **DPlay**
- **dramafever**
- **dramafever:series**
- **DRBonanza**
@@ -200,7 +202,6 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
- **Goshgay**
- **Groupon**
- **Hark**
@@ -367,6 +368,7 @@
- **nowness:playlist**
- **nowness:series**
- **NowTV**
- **NowTVList**
- **nowvideo**: NowVideo
- **npo**: npo.nl and ntr.nl
- **npo.nl:live**
@@ -426,7 +428,6 @@
- **qqmusic:playlist**: QQ音乐 - 歌单
- **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜
- **Quickscope**: Quick Scope
- **QuickVid**
- **R7**
- **radio.de**
@@ -493,6 +494,7 @@
- **soompi:show**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
- **soundcloud:set**
- **soundcloud:user**
- **soundgasm**
@@ -671,6 +673,7 @@
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
- **XHamster**
- **XHamsterEmbed**
- **XMinus**
@@ -705,6 +708,7 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**

View File

@@ -21,6 +21,7 @@ from youtube_dl.utils import (
clean_html,
DateRange,
detect_exe_version,
determine_ext,
encodeFilename,
escape_rfc3986,
escape_url,
@@ -210,8 +211,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('%20;'), '%20;')
self.assertEqual(unescapeHTML('&#x2F;'), '/')
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
def test_daterange(self):
_20century = DateRange("19000101", "20000101")
@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
def test_find_xpath_attr(self):
testxml = '''<root>
<node/>

View File

@@ -28,6 +28,7 @@ if os.name == 'nt':
import ctypes
from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
@@ -63,6 +64,7 @@ from .utils import (
SameFileError,
sanitize_filename,
sanitize_path,
sanitized_Request,
std_headers,
subtitles_filename,
UnavailableVideoError,
@@ -156,7 +158,7 @@ class YoutubeDL(object):
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
@@ -833,6 +835,7 @@ class YoutubeDL(object):
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
elif result_type == 'compat_list':
self.report_warning(
@@ -937,7 +940,7 @@ class YoutubeDL(object):
filter_parts.append(string)
def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings
# Remove operators that we don't use and join them with the surrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
@@ -1186,7 +1189,7 @@ class YoutubeDL(object):
return res
def _calc_cookies(self, info_dict):
pr = compat_urllib_request.Request(info_dict['url'])
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
@@ -1870,6 +1873,8 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):

View File

@@ -377,7 +377,7 @@ def _real_main(argv=None):
with YoutubeDL(ydl_opts) as ydl:
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
update_self(ydl.to_screen, opts.verbose, ydl._opener)
# Remove cache dir
if opts.rm_cachedir:

View File

@@ -42,7 +42,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal)
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import FileDownloader
from ..compat import compat_urllib_request
from ..utils import sanitized_Request
class DashSegmentsFD(FileDownloader):
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url)
req = sanitized_Request(target_url)
if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

View File

@@ -7,14 +7,12 @@ import time
import re
from .common import FileDownloader
from ..compat import (
compat_urllib_request,
compat_urllib_error,
)
from ..compat import compat_urllib_error
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
sanitized_Request,
)
@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)

View File

@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,

View File

@@ -60,7 +60,10 @@ from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@@ -129,6 +132,7 @@ from .dfb import DFBIE
from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dplay import DPlayIE
from .dramafever import (
DramaFeverIE,
DramaFeverSeriesIE,
@@ -221,7 +225,6 @@ from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .groupon import GrouponIE
from .hark import HarkIE
@@ -418,7 +421,10 @@ from .nowness import (
NownessPlaylistIE,
NownessSeriesIE,
)
from .nowtv import NowTVIE
from .nowtv import (
NowTVIE,
NowTVListIE,
)
from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
@@ -456,10 +462,7 @@ from .orf import (
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .periscope import (
PeriscopeIE,
QuickscopeIE,
)
from .periscope import PeriscopeIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@@ -573,7 +576,8 @@ from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE
SoundcloudPlaylistIE,
SoundcloudSearchIE
)
from .soundgasm import (
SoundgasmIE,
@@ -786,6 +790,7 @@ from .wrzuta import WrzutaIE
from .wsj import WSJIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
@@ -829,6 +834,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

View File

@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
'uploader': 'Al Jazeera English',
},
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'skip': 'Not accessible from Travis CI server',
}
@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
'&%40videoPlayer={0}'.format(brightcove_id)
),
'ie_key': 'Brightcove',
'ie_key': 'BrightcoveLegacy',
}

View File

@@ -7,11 +7,11 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
sanitized_Request,
xpath_text,
ExtractorError,
)
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
'j_password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
formats = []
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)

View File

@@ -6,13 +6,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
'pass': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json(

View File

@@ -27,7 +27,7 @@ class BBCCoUkIE(InfoExtractor):
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality than pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g.
# http://www.bbc.co.uk/programmes/b06bp7lf)
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
]
@@ -334,7 +334,7 @@ class BBCCoUkIE(InfoExtractor):
return self._download_media_selector_url(
mediaselector_url % programme_id, programme_id)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation'):
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
@@ -345,7 +345,7 @@ class BBCCoUkIE(InfoExtractor):
media_selection = self._download_xml(
url, programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
else:
raise

View File

@@ -4,14 +4,12 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
}, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}]
def _real_extract(self, url):
name = self._match_id(url)

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -20,12 +19,18 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
js_to_json,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
unsmuggle_url,
)
class BrightcoveIE(InfoExtractor):
class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -245,7 +250,7 @@ class BrightcoveIE(InfoExtractor):
def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info
class BrightcoveNewIE(InfoExtractor):
    """Extractor for players served from players.brightcove.net.

    The player script is downloaded to obtain a policy key, which is then
    sent in the Accept header of a playback API request that returns the
    video metadata and the list of sources.
    """
    IE_NAME = 'brightcove:new'
    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
    _TESTS = [{
        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
        'md5': 'c8100925723840d4b0d243f7025703be',
        'info_dict': {
            'id': '4463358922001',
            'ext': 'mp4',
            'title': 'Meet the man behind Popcorn Time',
            'description': 'md5:eac376a4fe366edc70279bfb681aea16',
            'duration': 165.768,
            'timestamp': 1441391203,
            'upload_date': '20150904',
            'uploader_id': '929656772001',
            'formats': 'mincount:22',
        },
    }, {
        # with rtmp streams
        'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
        'info_dict': {
            'id': '4279049078001',
            'ext': 'mp4',
            'title': 'Titansgrave: Chapter 0',
            'description': 'Titansgrave: Chapter 0',
            'duration': 1242.058,
            'timestamp': 1433556729,
            'upload_date': '20150606',
            'uploader_id': '4036320279001',
            'formats': 'mincount:41',
        },
        'params': {
            'skip_download': True,
        }
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return a list of player URLs for the embeds found in webpage.

        Iframe embeds contribute their src attribute as is; in-page embeds
        (a <video data-video-id=...> tag followed by the player script) are
        converted to the equivalent index.html?videoId=... URL.
        """
        # Reference:
        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html

        entries = []

        # Look for iframe embeds [1]
        for _, url in re.findall(
                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
            entries.append(url)

        # Look for embed_in_page embeds [2]
        for video_id, account_id, player_id, embed in re.findall(
                # According to examples from [3] it's unclear whether video id
                # may be optional and what to do when it is
                r'''(?sx)
                    <video[^>]+
                        data-video-id=["\'](\d+)["\'][^>]*>.*?
                    </video>.*?
                    <script[^>]+
                        src=["\'](?:https?:)?//players\.brightcove\.net/
                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
                ''', webpage):
            entries.append(
                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
                % (account_id, player_id, embed, video_id))

        return entries

    def _real_extract(self, url):
        """Build the info dict (metadata and formats) for a player URL."""
        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()

        webpage = self._download_webpage(
            'http://players.brightcove.net/%s/%s_%s/index.min.js'
            % (account_id, player_id, embed), video_id)

        policy_key = None

        # Preferred location of the policy key: the catalog({...}) call
        catalog = self._search_regex(
            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
        if catalog:
            catalog = self._parse_json(
                js_to_json(catalog), video_id, fatal=False)
            if catalog:
                policy_key = catalog.get('policyKey')

        # Otherwise look for a bare policyKey: '...' assignment in the script
        if not policy_key:
            policy_key = self._search_regex(
                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                webpage, 'policy key', group='pk')

        req = sanitized_Request(
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
            % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        json_data = self._download_json(req, video_id)

        title = json_data['name']

        formats = []
        for source in json_data.get('sources', []):
            source_type = source.get('type')
            src = source.get('src')
            if source_type == 'application/x-mpegURL':
                if not src:
                    continue
                m3u8_formats = self._extract_m3u8_formats(
                    src, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                streaming_src = source.get('streaming_src')
                stream_name, app_name = source.get('stream_name'), source.get('app_name')
                # Skip sources that carry neither a direct URL nor a complete
                # app_name/stream_name pair
                if not src and not streaming_src and (not stream_name or not app_name):
                    continue
                tbr = float_or_none(source.get('avg_bitrate'), 1000)
                height = int_or_none(source.get('height'))
                container = source.get('container')
                f = {
                    'tbr': tbr,
                    'width': int_or_none(source.get('width')),
                    'height': height,
                    'filesize': int_or_none(source.get('size')),
                    'container': container,
                    'vcodec': source.get('codec'),
                    # A source without a container must not abort extraction
                    # with AttributeError on None.lower(); leave ext unset
                    'ext': container.lower() if container else None,
                }

                def build_format_id(kind):
                    # e.g. 'http-1200k-720p'; parts are omitted when unknown
                    format_id = kind
                    if tbr:
                        format_id += '-%dk' % int(tbr)
                    if height:
                        format_id += '-%dp' % height
                    return format_id

                if src or streaming_src:
                    f.update({
                        'url': src or streaming_src,
                        'format_id': build_format_id('http' if src else 'http-streaming'),
                        'preference': 2 if src else 1,
                    })
                else:
                    f.update({
                        'url': app_name,
                        'play_path': stream_name,
                        'format_id': build_format_id('rtmp'),
                    })
                formats.append(f)
        self._sort_formats(formats)

        description = json_data.get('description')
        thumbnail = json_data.get('thumbnail')
        timestamp = parse_iso8601(json_data.get('published_at'))
        # The 1000 divisor matches the test durations given in seconds
        duration = float_or_none(json_data.get('duration'), 1000)
        tags = json_data.get('tags', [])

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': account_id,
            'formats': formats,
            'tags': tags,
        }

View File

@@ -1,6 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
sanitized_Request,
smuggle_url,
)
class CBSIE(InfoExtractor):
@@ -46,13 +50,19 @@ class CBSIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
request = sanitized_Request(url)
# Android UA is served with higher quality (720p) streams (see
# https://github.com/rg3/youtube-dl/issues/7490)
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
webpage = self._download_webpage(request, display_id)
real_id = self._search_regex(
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
webpage, 'real video ID')
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': 'theplatform:%s' % real_id,
'url': smuggle_url(
'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
{'force_smil_url': True}),
'display_id': display_id,
}

View File

@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
'format_id': format_id,
}
if uri.startswith('rtmp'):
play_path = re.sub(
r'{slistFilePath}', '',
uri.split('<break>')[-1].split('{break}')[-1])
fmt.update({
'app': 'ondemand?auth=cbs',
'play_path': 'mp4:' + uri.split('<break>')[-1],
'play_path': 'mp4:' + play_path,
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
'page_url': 'http://www.cbsnews.com',
'ext': 'flv',

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
@@ -13,6 +12,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
)
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data))
@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage)

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
sanitized_Request,
)
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
}
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json')

View File

@@ -19,7 +19,6 @@ from ..compat import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_str,
compat_etree_fromstring,
@@ -37,6 +36,7 @@ from ..utils import (
int_or_none,
RegexNotFoundError,
sanitize_filename,
sanitized_Request,
unescapeHTML,
unified_strdate,
url_basename,
@@ -891,6 +891,11 @@ class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@ -898,7 +903,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1280,7 +1285,7 @@ class InfoExtractor(object):
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))

View File

@@ -23,6 +23,7 @@ from ..utils import (
int_or_none,
lowercase_escape,
remove_end,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_text,
@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'name': username,
'password': password,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request = sanitized_Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request))
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req = sanitized_Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request(
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

View File

@@ -7,15 +7,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_request,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
str_to_int,
unescapeHTML,
)
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Cookie', 'family_filter=off; ff=off')
return request

View File

@@ -2,13 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
headers={'Origin': 'http://www.dcndigital.ae'})

View File

@@ -0,0 +1,51 @@
# encoding: utf-8
from __future__ import unicode_literals
import time
from .common import InfoExtractor
from ..utils import int_or_none
class DPlayIE(InfoExtractor):
    """Extractor for www.dplay.se video pages (HLS streams only)."""
    _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
        'info_dict': {
            'id': '3172',
            'ext': 'mp4',
            'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
            'title': 'Svensken lär sig njuta av livet',
            'duration': 2650,
        },
    }

    def _real_extract(self, url):
        """Resolve the numeric video id from the page and build its info dict."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'data-video-id="(\d+)"', webpage, 'video id')

        info = self._download_json(
            'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
            video_id)['data'][0]

        # The cookie is set before the stream authorization request below;
        # its expiry is 20 minutes from now, expressed in milliseconds.
        expiry_ms = int((time.time() + 20 * 60) * 1000)
        self._set_cookie(
            'secure.dplay.se', 'dsc-geo',
            '{"countryCode":"NL","expiry":%d}' % expiry_ms)
        # TODO: consider adding support for 'stream_type=hds', it seems to
        # require setting some cookies
        manifest_url = self._download_json(
            'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
            video_id, 'Getting manifest url for hls stream')['hls']
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
        # Sort before returning, as the other extractors in this code base do,
        # rather than exposing formats in raw playlist order
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': info['title'],
            'formats': formats,
            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
        }

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -16,6 +15,7 @@ from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)

View File

@@ -2,14 +2,17 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import qualities
from ..utils import (
qualities,
sanitized_Request,
)
class DumpertIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -26,10 +29,12 @@ class DumpertIE(InfoExtractor):
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
protocol = mobj.group('protocol')
url = 'https://www.dumpert.nl/mediabase/' + video_id
req = compat_urllib_request.Request(url)
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
req = sanitized_Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)

View File

@@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL')
if hls_url:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url})
token_data = self._download_json(

View File

@@ -3,13 +3,12 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID']
key = ims_video['hash']
config_req = compat_urllib_request.Request(
config_req = sanitized_Request(
'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
playlist_id = mobj.group('id')
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
pllist_req = compat_urllib_request.Request(pllist_url)
pllist_req = sanitized_Request(pllist_url)
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist_list = self._download_json(
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
raise ExtractorError('Playlist id not found')
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
pl_req = compat_urllib_request.Request(pl_url)
pl_req = sanitized_Request(pl_url)
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist = self._download_json(
pl_req, playlist_id, note='Downloading playlist info')

View File

@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -10,11 +10,11 @@ from ..compat import (
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
limit_length,
sanitized_Request,
urlencode_postdata,
get_element_by_id,
clean_html,
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
if useremail is None:
return
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req = sanitized_Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = self._download_webpage(request, None,
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
'name_action_selected': 'dont_save',
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None,
note='Confirming login')

View File

@@ -12,6 +12,7 @@ from ..compat import (
from ..utils import (
encode_dict,
ExtractorError,
sanitized_Request,
)
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
return False
# this is also needed
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
find_xpath_attr,
sanitized_Request,
)
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
req.add_header(
'User-Agent',
# it needs a more recent version

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
parse_iso8601,
sanitized_Request,
str_to_int,
)
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
b'Content-Type': b'application/x-www-form-urlencoded',
b'Origin': b'http://www.4tube.com',
}
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
token_req = sanitized_Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id)
formats = [{
'url': tokens[format]['token'],

View File

@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
bitrates.sort()
m3u8_url = self._search_regex(
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
webpage, 'm3u8 url', default=None, group='url')
formats = []
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
bitrates.sort()
for bitrate in bitrates:
for link in links:
formats.append({

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
remove_end,
HEADRequest,
sanitized_Request,
)
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')

View File

@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -22,6 +21,7 @@ from ..utils import (
HEADRequest,
is_html,
orderedSet,
sanitized_Request,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -30,7 +30,10 @@ from ..utils import (
url_basename,
xpath_text,
)
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
@@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
'id': '2765128793001',
@@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):
'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):
},
{
# https://github.com/rg3/youtube-dl/issues/3541
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
'id': '3866516442001',
@@ -820,6 +823,19 @@ class GenericIE(InfoExtractor):
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
},
},
# Kaltura embed protected with referrer
{
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
'info_dict': {
'id': '1_g4fbemnq',
'ext': 'mp4',
'title': 'Violetta - Achter De Schermen - Ruggero',
'description': 'Achter de schermen met Ruggero',
'timestamp': 1435133761,
'upload_date': '20150624',
'uploader_id': 'echojecka',
},
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1031,6 +1047,31 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'cinemasnob',
},
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'info_dict': {
'id': '4238694884001',
'ext': 'flv',
'title': 'Tabletop: Dread, Last Thoughts',
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
},
# JWPlayer with M3U8
{
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
}
]
@@ -1174,7 +1215,7 @@ class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id)
head_response = full_response
@@ -1203,7 +1244,7 @@ class GenericIE(InfoExtractor):
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1290,14 +1331,14 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for BrightCove:
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls]
return {
@@ -1307,6 +1348,11 @@ class GenericIE(InfoExtractor):
'entries': entries,
}
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(webpage)
if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1675,7 +1721,9 @@ class GenericIE(InfoExtractor):
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
# Look for Eagle.Platform embeds
mobj = re.search(
@@ -1720,7 +1768,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -1840,6 +1888,7 @@ class GenericIE(InfoExtractor):
entries = []
for video_url in found:
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1851,25 +1900,24 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
entry_info_dict = {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
}
ext = determine_ext(video_url)
if ext == 'smil':
entries.append({
'id': video_id,
'formats': self._extract_smil_formats(video_url, video_id),
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else:
entries.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['url'] = video_url
entries.append(entry_info_dict)
if len(entries) == 1:
return entries[0]

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
HEADRequest,
sanitized_Request,
str_to_int,
urlencode_postdata,
urlhandle_detect_ext,
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
payload = urlencode_postdata({'tracks[]': track_id})
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
req = sanitized_Request(self._PLAYLIST_URL, payload)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
track = self._download_json(req, track_id, 'Downloading playlist')[0]

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
HEADRequest,
sanitized_Request,
)
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'),
('mediaId', video_id),
])
r = compat_urllib_request.Request(
r = sanitized_Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
mkd = self._download_json(

View File

@@ -4,12 +4,10 @@ import json
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
request = sanitized_Request(complete_url)
response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = compat_urllib_request.Request(
request = sanitized_Request(
serve_url, '', {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
song_data = self._download_json(request, track_id, 'Downloading metadata')

View File

@@ -10,8 +10,8 @@ from ..utils import (
class InstagramIE(InfoExtractor):
_VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -6,12 +6,10 @@ from random import random
from math import floor
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
remove_end,
sanitized_Request,
)
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
(floor(random() * 1073741824), floor(random() * 1073741824))
)
req = compat_urllib_request.Request(player_url)
req = sanitized_Request(player_url)
req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id)

View File

@@ -5,11 +5,9 @@ import re
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
request = sanitized_Request(api_url, json.dumps(data))
video_json_page = self._download_webpage(
request, video_id, 'Downloading video JSON')

View File

@@ -2,12 +2,18 @@
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
unsmuggle_url,
)
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
source_url = smuggled_data.get('source_url')
if source_url:
referrer = base64.b64encode(
'://'.join(compat_urlparse.urlparse(source_url)[:2])
.encode('utf-8')).decode('utf-8')
else:
referrer = None
formats = []
for f in source_data['flavorAssets']:
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
if referrer:
video_url += '?referrer=%s' % referrer
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': int_or_none(f.get('height')),
'width': int_or_none(f.get('width')),
'url': video_url,
})
self._check_formats(formats, entry_id)
self._sort_formats(formats)
return {
'id': entry_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'description': clean_html(info.get('description')),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),

View File

@@ -4,10 +4,8 @@ import os
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse_urlparse
from ..utils import sanitized_Request
class KeezMoviesIE(InfoExtractor):
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -8,13 +8,13 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_ord,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
sanitized_Request,
int_or_none,
encode_data_uri,
)
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json_req = compat_urllib_request.Request(
play_json_req = sanitized_Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')

View File

@@ -7,12 +7,12 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
sanitized_Request,
)
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false',
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
login_page = self._download_webpage(
request, None,
@@ -83,6 +83,10 @@ class LyndaBaseIE(InfoExtractor):
raise ExtractorError('Unable to log in')
def _logout(self):
username, _ = self._get_login_info()
if username is None:
return
self._download_webpage(
'http://www.lynda.com/ajax/logout.aspx', None,
'Logging out', 'Unable to log out', fatal=False)

View File

@@ -7,12 +7,12 @@ from ..compat import (
compat_parse_qs,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0',
'submit': "Continue - I'm over 18",
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.report_age_confirmation()
self._download_webpage(request, None, False, 'Unable to confirm age')
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file

View File

@@ -2,14 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
sanitized_Request,
)
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import random
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
xpath_text,
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
video_id)
vid_config_request = compat_urllib_request.Request(
vid_config_request = sanitized_Request(
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
headers=http_headers)

View File

@@ -5,13 +5,11 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
]
r_json = json.dumps(r)
post = compat_urllib_parse.urlencode({'r': r_json})
req = compat_urllib_request.Request(self._API_URL, post)
req = sanitized_Request(self._API_URL, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._download_json(req, video_id)

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import sanitized_Request
class MofosexIE(InfoExtractor):
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -5,13 +5,11 @@ import os.path
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
remove_start,
sanitized_Request,
)
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
orig_webpage, 'builtin URL', default=None, group='url')
if builtin_url:
req = compat_urllib_request.Request(builtin_url)
req = sanitized_Request(builtin_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
title = self._og_search_title(orig_webpage).strip()
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
'hash': hash_key,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import sanitized_Request
class MovieClipsIE(InfoExtractor):
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
# it doesn't work if it thinks the browser it's too old
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
webpage = self._download_webpage(req, display_id)

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
sanitized_Request,
unescapeHTML,
url_basename,
RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,

View File

@@ -11,10 +11,10 @@ from ..compat import (
compat_ord,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))

View File

@@ -8,11 +8,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_str,
compat_itertools_count,
)
from ..utils import sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor):
@@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
if not details:
continue
formats.append({
'url': 'http://m1.music.126.net/%s/%s.%s' %
'url': 'http://m5.music.126.net/%s/%s.%s' %
(cls._encrypt(details['dfsId']), details['dfsId'],
details['extension']),
'ext': details.get('extension'),
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
return int(round(ms / 1000.0))
def query_api(self, endpoint, video_id, note):
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
req.add_header('Referer', self._API_BASE)
return self._download_json(req, video_id, note)

View File

@@ -1,10 +1,8 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class NFBIE(InfoExtractor):
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request = sanitized_Request(
'https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

View File

@@ -8,7 +8,6 @@ import datetime
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
sanitized_Request,
xpath_text,
determine_ext,
)
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
'password': password,
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, None, note='Logging in', errnote='Unable to log in')
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
'k': thumb_play_key,
'v': video_id
})
flv_info_request = compat_urllib_request.Request(
flv_info_request = sanitized_Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage(

View File

@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
clean_html,
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
float_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
'username': username,
'password': password,
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
login = self._download_json(request, None, 'Logging in as %s' % username)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
urlencode_postdata,
xpath_text,
xpath_with_ns,
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
'op': 'download1',
'method_free': 'Continue to Video',
}
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
req = sanitized_Request(url, urlencode_postdata(fields))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,
'Downloading download page')

View File

@@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
NO_DEFAULT,
encode_dict,
sanitized_Request,
urlencode_postdata,
)
@@ -38,19 +40,40 @@ class NovaMovIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
page = self._download_webpage(
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
url = 'http://%s/video/%s' % (self._HOST, video_id)
if re.search(self._FILE_DELETED_REGEX, page) is not None:
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
def extract_filekey(default=NO_DEFAULT):
return self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
filekey = extract_filekey(default=None)
if not filekey:
fields = self._hidden_inputs(webpage)
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
'post url', default=url, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(encode_dict(fields)))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
filekey = extract_filekey()
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
api_response = self._download_webpage(
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,

View File

@@ -1,12 +1,12 @@
# encoding: utf-8
from __future__ import unicode_literals
from .brightcove import BrightcoveIE
from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import (
compat_str,
compat_urllib_request,
from ..compat import compat_str
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'Brightcove')
return self.url_result(bc_url, 'BrightcoveLegacy')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
def _api_request(self, url, request_path):
display_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://api.nowness.com/api/' + request_path % display_id,
headers={
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -13,8 +15,63 @@ from ..utils import (
)
class NowTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
class NowTVBaseIE(InfoExtractor):
_VIDEO_FIELDS = (
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
'broadcastStartDate', 'seoUrl', 'duration', 'files',
'format.defaultImage169Format', 'format.defaultImage169Logo')
def _extract_video(self, info, display_id=None):
video_id = compat_str(info['id'])
files = info['files']
if not files:
if info.get('geoblocked', False):
raise ExtractorError(
'Video %s is not available from your location due to geo restriction' % video_id,
expected=True)
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
formats = []
for item in files['items']:
if determine_ext(item['path']) != 'f4v':
continue
app, play_path = remove_start(item['path'], '/').split('/', 1)
formats.append({
'url': 'rtmpe://fms.rtl.de',
'app': app,
'play_path': 'mp4:%s' % play_path,
'ext': 'flv',
'page_url': 'http://rtlnow.rtl.de',
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
'tbr': int_or_none(item.get('bitrate')),
})
self._sort_formats(formats)
title = info['title']
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
return {
'id': video_id,
'display_id': display_id or info.get('seoUrl'),
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class NowTVIE(NowTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
# rtl
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
'id': '203519',
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'ext': 'flv',
'title': 'Die neuen Bauern und eine Hochzeit',
'title': 'Inka Bause stellt die neuen Bauern vor',
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1432580700,
@@ -136,58 +193,65 @@ class NowTVIE(InfoExtractor):
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id_split = display_id.split('/')
if len(display_id) > 2:
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
mobj = re.match(self._VALID_URL, url)
display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
info = self._download_json(
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
display_id)
'https://api.nowtv.de/v3/movies/%s?fields=%s'
% (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
video_id = compat_str(info['id'])
return self._extract_video(info, display_id)
files = info['files']
if not files:
if info.get('geoblocked', False):
raise ExtractorError(
'Video %s is not available from your location due to geo restriction' % video_id,
expected=True)
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
formats = []
for item in files['items']:
if determine_ext(item['path']) != 'f4v':
continue
app, play_path = remove_start(item['path'], '/').split('/', 1)
formats.append({
'url': 'rtmpe://fms.rtl.de',
'app': app,
'play_path': 'mp4:%s' % play_path,
'ext': 'flv',
'page_url': 'http://rtlnow.rtl.de',
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
'tbr': int_or_none(item.get('bitrate')),
})
self._sort_formats(formats)
class NowTVListIE(NowTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
title = info['title']
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
duration = parse_duration(info.get('duration'))
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
f = info.get('format', {})
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
_TESTS = [{
'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
'info_dict': {
'id': '17006',
'title': 'stern TV - Aktuell',
},
'playlist_count': 1,
}, {
'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
'info_dict': {
'id': '20716',
'title': 'Das Supertalent - FREE Staffel 8',
},
'playlist_count': 14,
}]
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_id = mobj.group('show_id')
season_id = mobj.group('id')
fields = []
fields.extend(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
list_info = self._download_json(
'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
% (','.join(fields), show_id),
season_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
title = '%s - %s' % (list_info['title'], season['headline'])
entries = []
for container in season['formatTabPages']['items']:
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
entries.append(self._extract_video(info))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)

View File

@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.ch'
_HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
sanitized_Request,
unified_strdate,
)
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
formats = []
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://m.nuvid.com/play/%s' % video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
webpage = self._download_webpage(

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
)
from ..utils import js_to_json
class PatreonIE(InfoExtractor):
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://www.patreon.com/processLogin',
compat_urllib_parse.urlencode(login_form).encode('utf-8')
)

View File

@@ -22,7 +22,7 @@ class PBSIE(InfoExtractor):
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
@@ -170,6 +170,10 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
},
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
}
]
_ERRORS = {
@@ -259,7 +263,7 @@ class PBSIE(InfoExtractor):
return self.playlist_result(entries, display_id)
info = self._download_json(
'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id)
formats = []

View File

@@ -2,16 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import parse_iso8601
class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -29,6 +25,9 @@ class PeriscopeIE(InfoExtractor):
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}, {
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
'only_matching': True,
}]
def _call_api(self, method, value):
@@ -81,24 +80,3 @@ class PeriscopeIE(InfoExtractor):
'thumbnails': thumbnails,
'formats': formats,
}
class QuickscopeIE(InfoExtractor):
    # Resolves watchonperiscope.com broadcast pages: the broadcast id is
    # posted to the site's accessChannel API and the 'share_url' from the
    # JSON answer is handed over to the Periscope extractor.
    IE_DESC = 'Quick Scope'
    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
    _TEST = {
        'url': 'https://watchonperiscope.com/broadcast/56180087',
        'only_matching': True,
    }

    def _real_extract(self, url):
        broadcast_id = self._match_id(url)

        # Form fields expected by the accessChannel endpoint.
        access_form = {
            'broadcast_id': broadcast_id,
            'entry_ticket': '',
            'from_push': 'false',
            'uses_sessions': 'true',
        }
        payload = compat_urllib_parse.urlencode(access_form).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://watchonperiscope.com/api/accessChannel', payload)

        channel = self._download_json(request, broadcast_id)
        return self.url_result(channel['share_url'], 'Periscope')

View File

@@ -5,12 +5,10 @@ import re
import os.path
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -1,29 +1,35 @@
from __future__ import unicode_literals
import re
import json
import random
import collections
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
sanitized_Request,
)
class PluralsightIE(InfoExtractor):
class PluralsightBaseIE(InfoExtractor):
    # Common base for the Pluralsight extractors.
    # Scheme and host that subclasses prepend (via '%s/...') to the
    # Pluralsight player, ViewClip and course-data paths.
    _API_BASE = 'http://app.pluralsight.com'
class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
_LOGIN_URL = 'https://www.pluralsight.com/id/'
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
_LOGIN_URL = 'https://app.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight'
_TEST = {
_TESTS = [{
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
'info_dict': {
@@ -33,7 +39,14 @@ class PluralsightIE(InfoExtractor):
'duration': 338,
},
'skip': 'Requires pluralsight account credentials',
}
}, {
'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
'only_matching': True,
}, {
# available without pluralsight account
'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
'only_matching': True,
}]
def _real_initialize(self):
self._login()
@@ -41,7 +54,7 @@ class PluralsightIE(InfoExtractor):
def _login(self):
(username, password) = self._get_login_info()
if username is None:
self.raise_login_required('Pluralsight account is required')
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -60,7 +73,7 @@ class PluralsightIE(InfoExtractor):
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request(
request = sanitized_Request(
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -73,30 +86,47 @@ class PluralsightIE(InfoExtractor):
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
author = mobj.group('author')
name = mobj.group('name')
clip_id = mobj.group('clip')
course = mobj.group('course')
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
author = qs.get('author', [None])[0]
name = qs.get('name', [None])[0]
clip_id = qs.get('clip', [None])[0]
course = qs.get('course', [None])[0]
if any(not f for f in (author, name, clip_id, course,)):
raise ExtractorError('Invalid URL', expected=True)
display_id = '%s-%s' % (name, clip_id)
webpage = self._download_webpage(url, display_id)
collection = self._parse_json(
self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules'),
display_id)
modules = self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules', default=None)
if modules:
collection = self._parse_json(modules, display_id)
else:
# Webpage may be served in different layout (see
# https://github.com/rg3/youtube-dl/issues/7607)
collection = self._parse_json(
self._search_regex(
r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
display_id)['course']['modules']
module, clip = None, None
for module_ in collection:
if module_.get('moduleName') == name:
if name in (module_.get('moduleName'), module_.get('name')):
module = module_
for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex')
if clip_index is None:
clip_index = clip_.get('index')
if clip_index is None:
continue
if compat_str(clip_index) == clip_id:
@@ -112,13 +142,33 @@ class PluralsightIE(InfoExtractor):
'high': {'width': 1024, 'height': 768},
}
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = (
('webm', ('high',)),
('mp4', ('low', 'medium', 'high',)),
AllowedQuality('webm', ('high',)),
AllowedQuality('mp4', ('low', 'medium', 'high',)),
)
# In order to minimize the number of calls to ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight we will request
# only single format until formats listing was explicitly requested.
if self._downloader.params.get('listformats', False):
allowed_qualities = ALLOWED_QUALITIES
else:
def guess_allowed_qualities():
    # Pick the single (ext, quality) pair to request from the ViewClip API.
    #
    # Reads the requested format from self._downloader.params['format']
    # (falling back to 'best'). When it has the form '<ext>-<quality>' and
    # that pair is listed in ALLOWED_QUALITIES, exactly that pair is
    # returned. Otherwise the 'high' quality is returned, as webm when
    # 'prefer_free_formats' is set and as mp4 otherwise.
    #
    # Returns a one-element tuple of AllowedQuality so the caller can
    # iterate it the same way as ALLOWED_QUALITIES.
    req_format = self._downloader.params.get('format') or 'best'
    # Split only once: specs with several hyphens (e.g. the fallback chain
    # 'webm-high/mp4-high') must not break the two-name unpacking below.
    req_format_split = req_format.split('-', 1)
    if len(req_format_split) > 1:
        req_ext, req_quality = req_format_split
        for allowed_quality in ALLOWED_QUALITIES:
            if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                return (AllowedQuality(req_ext, (req_quality, )), )
    req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
    return (AllowedQuality(req_ext, ('high', )), )
allowed_qualities = guess_allowed_qualities()
formats = []
for ext, qualities in ALLOWED_QUALITIES:
for ext, qualities in allowed_qualities:
for quality in qualities:
f = QUALITIES[quality].copy()
clip_post = {
@@ -131,13 +181,24 @@ class PluralsightIE(InfoExtractor):
'mt': ext,
'q': '%dx%d' % (f['width'], f['height']),
}
request = compat_urllib_request.Request(
'http://www.pluralsight.com/training/Player/ViewClip',
request = sanitized_Request(
'%s/training/Player/ViewClip' % self._API_BASE,
json.dumps(clip_post).encode('utf-8'))
request.add_header('Content-Type', 'application/json;charset=utf-8')
format_id = '%s-%s' % (ext, quality)
clip_url = self._download_webpage(
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
# Pluralsight tracks multiple sequential calls to ViewClip API and start
# to return 429 HTTP errors after some time (see
# https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
# to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
# To somewhat reduce the probability of these consequences
# we will sleep random amount of time before each call to ViewClip.
self._sleep(
random.randint(2, 5), display_id,
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
if not clip_url:
continue
f.update({
@@ -163,10 +224,10 @@ class PluralsightIE(InfoExtractor):
}
class PluralsightCourseIE(InfoExtractor):
class PluralsightCourseIE(PluralsightBaseIE):
IE_NAME = 'pluralsight:course'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
_TEST = {
_VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
_TESTS = [{
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
@@ -176,7 +237,14 @@ class PluralsightCourseIE(InfoExtractor):
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
},
'playlist_count': 31,
}
}, {
# available without pluralsight account
'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
'only_matching': True,
}, {
'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
'only_matching': True,
}]
def _real_extract(self, url):
course_id = self._match_id(url)
@@ -184,14 +252,14 @@ class PluralsightCourseIE(InfoExtractor):
# TODO: PSM cookie
course = self._download_json(
'http://www.pluralsight.com/data/course/%s' % course_id,
'%s/data/course/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course JSON')
title = course['title']
description = course.get('description') or course.get('shortDescription')
course_data = self._download_json(
'http://www.pluralsight.com/data/course/content/%s' % course_id,
'%s/data/course/content/%s' % (self._API_BASE, course_id),
course_id, 'Downloading course data JSON')
entries = []
@@ -201,7 +269,7 @@ class PluralsightCourseIE(InfoExtractor):
if not player_parameters:
continue
entries.append(self.url_result(
'http://www.pluralsight.com/training/player?%s' % player_parameters,
'%s/training/player?%s' % (self._API_BASE, player_parameters),
'Pluralsight'))
return self.playlist_result(entries, course_id, title, description)

View File

@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
webpage = self._download_webpage(url, display_id or video_id)
title = self._html_search_regex(
r'<title>(.+) porn HD.+?</title>', webpage, 'title')
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
description = self._html_search_regex(
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
view_count = int_or_none(self._html_search_regex(

View File

@@ -8,10 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
str_to_int,
)
from ..aes import (
@@ -53,7 +53,7 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class PornotubeIE(InfoExtractor):
'authenticationSpaceKey': originAuthenticationSpaceKey,
'credentials': 'Clip Application',
}
token_req = compat_urllib_request.Request(
token_req = sanitized_Request(
'https://api.aebn.net/auth/v1/token/primal',
data=json.dumps(token_req_data).encode('utf-8'))
token_req.add_header('Content-Type', 'application/json')
@@ -56,7 +54,7 @@ class PornotubeIE(InfoExtractor):
token = token_answer['tokenKey']
# Get video URL
delivery_req = compat_urllib_request.Request(
delivery_req = sanitized_Request(
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
delivery_req.add_header('Authorization', token)
delivery_info = self._download_json(
@@ -64,7 +62,7 @@ class PornotubeIE(InfoExtractor):
video_url = delivery_info['mediaUrl']
# Get additional info (title etc.)
info_req = compat_urllib_request.Request(
info_req = sanitized_Request(
'https://api.aebn.net/content/v1/clips/%s?expand='
'title,description,primaryImageNumber,startSecond,endSecond,'
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'

View File

@@ -1,11 +1,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
from ..utils import ExtractorError
class PrimeShareTVIE(InfoExtractor):
@@ -41,7 +41,7 @@ class PrimeShareTVIE(InfoExtractor):
webpage, 'wait time', default=7)) + 1
self._sleep(wait_time, video_id)
req = compat_urllib_request.Request(
req = sanitized_Request(
url, compat_urllib_parse.urlencode(fields), headers)
video_page = self._download_webpage(
req, video_id, 'Downloading video page')

View File

@@ -4,13 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
determine_ext,
ExtractorError,
sanitized_Request,
)
@@ -37,7 +35,7 @@ class PromptFileIE(InfoExtractor):
fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(
req, video_id, 'Downloading video page')

View File

@@ -7,11 +7,11 @@ import re
from .common import InfoExtractor
from ..utils import (
sanitized_Request,
strip_jsonp,
unescapeHTML,
clean_html,
)
from ..compat import compat_urllib_request
class QQMusicIE(InfoExtractor):
@@ -201,7 +201,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
singer_desc = None
if singer_id:
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
req.add_header(
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')

View File

@@ -6,11 +6,11 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_urllib_request, compat_urlparse
from ..utils import (
ExtractorError,
float_or_none,
remove_end,
sanitized_Request,
std_headers,
struct_unpack,
)
@@ -102,20 +102,14 @@ class RTVEALaCartaIE(InfoExtractor):
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png_request = compat_urllib_request.Request(png_url)
png_request = sanitized_Request(png_url)
png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
auth_url = video_url.replace(
video_url = video_url.replace(
'resources/', 'auth/resources/'
).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage(
auth_url, video_id, 'Getting video url')
# Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format
video_url = compat_urlparse.urljoin(
'http://mvod1.akcdn.rtve.es/', video_path)
subtitles = None
if info.get('sbtFile') is not None:

View File

@@ -9,7 +9,7 @@ from ..compat import (
compat_str,
)
from ..utils import (
ExtractorError,
determine_ext,
unified_strdate,
)
@@ -51,10 +51,25 @@ class RutubeIE(InfoExtractor):
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8')
if m3u8_url is None:
raise ExtractorError('Couldn\'t find m3u8 manifest url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
formats = []
for format_id, format_url in options['video_balancer'].items():
ext = determine_ext(format_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id, fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video['id'],
@@ -74,9 +89,9 @@ class RutubeIE(InfoExtractor):
class RutubeEmbedIE(InfoExtractor):
IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos'
_VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
_VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
@@ -90,7 +105,10 @@ class RutubeEmbedIE(InfoExtractor):
'params': {
'skip_download': 'Requires ffmpeg',
},
}
}, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
}]
def _real_extract(self, url):
embed_id = self._match_id(url)

View File

@@ -57,16 +57,21 @@ class RuutuIE(InfoExtractor):
extract_formats(child)
elif child.tag.endswith('File'):
video_url = child.text
if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
if (not video_url or video_url in processed_urls or
any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
return
processed_urls.append(video_url)
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls'))
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds'))
f4m_formats = self._extract_f4m_formats(
video_url, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp':

View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .brightcove import BrightcoveLegacyIE
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
smuggle_url,
std_headers,
)
@@ -58,7 +56,7 @@ class SafariBaseIE(InfoExtractor):
'next': '',
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -112,11 +110,11 @@ class SafariIE(SafariBaseIE):
'%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
part)
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if not bc_url:
raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
class SafariCourseIE(SafariBaseIE):

View File

@@ -6,14 +6,12 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
sanitized_Request,
unified_strdate,
)
@@ -37,7 +35,7 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id)

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class SharedIE(InfoExtractor):
'Video %s does not exist' % video_id, expected=True)
download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(
request = sanitized_Request(
url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
parse_duration,
sanitized_Request,
)
@@ -50,7 +48,7 @@ class ShareSixIE(InfoExtractor):
'method_free': 'Free'
}
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req = sanitized_Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class SinaIE(InfoExtractor):
@@ -61,7 +59,7 @@ class SinaIE(InfoExtractor):
if mobj.group('token') is not None:
# The video id is in the redirected url
self.to_screen('Getting video id')
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.get_method = lambda: 'HEAD'
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())

View File

@@ -7,13 +7,11 @@ import hashlib
import uuid
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
unified_strdate,
)
@@ -176,7 +174,7 @@ class SmotriIE(InfoExtractor):
if video_password:
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -339,7 +337,7 @@ class SmotriBroadcastIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage(

View File

@@ -6,11 +6,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -96,7 +96,7 @@ class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
req = compat_urllib_request.Request(base_data_url + vid_id)
req = sanitized_Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:

View File

@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from .common import (
InfoExtractor,
SearchInfoExtractor
)
from ..compat import (
compat_str,
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
encode_dict,
ExtractorError,
int_or_none,
unified_strdate,
@@ -469,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE):
'description': data.get('description'),
'entries': entries,
}
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    # Search extractor for SoundCloud tracks, addressed as
    # 'scsearchN:query' (see _SEARCH_KEY).
    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    # Largest page size sent to the API in the 'limit' parameter.
    _MAX_RESULTS_PER_PAGE = 200
    # Number of results collected when the caller passes no 'limit'.
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        # Lazily yield url_result entries from a paginated API v2 collection.
        #
        # endpoint      -- API path appended to _API_V2_BASE
        # collection_id -- id used for download progress messages
        # query         -- extra query parameters; 'limit' is the TOTAL
        #                  number of results wanted (may be float('inf'))
        #
        # The page size sent to the API is capped at _MAX_RESULTS_PER_PAGE,
        # while the total is tracked separately, so that requests for more
        # than one page worth of results are honoured and never more than
        # the requested number of entries is produced.
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        query['limit'] = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse.urlencode(encode_dict(query))
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            for item in collection:
                # Skip empty (falsy) items in the collection.
                if not item:
                    continue
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                collected_results += 1
                if collected_results >= wanted:
                    return

            # Follow the link to the next page, if the response has one.
            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        # Return a playlist of up to n tracks matching the search query.
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)

View File

@@ -3,14 +3,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .brightcove import BrightcoveLegacyIE
from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
'info_dict': {
'id': '2780937028001',
@@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):
brightcove_url = self._og_search_video_url(webpage)
except RegexNotFoundError:
# Other videos works fine with the info from the object
brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_url is None:
raise ExtractorError(
'The webpage does not contain a video', expected=True)
return self.url_result(brightcove_url, BrightcoveIE.ie_key())
return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import (
sanitized_Request,
str_to_int,
unified_strdate,
)
@@ -51,7 +51,7 @@ class SpankwireIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
req = sanitized_Request('http://www.' + mobj.group('url'))
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_iso8601,
sanitized_Request,
)
@@ -54,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
sport_id, video_id)
req = compat_urllib_request.Request(api_url, headers={
req = sanitized_Request(api_url, headers={
'Accept': 'application/vnd.vidibus.v2.html+json',
'Referer': url,
})

View File

@@ -4,10 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class StreamcloudIE(InfoExtractor):
@@ -43,7 +41,7 @@ class StreamcloudIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -5,11 +5,9 @@ import hashlib
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
int_or_none,
sanitized_Request,
)
@@ -54,7 +52,7 @@ class StreamCZIE(InfoExtractor):
video_id = self._match_id(url)
api_path = '/episode/%s' % video_id
req = compat_urllib_request.Request(self._API_URL + api_path)
req = sanitized_Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id)

View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
clean_html,
ExtractorError,
float_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -53,7 +51,7 @@ class TapelyIE(InfoExtractor):
display_id = mobj.group('id')
playlist_url = self._API_URL.format(display_id)
request = compat_urllib_request.Request(playlist_url)
request = sanitized_Request(playlist_url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
request.add_header('Accept', 'application/json')
request.add_header('Referer', url)

View File

@@ -139,6 +139,11 @@ class ThePlatformIE(ThePlatformBaseIE):
'upload_date': '20150701',
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
},
}, {
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
# geo-restricted (US), HLS encrypted with AES-128
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
@staticmethod
@@ -182,8 +187,12 @@ class ThePlatformIE(ThePlatformBaseIE):
# Seems there's no pattern for the interested script filename, so
# I try one by one
for script in reversed(scripts):
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
feed_script = self._download_webpage(
self._proto_relative_url(script, 'http:'),
video_id, 'Downloading feed script')
feed_id = self._search_regex(
r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
'default feed id', default=None)
if feed_id is not None:
break
if feed_id is None:
@@ -193,6 +202,15 @@ class ThePlatformIE(ThePlatformBaseIE):
if smuggled_data.get('force_smil_url', False):
smil_url = url
# Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
elif '/guid/' in url:
webpage = self._download_webpage(url, video_id)
smil_url = self._search_regex(
r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
webpage, 'smil url', group='url')
path = self._search_regex(
r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .brightcove import BrightcoveLegacyIE
from .discovery import DiscoveryIE
from ..compat import compat_urlparse
@@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):
return {
'_type': 'url',
'url': BrightcoveIE._extract_brightcove_url(iframe),
'ie': BrightcoveIE.ie_key(),
'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
'ie': BrightcoveLegacyIE.ie_key(),
}

Some files were not shown because too many files have changed in this diff Show More