Compare commits

...

309 Commits

Author SHA1 Message Date
Philipp Hagemeister
86e8c89488 release 2015.12.09 2015-12-09 15:32:26 +01:00
Jaime Marquínez Ferrándiz
47f48f5d85 [test/test_all_urls] Update pbs extractor name
It's in lowercase now (since e15e2ef7a0).
2015-12-08 21:12:13 +01:00
Sergey M․
e15e2ef7a0 [pbs] Add support for all member stations (#7674) 2015-12-09 01:51:34 +06:00
Sergey M․
d0c8b279da [pbs] Add another coveplayer pattern (Closes #7674) 2015-12-08 23:34:43 +06:00
Sergey M․
612d83b51d [pbs] Extend _VALID_URL 2015-12-08 23:28:36 +06:00
Sergey M
9c30efeb7e Merge pull request #7792 from jindaxia/fix_sohu_403forbidden
[sohu] Fix 403 forbidden
2015-12-08 22:54:14 +06:00
Sergey M․
39fa4cc107 [cliphunter] Fix extraction (Closes #7796) 2015-12-08 21:56:00 +06:00
Sergey M․
b09c122373 [nbc] Add another theplatform pattern 2015-12-08 21:35:42 +06:00
Sergey M
3348243b7b [README.md] Clarify verbose log requirements 2015-12-08 21:34:26 +06:00
Sergey M․
b46b65ed37 [nbc] Smuggle referer (Closes #7791) 2015-12-08 21:16:14 +06:00
Sergey M․
18e4088fad [theplatform] Add support for referer protected videos with explicit SMIL 2015-12-08 21:15:45 +06:00
虾哥哥
5fd6cd64f9 [sohu] fix 403 forbidden 2015-12-08 14:14:14 +08:00
Sergey M․
3d24bbfbe4 [YoutubeDL] Check formats for merge to be opposite (#7786) 2015-12-07 23:10:57 +06:00
Sergey M․
1775612512 [wimp] Improve video URL regex 2015-12-07 22:18:00 +06:00
Sergey M․
0d2d967cc7 [wimp] Fix extraction (Closes #7784) 2015-12-07 22:14:45 +06:00
Sergey M․
a5e52a1fd4 [vk] Add test for pladform embed 2015-12-07 22:05:54 +06:00
Sergey M․
291a93bafa [vk] Remove unnecessary message 2015-12-07 22:04:47 +06:00
Sergey M․
c4737bea17 [vk] Add support for pladform embeds (Closes #7780) 2015-12-07 22:03:52 +06:00
Sergey M․
45dad7ba1b [extractor/generic] Use _extract_url for pladform 2015-12-07 22:03:21 +06:00
Sergey M․
db7c9da871 [pladform] Add _extract_url routine 2015-12-07 22:02:45 +06:00
Philipp Hagemeister
bc92621ade release 2015.12.06 2015-12-06 18:51:25 +01:00
Sergey M․
fd8e559c3a [beeg] Switch to api v4 (Closes #7774) 2015-12-06 23:47:10 +06:00
Sergey M․
222e11d4ae [bbc] Add another pattern for playlist.sxml (Closes #7743) 2015-12-06 16:41:12 +06:00
Sergey M․
7d682f0acb [nowtv] Extend _VALID_URL to support jahr URLs (Closes #7755) 2015-12-06 16:18:59 +06:00
Yen Chi Hsuan
8364b6b0b1 [iqiyi] Update key
Closes #7772
2015-12-06 16:41:02 +08:00
Sergey M․
7ac40e5521 [nowvideo] Update test 2015-12-06 09:42:20 +06:00
Sergey M․
36066dd3ee [movshare] Rename to wholecloud 2015-12-06 09:42:00 +06:00
Sergey M․
636aa83ed3 [cloudtime] Add extractor 2015-12-06 09:37:38 +06:00
Sergey M․
33d152b6cc [novamov] Move all novamov based extractors to a single place
For easier navigation
2015-12-06 09:29:41 +06:00
remitamine
51c4fec0d5 [nba] use int_or_none for tbr 2015-12-05 21:04:22 +01:00
remitamine
0017486dca [nba] use int instead of int_or_none 2015-12-05 20:58:44 +01:00
Sergey M․
edc70f4aaf [pluralsight] Fix format code split while guessing quality 2015-12-06 01:40:13 +06:00
Sergey M․
756926ff00 [pluralsight] Add support for widescreen videos (Closes #7766) 2015-12-06 01:39:28 +06:00
remitamine
cb160dd531 [nba] handle format info properly 2015-12-05 18:47:15 +01:00
Jaime Marquínez Ferrándiz
77334ccb44 [metacafe] Fix age limit extraction 2015-12-05 16:12:50 +01:00
Jaime Marquínez Ferrándiz
796db21295 [metacafe] Fix video url extraction (closes #7763) 2015-12-05 16:12:02 +01:00
Philipp Hagemeister
535d7b681b release 2015.12.05 2015-12-05 16:01:37 +01:00
Sergey M․
960e038886 [hypem] Modernize 2015-12-05 20:46:57 +06:00
Sergey M․
ea14422ff1 [hypem] Correctly handle cookies (Closes #7762) 2015-12-05 20:42:21 +06:00
Yen Chi Hsuan
38d05d17e5 [fc2] Fix test_FC2_1 2015-12-05 21:10:26 +08:00
Yen Chi Hsuan
db9bd5267f [keezmovies] Fix extraction
Also fixes #7752
2015-12-05 17:26:13 +08:00
remitamine
ab3b773bbe [acast] change tests to more stable casts and use the channel extractor only if the URL didn't match the cast regex 2015-12-05 10:14:34 +01:00
Yen Chi Hsuan
0bc4ee60e0 [bbc] Fix test_BBC_6 2015-12-05 16:55:53 +08:00
Yen Chi Hsuan
a3ef0e1cdd [bbc.co.uk] Skip removed test video 2015-12-05 16:55:53 +08:00
Yen Chi Hsuan
679bacf0b5 [bbc.co.uk] Fix test_BBCCoUk
This is similar to the one in #7756, so it also fixes #7756.
2015-12-05 16:55:53 +08:00
remitamine
02e3952f3b [trilulilu] handle errors 2015-12-05 09:42:00 +01:00
Yen Chi Hsuan
64b7e89c0c [srf] Support audios (closes #7760) 2015-12-05 16:26:30 +08:00
remitamine
bee4c5571a [clipfish] improve extraction 2015-12-04 16:38:05 +01:00
remitamine
96929dd1e8 [skynewsarabia] fix extractor name 2015-12-04 16:23:44 +01:00
remitamine
53e06b2507 [ooyala] fix duration scale 2015-12-04 16:18:02 +01:00
remitamine
b80d4bebf3 [nba] fix extraction errors 2015-12-04 16:04:22 +01:00
Jaime Marquínez Ferrándiz
55bec9b658 [clipfish] Remove unused import and style fix 2015-12-04 14:29:37 +01:00
Jaime Marquínez Ferrándiz
2a63b0f110 [mixcloud] Fix extraction of the audio url (fixes #7751) 2015-12-04 14:26:34 +01:00
remitamine
07b88cffce Merge pull request #7686 from remitamine/acast
[acast] Add new extractor
2015-12-04 09:10:02 +01:00
remitamine
58c8451f36 Merge pull request #7660 from remitamine/gameinformer
[gameinformer] Add new extractor(closes #3376)
2015-12-04 09:03:21 +01:00
remitamine
3047121c63 Merge pull request #7320 from remitamine/adobetv
[adobetv] improve extraction and add support for specific-language video, show and channel extraction
2015-12-04 08:54:06 +01:00
remitamine
7079f8ff1f [adobetv] use compat_str 2015-12-04 08:44:18 +01:00
remitamine
2c3b9f3570 [adobetv] use a variable for api base url 2015-12-04 08:37:08 +01:00
remitamine
fad2428f47 [gameinformer] split long line 2015-12-04 08:24:04 +01:00
remitamine
c3d3110f6a Merge pull request #7185 from remitamine/ooyala
[ooyala] extract more formats and metadata
2015-12-04 08:23:21 +01:00
remitamine
79ec00276c Merge pull request #7326 from remitamine/clipfish
[clipfish] improve info extraction
2015-12-04 07:57:58 +01:00
remitamine
9c117d345f [nba] improve(fixes #7068)
* extract more formats
* extract videos from team mini sites
* extract more metadata
2015-12-04 07:20:27 +01:00
remitamine
46cc1c65a4 [nba] use xpath utils 2015-12-04 07:09:48 +01:00
remitamine
71d9fe7818 [trilulilu] improve extraction 2015-12-04 06:53:33 +01:00
remitamine
4ccabf93db [trilulilu] fix info extraction 2015-12-04 00:51:02 +01:00
remitamine
6612a34939 [bilibili] flake8 2015-12-03 22:43:19 +01:00
remitamine
e5b4225f7c [audimedia] flake8 2015-12-03 22:25:08 +01:00
remitamine
b2ca35ddbc Merge pull request #7745 from remitamine/bilibili
[bilibili] use xpath_text and catch errors in xml document
2015-12-03 22:11:41 +01:00
remitamine
76ab842d9b [bilibili] use xpath_text and catch errors in xml document 2015-12-03 22:01:32 +01:00
remitamine
24dc1ed715 Merge pull request #7659 from remitamine/audimedia
[audimedia] Add new extractor(closes #7654)
2015-12-03 20:28:52 +01:00
remitamine
682d8dcd21 Merge pull request #7210 from remitamine/bilibili
[bilibili] fix info extraction(fixes #7182)
2015-12-03 20:16:54 +01:00
remitamine
640bb54e73 Merge branch 'master' of https://github.com/rg3/youtube-dl into bilibili 2015-12-03 20:05:11 +01:00
Sergey M․
e0977d7686 [beeg] Decrypt URL (Closes #7736) 2015-12-04 00:59:32 +06:00
remitamine
112ab398db Merge pull request #7681 from remitamine/skynewarabia
[skynewsarabia] Add new extractor
2015-12-03 18:41:38 +01:00
Sergey M․
af93fcfa05 [beeg] Update API URL (Closes #7736) 2015-12-03 23:23:36 +06:00
Sergey M․
62d231c004 [extractor/common] Clarify duration can be float 2015-12-03 20:55:02 +06:00
Sergey M․
49358274d7 [bbc] Fix _VALID_URL 2015-12-03 20:49:14 +06:00
Jaime Marquínez Ferrándiz
7b1e379ca9 [gametrailers] Fix extraction (fixes #7722)
They have stopped using the MTV system.
2015-12-03 13:47:21 +01:00
Sergey M․
22d7368dfb [bbc] Extract _ID_REGEX and add one more video id pattern (Closes #7724) 2015-12-02 02:34:31 +06:00
Sergey M․
24121bc703 [udemy] Make lecture downloading fatal 2015-12-02 00:53:03 +06:00
Sergey M․
9fc87fa767 [udemy] Remove unused import 2015-12-02 00:51:47 +06:00
Sergey M․
328f82d59a [udemy] Semi-switch to api 2.0 (Closes #7704)
* Use api 2.0 to get lectures since it provides more formats
* Fix authorization for api 2.0
* Automatically try enrolling in the course for single lectures
* Extract additional metadata from asset['data']['outputs']
2015-12-02 00:48:27 +06:00
Sergey M․
78717fc328 [udemy] Allow authentication via cookies 2015-12-01 22:10:10 +06:00
Sergey M․
3b35c3425e [udemy] Extract formats from data.outputs (#7704) 2015-12-01 20:35:46 +06:00
Sergey M․
874ae0354e [nrk] Extract f4m formats and impose geo restriction only when not a media URL (Closes #7715) 2015-12-01 18:35:24 +06:00
Sergey M․
4c6b4764f0 [youtube] Clarify itag 272 possible resolutions (#7699) 2015-11-30 20:42:05 +06:00
Sergey M․
59ee8a8647 [facebook] Make alternative title optional (Closes #7700) 2015-11-30 20:10:09 +06:00
Sergey M․
af284305d5 [vodlocker] Capture file not found error (Closes #7696) 2015-11-30 03:58:39 +06:00
Sergey M․
d53a4af1a4 [pornhub:playlist] Allow alphanumeric viewkeys (Closes #7695) 2015-11-30 03:47:01 +06:00
Sergey M․
2e1b928540 [youtube:playlist] Extend _VALID_URL 2015-11-29 21:04:11 +06:00
Sergey M․
040ac68679 [youtube] Extend _VALID_URL (Closes #7694) 2015-11-29 21:01:59 +06:00
Yen Chi Hsuan
049d71d874 [youtube] Simplify and make sure header values are strings 2015-11-29 19:52:48 +08:00
Sergey M․
bf2c8c8f82 [spiegel] Fix extraction (Closes #7693) 2015-11-29 17:03:33 +06:00
Yen Chi Hsuan
ef428960c9 Merge pull request #7691 from ryandesign/use-PYTHON-env-var
Always use PYTHON env var in Makefile
2015-11-29 13:08:46 +08:00
Yen Chi Hsuan
992fc9d6e1 [utils] Refactor handle_youtubedl_headers for future extension 2015-11-29 12:58:29 +08:00
Ryan Schmidt
8639f89f51 Always use PYTHON env var in Makefile 2015-11-28 22:56:24 -06:00
Yen Chi Hsuan
0424ec307b [utils] Correct docstring of YoutubeDLHandler 2015-11-29 12:46:04 +08:00
Yen Chi Hsuan
ac5a69af45 [youtube] Disable compression for live streams 2015-11-29 12:44:24 +08:00
Yen Chi Hsuan
94e8c80473 [downloader/hls] Respect Youtubedl-* headers 2015-11-29 12:43:59 +08:00
Yen Chi Hsuan
87f0e62d94 [utils] Separate codes for handling Youtubedl-* headers 2015-11-29 12:42:50 +08:00
remitamine
46b4070f3f Merge pull request #7057 from remitamine/cspan
[cspan] correct the clip info extraction (fixes #7335)
2015-11-28 21:36:52 +01:00
remitamine
2a776f9788 [cspan] change into a function 2015-11-28 20:22:31 +01:00
remitamine
f4c7ef9862 [skynewsarabia] return empty categories array if there is no topic 2015-11-28 18:20:44 +01:00
remitamine
50e12e9df1 [acast] Add new extractor 2015-11-28 18:10:37 +01:00
Sergey M․
b7faebbac8 [bloomberg] Improve formats extraction 2015-11-28 22:45:19 +06:00
Sergey M․
4191fdf147 [bloomberg] Improve video id regex 2015-11-28 22:41:39 +06:00
Sergey M․
9a4f12be98 [bloomberg] Modernize 2015-11-28 22:40:29 +06:00
Sergey M․
7ad4258add [bloomberg] Relax _VALID_URL even more (Closes #7685) 2015-11-28 22:39:36 +06:00
Sergey M․
9945c4994c Credit @reiv for soundcloud:search 2015-11-28 20:21:03 +06:00
Sergey M․
5faf9fed7e [youtube] Clarify rationale for yt:stretch validation 2015-11-28 18:50:21 +06:00
Sergey M
13a9b69b09 Merge pull request #7677 from lalinsky/yt-stretch-zero-height
[youtube] Ignore yt:stretch with zero width/height
2015-11-28 18:14:06 +06:00
remitamine
4975650e00 [skynewsarabia] fix IE_NAME 2015-11-28 12:20:39 +01:00
remitamine
0cc7178546 [skynewsarabia] Add new extractor 2015-11-28 11:48:18 +01:00
Lukáš Lalinský
41f24c321d [youtube] Use the existing w and h variables 2015-11-28 08:16:46 +01:00
Yen Chi Hsuan
4b3fbafdd2 [options] Changed wording for --list-formats
As proposed by @dstftw at 9bff48a0e7
2015-11-28 14:14:20 +08:00
Sergey M․
7ac40086f5 [dbtv] Expand _VALID_URL (Closes #7645) 2015-11-28 08:44:13 +06:00
Lukáš Lalinský
313dfc45f5 [youtube] Ignore yt:stretch with zero width/height 2015-11-28 01:07:07 +01:00
Philipp Hagemeister
78a55d7a28 release 2015.11.27.1 2015-11-27 16:39:59 +01:00
Philipp Hagemeister
bb6ac83698 release 2015.11.27 2015-11-27 16:32:51 +01:00
Yen Chi Hsuan
9d0e366880 [downloader/hls] Remove Accept-encoding from headers passed to ffmpeg
Fails for Youtube Gaming live streams (#7671)
2015-11-27 21:37:45 +08:00
Yen Chi Hsuan
9bff48a0e7 [options] Clarify --list-formats needs videos (closes #7669) 2015-11-27 21:24:39 +08:00
remitamine
60121eb514 [gameinformer] Add new extractor 2015-11-26 22:43:31 +01:00
remitamine
527ca1da4f [audimedia] Add new extractor(closes #7654) 2015-11-26 21:24:10 +01:00
Sergey M
7689413e42 [README.md] Mention mplayer and mpv in "other programs" question 2015-11-24 23:06:21 +06:00
Philipp Hagemeister
ba7a92b0ce release 2015.11.24 2015-11-24 07:46:38 +01:00
Philipp Hagemeister
4c7d816dd7 [jsinterp] Adapt to updated YouTube code generation (Fixes #7623, fixes #7624, fixes #7625, fixes #7626) 2015-11-24 07:45:38 +01:00
Philipp Hagemeister
032f2f260f README: Document which other programs may be helpful (Fixes #7621) 2015-11-24 03:38:46 +01:00
Philipp Hagemeister
20e98bf6c0 release 2015.11.23 2015-11-23 18:07:58 +01:00
Sergey M․
5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00
Sergey M․
67dda51722 Rename compat_urllib_request_Request to sanitized_Request and move to utils 2015-11-23 21:55:15 +06:00
Sergey M․
e4c4bcf36f [vimeo] Use compat_urllib_request_Request 2015-11-23 21:55:14 +06:00
Sergey M․
82d8a8b6e2 [YoutubeDL] Wrap plain-text URL requests in compat_urllib_request_Request 2015-11-23 21:55:13 +06:00
Sergey M․
13a10d5aa3 [compat] Add compat_urllib_request_Request
This is actually not a compatibility routine but rather a workaround for URLs without a protocol specified.
A protocol-less URL is treated as an HTTP one, since that is the most probable scenario, and it will most
likely redirect to HTTPS if HTTPS was actually expected. This routine could also be useful for any Request
preprocessing that may be added in the future.
2015-11-23 21:55:12 +06:00
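To illustrate the idea, here is a minimal sketch only, assuming the helper ends up as `sanitized_Request` in `youtube_dl/utils.py` as per the rename commit listed just above; the implementation details below are guesses, not the actual youtube-dl code:
```
# Minimal sketch of the workaround described above (not the actual
# youtube-dl implementation): treat protocol-less URLs as HTTP.
try:
    from urllib.request import Request  # Python 3
except ImportError:
    from urllib2 import Request  # Python 2

def sanitize_url(url):
    # "//example.com/video.mp4" is assumed to be HTTP; the server can still
    # redirect to HTTPS if that is what it actually expects.
    return 'http:' + url if url.startswith('//') else url

def sanitized_Request(url, *args, **kwargs):
    # Single entry point for any future Request preprocessing.
    return Request(sanitize_url(url), *args, **kwargs)

req = sanitized_Request('//example.com/video.mp4')
print(req.get_full_url())  # http://example.com/video.mp4
```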
Sergey M․
9022726446 [youtube] Fix test 2015-11-23 21:37:21 +06:00
Sergey M․
94bfcd23b7 [youtube] Fix test 2015-11-23 21:35:23 +06:00
Sergey M․
526b3b0716 [youtube] Clarify ytplayer.config extraction rationale 2015-11-23 21:14:03 +06:00
Sergey M․
61f92af1cf [youtube] Add test with '};' in tags 2015-11-23 21:02:37 +06:00
Sergey M․
a72778d364 [youtube] Improve ytplayer.config extraction 2015-11-23 21:00:06 +06:00
Sergey M
5ae17037a3 Merge pull request #7599 from lalinsky/fix-youtube
[youtube] More explicit player config JSON extraction (fixes #7468)
2015-11-23 20:52:23 +06:00
Sergey M․
02f0da20b0 [pluralsight] Add support for alternative webpage layout (Closes #7607) 2015-11-23 03:08:38 +06:00
Lukáš Lalinský
b41631c4e6 [youtube] Send the list of patterns directly to _search_regex 2015-11-22 13:53:26 +01:00
Lukáš Lalinský
0e49d9a6b0 [youtube] Fall back to the original regex for ytplayer.config 2015-11-22 13:49:33 +01:00
Sergey M․
4a7d108ab3 [rutube] Remove unnecessary print 2015-11-22 18:24:17 +06:00
Lukáš Lalinský
3cfd000849 [youtube] More explicit player config JSON extraction (fixes #7468) 2015-11-22 13:14:35 +01:00
Sergey M․
1b38185361 [pornhd] Fix title extraction (Closes #7596) 2015-11-22 18:08:30 +06:00
Sergey M․
9cb9a5df77 [utils] Check ext with trailing slash against the list of known extensions 2015-11-22 17:27:13 +06:00
Sergey M․
5035536e3f [test_utils] Add tests for determine_ext 2015-11-22 06:33:52 +06:00
Sergey M․
3e12bc583a [utils] Improve determine_ext (Closes #7593) 2015-11-22 06:29:39 +06:00
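A rough sketch of the behaviour these three determine_ext commits describe, simplified and hedged: `KNOWN_EXTENSIONS` and `guess_ext` below are stand-ins, and the real `determine_ext` in `youtube_dl/utils.py` differs in detail. The example URLs are taken from the test diff further down this page.
```
# Simplified illustration: when the URL path has a trailing slash, only trust
# the guessed extension if it is a known media/container extension.
KNOWN_EXTENSIONS = ('mp4', 'm4a', 'webm', 'flv', 'f4m', 'm3u8', 'mp3')  # stand-in list

def guess_ext(url, default_ext=None):
    path = url.partition('?')[0]
    had_trailing_slash = path.endswith('/')
    filename = path.rstrip('/').rpartition('/')[2]
    if '.' not in filename:
        return default_ext
    ext = filename.rpartition('.')[2]
    if had_trailing_slash and ext not in KNOWN_EXTENSIONS:
        return default_ext
    return ext

assert guess_ext('http://example.com/foo/bar.mp4/?download') == 'mp4'
assert guess_ext('http://example.com/foo/bar.nonext/?download') is None
assert guess_ext('http://example.com/foo/bar/mp4?download') is None
assert guess_ext('http://example.com/foo/bar.m3u8//?download') == 'm3u8'
```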
Sergey M․
e568c2233e [youtube] Add test for multi page list of playlists 2015-11-22 05:03:23 +06:00
Sergey M․
061a75edd6 [youtube] Extract base for entry list extractors and support multi page lists of playlists 2015-11-22 05:01:01 +06:00
Philipp Hagemeister
82c4d7b0ce release 2015.11.21 2015-11-21 23:36:27 +01:00
Sergey M․
136dadde95 [youtube:show] Rework in terms of playlists base extractor 2015-11-22 04:18:20 +06:00
Sergey M․
0c14841585 [youtube:user:playlists] Add extractor (Closes #3817) 2015-11-22 04:17:07 +06:00
Sergey M․
0eebf34d9d [pluralsight] Rephrase 2015-11-22 00:58:25 +06:00
Sergey M․
cf186b77a7 [pluralsight] Clarify allowed qualities guessing rationale 2015-11-22 00:56:40 +06:00
Sergey M․
a3372437bf [soundcloud] Remove unused variable 2015-11-22 00:49:58 +06:00
Sergey M․
4c57b4853d [pluralsight] Request only a single format until listing formats 2015-11-22 00:42:58 +06:00
Sergey M․
38eb2968ab [pluralsight] Clarify and randomize ViewClip sleep interval 2015-11-22 00:07:09 +06:00
Andrzej Lichnerowicz
bea56c9569 [pluralsight] prevent error 429 when sensing video formats 2015-11-21 23:49:58 +06:00
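The technique behind these two pluralsight commits is a randomized pause between the per-format ViewClip requests so the access pattern does not trip the server's rate limiter. A generic sketch follows; the function name and interval bounds are illustrative, not the actual youtube-dl code:
```
import random
import time

def probe_formats(fetch_format, format_ids, min_sleep=2.0, max_sleep=5.0):
    # Fetching format info back-to-back tends to trigger HTTP 429
    # (Too Many Requests); a randomized delay keeps the request pattern
    # slower and less regular.
    results = []
    for fmt in format_ids:
        results.append(fetch_format(fmt))
        time.sleep(random.uniform(min_sleep, max_sleep))
    return results
```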
Sergey M․
7e508ff2cf [pluralsight] Improve login detection 2015-11-21 21:49:37 +06:00
Sergey M․
563772eda4 [pluralsight] Extract base class 2015-11-21 21:37:29 +06:00
Sergey M․
0533915aad [pluralsight] Update some more URLs 2015-11-21 21:35:08 +06:00
Sergey M․
c3a227d1c4 [pluralsight] Update _LOGIN_URL 2015-11-21 21:25:48 +06:00
Sergey M․
f6c903e708 [soundcloud:search] Simplify (Closes #7213) 2015-11-21 21:21:21 +06:00
Sergey M․
7dc011c063 [soundcloud:search] Remove no track results message 2015-11-21 21:00:42 +06:00
Sergey M․
4e3b303016 [soundcloud:search] Fix non-ASCII searches 2015-11-21 20:55:48 +06:00
Sergey M․
7e1f5447e7 [utils] Improve encode_dict 2015-11-21 20:46:33 +06:00
Sergey M․
7e3472758b [soundcloud:search] PEP 8 2015-11-21 20:04:35 +06:00
reiv
328a22e175 [soundcloud] Remove limit on search results 2015-11-21 19:41:36 +06:00
reiv
417b453699 [soundcloud] Use correct error message conventions 2015-11-21 19:41:31 +06:00
reiv
6ea7190a3e Rewrite as list comprehension. 2015-11-21 19:41:26 +06:00
reiv
b54b08c91b Simplify with itertools.islice(). 2015-11-21 19:41:19 +06:00
reiv
c30943b1c0 Fix some compatibility issues, cleanup. 2015-11-21 19:41:15 +06:00
reiv
2abf7cab80 [soundcloud] Add Soundcloud search extractor 2015-11-21 19:41:08 +06:00
Sergey M․
4137196899 [rutube] Extract all formats 2015-11-21 18:02:52 +06:00
Sergey M․
019839faaa [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction 2015-11-21 18:01:39 +06:00
Sergey M․
f52183a878 [rutube:embed] Extend _VALID_URL (Closes #7588) 2015-11-21 17:39:24 +06:00
Yen Chi Hsuan
750b9ff032 [generic] Extract M3U8 formats (closes #7582) 2015-11-21 16:43:01 +08:00
Yen Chi Hsuan
28602e747c [generic] Refactor 2015-11-21 16:08:54 +08:00
Yen Chi Hsuan
6cc37c69e2 [generic] Unescape URLs from JWPlayer (#7582) 2015-11-21 14:12:34 +08:00
Sergey M․
a5cd0eb8a4 [pluralsight:course] Improve _VALID_URL 2015-11-21 08:32:48 +06:00
Sergey M․
c23e266427 [pluralsight] Do not require pluralsight account
Looks like some courses are available without pluralsight account
2015-11-21 08:25:52 +06:00
Sergey M․
651acffbe5 [pluralsight] Update ViewClip URL 2015-11-21 08:21:33 +06:00
Sergey M․
71bd93b89c [pluralsight] Do not rely on argument order in query (Closes #7583) 2015-11-21 08:08:34 +06:00
Sergey M․
6da620de58 [kaltura] Add test for referrer protected video (#7409) 2015-11-21 01:40:28 +06:00
Sergey M․
bdceea7afd [kaltura] Clean description 2015-11-21 01:39:29 +06:00
Sergey M․
d80a39cec8 [kaltura] Improve 2015-11-21 01:38:08 +06:00
Sergey M․
5b5fae5f20 [generic] Use referrer from source kaltura embed URLs (#7409) 2015-11-21 01:35:58 +06:00
Sergey M․
01b06aedcf [kaltura] Add support for referrer protected videos (#7409) 2015-11-21 01:34:02 +06:00
Sergey M
c711383811 Merge pull request #7579 from ashutosh-mishra/typo_fix
Typo fix, found while going through the code.
2015-11-20 23:24:54 +06:00
ashutosh-mishra
17cc153435 Typo fix, found while going through the code. 2015-11-20 22:51:46 +05:30
Sergey M․
67446fd49b [instagram] Improve _VALID_URL (Closes #7568) 2015-11-20 04:07:39 +06:00
Sergey M․
325bb615a7 [theplatform] Style 2015-11-19 22:58:43 +06:00
Sergey M․
ee5cd8418e [theplatform] Handle protocolless feed URLs (Closes #7532) 2015-11-19 22:58:29 +06:00
Sergey M․
342609a1b4 [bloomberg] Relax _VALID_URL (Closes #7546) 2015-11-19 22:55:06 +06:00
Sergey M
f270cf1a26 Merge pull request #7519 from barlik/master
Clarify that automatic subtitles are generated.
2015-11-19 22:44:08 +06:00
hedii
371c3b796c [YoutubeDL] Add playlist finished downloading message (Closes #7517)
Conflicts:
	youtube_dl/YoutubeDL.py
2015-11-19 22:39:02 +06:00
Sergey M․
6b7ceee1b9 [vimeo] Add test for #7552 2015-11-19 22:31:16 +06:00
Sergey M․
fdb20a27a3 [vimeo:group] Improve _VALID_URL (Closes #7552) 2015-11-19 22:30:58 +06:00
Sergey M․
2c94198eb6 [vimeo] Improve playlists extraction 2015-11-19 21:29:32 +06:00
Philipp Hagemeister
e8110b8125 release 2015.11.19 2015-11-19 15:35:13 +01:00
Yen Chi Hsuan
c39fd7b1ca [UDNEmbed] Fix generic UDN pages
Closes #7547
2015-11-19 22:32:56 +08:00
Sergey M․
a9c09a7c62 [pbs] Update API URL (Closes #7565) 2015-11-19 20:25:28 +06:00
Philipp Hagemeister
82beaabb41 release 2015.11.18 2015-11-18 19:23:04 +01:00
Jaime Marquínez Ferrándiz
63b4295d20 [youtube:playlist] fix title extraction (fixes #7544 and #7545) 2015-11-18 18:28:05 +01:00
Sergey M․
312a3f389b [pbs] Extend _VALID_URL 2015-11-18 00:46:41 +06:00
Jaime Marquínez Ferrándiz
609af1ae1c [dplay] Add 'encoding: utf-8' line 2015-11-17 17:58:16 +01:00
Jaime Marquínez Ferrándiz
4cd759f73d [dplay] Add extractor (closes #7515)
Since I haven't figured out how to download the hds stream, we use the hls one instead.
2015-11-17 17:52:29 +01:00
Jaime Marquínez Ferrándiz
e156e70281 [rtve] Remove unused import 2015-11-17 16:23:29 +01:00
Sergey M․
9b464929fe [rtve.es:alacarta] Fix extraction 2015-11-17 21:11:42 +06:00
Sergey M
0c176d7bde Merge pull request #7514 from ping/patch-7301
[neteasemusic] Fixes #7301
2015-11-16 14:25:29 +00:00
Sergey M․
7a3f0c00ad [utils] Style 2015-11-16 20:24:09 +06:00
Sergey M․
7aefc49c40 [utils] Skip invalid/non HTML entities (Closes #7518) 2015-11-16 20:20:16 +06:00
Rastislav Barlik
741dd8ea65 Clarify that automatic subtitles are generated.
It wasn't clear what the word "automatic" meant.
2015-11-16 14:15:25 +00:00
ping
76adc82068 [neteasemusic] Fixes #7301 2015-11-16 11:39:18 +08:00
Philipp Hagemeister
bd1512d196 release 2015.11.15 2015-11-15 22:16:08 +01:00
Sergey M․
9a4acbfaf5 [theplatform] Add test for #7385 2015-11-16 00:28:04 +06:00
Sergey M․
ad1f4e7902 [theplatform] Handle explicitly specified SMIL (#7385) 2015-11-15 23:43:23 +06:00
Sergey M
b328295910 Merge pull request #7436 from davidbz/add_proxy_to_update_procedure
Add proxy support for update_self
2015-11-15 11:13:22 +00:00
David Ben Zakai
828b2a5cd9 Removing an unnecessary import 2015-11-15 09:40:32 +02:00
Sergey M․
2ff7cbeaaa [nowtv:list] Add extractor (Closes #7147) 2015-11-15 08:30:13 +06:00
Sergey M․
b2f7738830 [dumpert] Use original protocol 2015-11-15 02:25:00 +06:00
Sergey M․
dc0279532a [dumpert] Disable SSL (Closes #7504) 2015-11-15 02:21:24 +06:00
Sergey M․
0c59d02bdc [periscope] Relax _VALID_URL (Closes #7503) 2015-11-15 00:20:17 +06:00
Jaime Marquínez Ferrándiz
0f72beb515 [periscope] Remove unused imports 2015-11-14 18:31:33 +01:00
Sergey M․
d781e29316 [bbc] Allow selectionunavailable errors (Closes #7502) 2015-11-14 23:08:13 +06:00
Sergey M․
3b3e8ed332 [quickscope] Remove extractor (2) 2015-11-14 22:34:30 +06:00
Sergey M․
dcdfeb33d2 [quickscope] Remove extractor 2015-11-14 22:32:54 +06:00
Sergey M․
0d85c3a732 [lynda] Style 2015-11-14 16:44:24 +06:00
Sergey M․
903d136942 [lynda] Logout only when login info present (Closes #7500) 2015-11-14 16:43:58 +06:00
Yen Chi Hsuan
9d584da7d0 [xfileshare] Correct _VALID_URL 2015-11-14 17:27:32 +08:00
Yen Chi Hsuan
31752f76f7 [twitter:card] Add add_ie for the external test 2015-11-14 17:03:26 +08:00
Yen Chi Hsuan
5f1b2aea80 [twitter:card] Support vine.co embeds (closes #7496) 2015-11-14 17:02:07 +08:00
Sergey M․
4479600d57 [instagram] Add test for #7497 2015-11-14 07:21:20 +06:00
Sergey M․
a90189c3ad [instagram] Relax _VALID_URL (Closes #7497) 2015-11-14 07:20:33 +06:00
Sergey M․
d8a1caf04f [brightcove:new] Style 2015-11-14 06:22:12 +06:00
Sergey M․
cb33d389ed [brightcove:new] Add test with rtmp streams 2015-11-14 06:20:09 +06:00
Sergey M․
967e0955f0 Merge branch 'remitamine-brightcove_in_page_embed' 2015-11-14 06:11:49 +06:00
Sergey M․
e01b432ad3 [brightcove:new] Fix test 2015-11-14 06:11:17 +06:00
Sergey M․
fd91257c40 [brightcove] Order imports alphabetically 2015-11-14 06:08:36 +06:00
Sergey M․
c7b959ce38 [utils] Remove unused function 2015-11-14 06:07:44 +06:00
Sergey M․
75eac8961e [brightcove] Remove unused import 2015-11-14 06:07:24 +06:00
Sergey M․
3b7d9aa487 Rename all references to legacy studio Brightcove extractor 2015-11-14 06:05:46 +06:00
Sergey M․
1f4b722b00 [generic] Clarify Brightcove Legacy Studio comment 2015-11-14 06:03:32 +06:00
Sergey M․
f6519f89b0 [generic] Extract Brightcove New Studio embeds 2015-11-14 06:03:07 +06:00
Sergey M․
24af85298e [brightcove] Fix _extract_urls 2015-11-14 06:01:56 +06:00
Sergey M․
e721d857c2 [brightcove] Clarify IE_NAMEs 2015-11-14 05:56:51 +06:00
Sergey M․
5c17f0a67a [brightcove:embedinpage] Rename extractor to brightcove new
It's not actually embed_in_page but "New Studio" and allows both iframe and embed_in_page embeds
2015-11-14 05:55:59 +06:00
Sergey M․
4fcaa4f4a5 [brightcove] Rename extractor to brightcove legacy
Old embedding approaches are now "Legacy Studio"
2015-11-14 05:54:16 +06:00
Sergey M․
536f819eda [brightcove] Improve extraction of new embeds 2015-11-14 05:51:05 +06:00
Sergey M․
a662489877 [brightcove:embedinpage] Make more robust and extract rtmp streams 2015-11-14 05:09:50 +06:00
Sergey M․
a2973eb597 Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube-dl into remitamine-brightcove_in_page_embed 2015-11-14 01:23:15 +06:00
Sergey M․
4e21b3a94f [cbs] Use android UA for higher quality streams (Closes #7490) 2015-11-14 00:25:28 +06:00
Jaime Marquínez Ferrándiz
b703ebeeaf [twitter] Don't fail if the description doesn't contain an URL (fixes #7489) 2015-11-13 19:09:42 +01:00
Jaime Marquínez Ferrándiz
b84a5f0337 [twitter] Update tests checksums 2015-11-13 18:55:07 +01:00
Philipp Hagemeister
a1ec9a7553 release 2015.11.13 2015-11-13 11:07:30 +01:00
Sergey M․
91d644b5ba [ruutu] Relax formats extraction 2015-11-13 02:43:27 +06:00
Sergey M․
5d6c3d6a66 [ruutu] Skip NOT-USED URLs(Closes #7478) 2015-11-13 02:41:38 +06:00
Jaime Marquínez Ferrándiz
1ebb4717df [cbsnews] Fix construction of 'play_path' in some videos (fixes #7394) 2015-11-12 21:02:56 +01:00
Yen Chi Hsuan
cf5881fc4d Credit @ferama
For providing idea for vidto.me (#7167) and extending nowvideo support (#6760)
2015-11-12 21:33:46 +08:00
Sergey M․
fcd817a326 [vimeo] Fix extraction (Closes #7460) 2015-11-12 03:56:11 +06:00
Sergey M․
031ec536f0 [gorillavid] Rename to xfileshare 2015-11-11 23:00:53 +06:00
Sergey M․
668db403f9 [gorillavid] Add test for vidto.me and strip title 2015-11-11 22:47:28 +06:00
Sergey M․
b9ad101926 [gorillavid] Add support for vidto.me 2015-11-11 22:44:03 +06:00
Sergey M․
435911029f [vidto] Remove extractor 2015-11-11 22:43:17 +06:00
Sergey M․
699ed30cee [novamov] Modernize 2015-11-11 22:34:49 +06:00
Sergey M․
9eab37dca0 [vimeo] Simplify set cookie 2015-11-11 22:32:13 +06:00
Sergey M․
9a8a12b7d8 [vimeo] Append cookies instead of overriding 2015-11-11 22:23:23 +06:00
Yen Chi Hsuan
a4c2ab35c1 Merge remote-tracking branch 'upstream/master' 2015-11-12 00:08:42 +08:00
Sergey M․
3d9c4bf09a [vimeo] Fix password protected videos (Closes #7451) 2015-11-11 21:21:21 +06:00
Yen Chi Hsuan
8b8a39e279 [vidto] Several simplifications and improvements
1. Use InfoExtractor._hidden_inputs
2. Fetch title from <title> tag
3. Cookies are preserved automatically
4. Use single quotes everywhere
5. Do not declare variables for one-time use only
2015-11-11 23:17:59 +08:00
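As an illustration of the first two points in that list, here is a hypothetical extractor skeleton. It is not the actual vidto.me code: the `_VALID_URL`, regexes and field handling are invented, while `_hidden_inputs`, `sanitized_Request` and `urlencode_postdata` are existing youtube-dl helpers.
```
# Hypothetical skeleton (not the actual vidto.me extractor) showing the
# pattern: reuse InfoExtractor._hidden_inputs for the continue-form fields
# and take the title from the <title> tag.
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.utils import sanitized_Request, urlencode_postdata


class ExampleHiddenFormIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?example\.com/(?P<id>[a-z0-9]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # All hidden <input> fields of the form, as a dict ready to be posted.
        fields = self._hidden_inputs(webpage)
        # Cookies set by the first request are kept by the shared opener,
        # so no manual cookie handling is needed for the POST below.
        req = sanitized_Request(url, urlencode_postdata(fields))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            req, video_id, 'Submitting continue form')
        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        video_url = self._search_regex(
            r'file\s*:\s*["\'](http[^"\']+)', webpage, 'video URL')
        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
```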
Sergey M․
82393e2bb2 [novamov] Follow continue-to-the-video button if any (Closes #7330) 2015-11-11 21:02:05 +06:00
Sergey M․
2eb99a4b98 [nowvideo] Replace main host to resolvable one 2015-11-11 21:00:23 +06:00
Yen Chi Hsuan
6abce58a12 Credit @sieben for fixing wsj extractor 2015-11-11 20:16:18 +08:00
Yen Chi Hsuan
990e6e8fa3 [vidto] Minor fixes
1. import order
2. fatal is already True in helper functions
2015-11-11 20:13:03 +08:00
Yen Chi Hsuan
bfd88516eb Merge pull request #7454 from sieben/duplicate_keys
Remove duplicate key
2015-11-11 20:00:13 +08:00
Rémy Léone
d8b7e80d29 Remove duplicate key 2015-11-11 12:00:31 +01:00
Yen Chi Hsuan
37120974dc [vidto] PEP8 2015-11-11 02:02:46 +08:00
Marco Ferragina
42fc93c709 vidto extractor: code cleanup 2015-11-11 01:58:47 +08:00
Marco Ferragina
a625e56543 [vidto] Add extractor 2015-11-11 01:52:43 +08:00
Sergey M․
9b738b2caa [funnyordie] Fix extraction and extract m3u8 formats 2015-11-10 21:32:54 +06:00
David Ben Zakai
90bb5667bf Using internal opener 2015-11-10 17:15:23 +02:00
David Ben Zakai
d3d3e2e3aa Adding proxy to update procedure 2015-11-10 16:31:42 +02:00
remitamine
9550ca506f [utils] change extract_attributes to work in python 2 2015-10-31 19:36:04 +01:00
remitamine
240384afe6 [clipfish] improve info extraction 2015-10-30 20:06:38 +01:00
remitamine
9a605c8859 [adobetv] add support for show and channel extraction 2015-10-29 20:00:27 +01:00
remitamine
402ca40c9d [adobetv] extract AdobeTVVideo info from json directly 2015-10-29 19:55:04 +01:00
remitamine
30bd1c16c8 [adobetv] use api for extraction and add support for specific language videos 2015-10-29 19:44:26 +01:00
remitamine
497f5fd93f [bilibili] extract multiple backup_urls 2015-10-21 08:24:05 +01:00
remitamine
4bf5614195 [cspan] move get_text_attr to CSpanIE 2015-10-20 07:43:39 +01:00
remitamine
520e753390 [bilibili] add support for specific page extraction 2015-10-17 23:12:58 +01:00
remitamine
355c7ad361 [cspan] handle error messages and extract qualities 2015-10-17 21:30:38 +01:00
remitamine
55af2b26e0 [bilibili] extract backup url 2015-10-17 18:30:51 +01:00
remitamine
d90e40305b [bilibili] fix info extraction 2015-10-17 17:28:09 +01:00
remitamine
cce9d15d01 [ooyala] extract domain,handle errors and change related tests 2015-10-16 16:02:40 +01:00
remitamine
dd414c970b [ooyala] fix sorting and format id 2015-10-16 10:12:42 +01:00
remitamine
497ca088a6 [ooyala] remove print statement 2015-10-15 14:37:05 +01:00
remitamine
90bddb6cdd [ooyala] extract more formats and metadata 2015-10-15 14:28:56 +01:00
remitamine
6a11bb77ba [nba] add support for team subsites 2015-10-07 12:17:32 +01:00
remitamine
ecf6de5b02 [nba] extract width,height and bitrate from format key 2015-10-07 07:09:45 +01:00
remitamine
139f27827e [nba] skip Legacy Video Files 2015-10-07 06:53:19 +01:00
remitamine
30787f7259 [cspan] correct the clip info extraction 2015-10-03 19:28:48 +01:00
remitamine
c233e6bcc3 [nba] extract video info from xml feed 2015-10-03 12:30:05 +01:00
remitamine
28809ab07a [nba] extract more formats 2015-10-03 09:47:19 +01:00
remitamine
8fc226ef99 [nba] extract all video formats and extract more info 2015-10-02 17:24:30 +01:00
remitamine
c01e1a96aa [brightcove] fix test and fields extraction 2015-09-30 11:20:43 +01:00
remitamine
53407e3f38 [brightcove] fix streaming_src extraction 2015-09-23 14:02:13 +01:00
remitamine
ed1269000f [brightcove] add support for brightcove in page embed(fixes #6824) 2015-09-11 04:46:21 +01:00
remitamine
689fb748ee [utils] add extract_attributes for extracting html tag attributes 2015-09-11 04:44:17 +01:00
166 changed files with 3263 additions and 1411 deletions


@@ -144,3 +144,6 @@ Lee Jenkins
Anssi Hannula
Lukáš Lalinský
Qijiang Fan
Rémy Léone
Marco Ferragina
reiv


@@ -1,4 +1,18 @@
**Please include the full output of youtube-dl when run with `-v`**.
**Please include the full output of youtube-dl when run with `-v`**, i.e. add `-v` flag to your command line, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v http://www.youtube.com/watch?v=BaW_jenozKcj
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
...
```
**Do not post screenshots of verbose log only plain text is acceptable.**
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.


@@ -61,34 +61,34 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
chmod a+x youtube-dl
README.md: youtube_dl/*.py youtube_dl/*/*.py
COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
COLUMNS=80 $(PYTHON) youtube_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md
python devscripts/make_contributing.py README.md CONTRIBUTING.md
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
supportedsites:
python devscripts/make_supportedsites.py docs/supportedsites.md
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
README.txt: README.md
pandoc -f markdown -t plain README.md -o README.txt
youtube-dl.1: README.md
python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
rm -f youtube-dl.1.temp.md
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
$(PYTHON) devscripts/bash-completion.py
bash-completion: youtube-dl.bash-completion
youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in
python devscripts/zsh-completion.py
$(PYTHON) devscripts/zsh-completion.py
zsh-completion: youtube-dl.zsh
youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
python devscripts/fish-completion.py
$(PYTHON) devscripts/fish-completion.py
fish-completion: youtube-dl.fish


@@ -319,7 +319,8 @@ which means you can modify it, redistribute it or use it however you like.
--all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific
one is requested
-F, --list-formats List all available formats
-F, --list-formats List all available formats of requested
videos
--youtube-skip-dash-manifest Do not download the DASH manifests and
related data on YouTube videos
--merge-output-format FORMAT If a merge is required (e.g.
@@ -329,8 +330,8 @@ which means you can modify it, redistribute it or use it however you like.
## Subtitle Options:
--write-sub Write subtitle file
--write-auto-sub Write automatic subtitle file (YouTube
only)
--write-auto-sub Write automatically generated subtitle file
(YouTube only)
--all-subs Download all the available subtitles of the
video
--list-subs List all available subtitles for the video
@@ -534,6 +535,12 @@ Most people asking this question are not aware that youtube-dl now defaults to d
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
### Do I need any other programs?
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
@@ -793,7 +800,21 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
**Please include the full output of youtube-dl when run with `-v`**.
**Please include the full output of youtube-dl when run with `-v`**, i.e. add `-v` flag to your command line, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v http://www.youtube.com/watch?v=BaW_jenozKcj
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
...
```
**Do not post screenshots of verbose log only plain text is acceptable.**
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.

File diff suppressed because one or more lines are too long


@@ -121,8 +121,8 @@ class TestAllURLsMatching(unittest.TestCase):
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701


@@ -21,6 +21,7 @@ from youtube_dl.utils import (
clean_html,
DateRange,
detect_exe_version,
determine_ext,
encodeFilename,
escape_rfc3986,
escape_url,
@@ -210,8 +211,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('%20;'), '%20;')
self.assertEqual(unescapeHTML('&#x2F;'), '/')
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
def test_daterange(self):
_20century = DateRange("19000101", "20000101")
@@ -238,6 +239,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
def test_find_xpath_attr(self):
testxml = '''<root>
<node/>


@@ -28,6 +28,7 @@ if os.name == 'nt':
import ctypes
from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
@@ -63,6 +64,7 @@ from .utils import (
SameFileError,
sanitize_filename,
sanitize_path,
sanitized_Request,
std_headers,
subtitles_filename,
UnavailableVideoError,
@@ -156,7 +158,7 @@ class YoutubeDL(object):
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
@@ -833,6 +835,7 @@ class YoutubeDL(object):
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
return ie_result
elif result_type == 'compat_list':
self.report_warning(
@@ -937,7 +940,7 @@ class YoutubeDL(object):
filter_parts.append(string)
def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings
# Remove operators that we don't use and join them with the surrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
@@ -1107,6 +1110,12 @@ class YoutubeDL(object):
'contain the video, try using '
'"-f %s+%s"' % (format_2, format_1))
return
# Formats must be opposite (video+audio)
if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
self.report_error(
'Both formats %s and %s are video-only, you must specify "-f video+audio"'
% (format_1, format_2))
return
output_ext = (
formats_info[0]['ext']
if self.params.get('merge_output_format') is None
@@ -1186,7 +1195,7 @@ class YoutubeDL(object):
return res
def _calc_cookies(self, info_dict):
pr = compat_urllib_request.Request(info_dict['url'])
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
@@ -1870,6 +1879,8 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):


@@ -377,7 +377,7 @@ def _real_main(argv=None):
with YoutubeDL(ydl_opts) as ydl:
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
update_self(ydl.to_screen, opts.verbose, ydl._opener)
# Remove cache dir
if opts.rm_cachedir:


@@ -42,7 +42,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimenatal)
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.


@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import FileDownloader
from ..compat import compat_urllib_request
from ..utils import sanitized_Request
class DashSegmentsFD(FileDownloader):
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
req = compat_urllib_request.Request(target_url)
req = sanitized_Request(target_url)
if remaining_bytes is not None:
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))


@@ -13,6 +13,7 @@ from ..utils import (
encodeArgument,
encodeFilename,
sanitize_open,
handle_youtubedl_headers,
)
@@ -33,9 +34,10 @@ class HlsFD(FileDownloader):
if info_dict['http_headers'] and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])
args += [
'-headers',
''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())]
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']


@@ -7,14 +7,12 @@ import time
import re
from .common import FileDownloader
from ..compat import (
compat_urllib_request,
compat_urllib_error,
)
from ..compat import compat_urllib_error
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
sanitized_Request,
)
@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)


@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,


@@ -3,9 +3,15 @@ from __future__ import unicode_literals
from .abc import ABCIE
from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
from .acast import (
ACastIE,
ACastChannelIE,
)
from .addanime import AddAnimeIE
from .adobetv import (
AdobeTVIE,
AdobeTVShowIE,
AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
@@ -38,6 +44,7 @@ from .arte import (
)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
from .audimedia import AudiMediaIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE
from .baidu import BaiduVideoIE
@@ -60,7 +67,10 @@ from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@@ -129,6 +139,7 @@ from .dfb import DFBIE
from .dhm import DHMIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dplay import DPlayIE
from .dramafever import (
DramaFeverIE,
DramaFeverSeriesIE,
@@ -196,6 +207,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE
from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
GameOneIE,
@@ -221,7 +233,6 @@ from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
from .goshgay import GoshgayIE
from .groupon import GrouponIE
from .hark import HarkIE
@@ -346,7 +357,6 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@@ -412,14 +422,22 @@ from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
from .novamov import NovaMovIE
from .novamov import (
NovaMovIE,
WholeCloudIE,
NowVideoIE,
VideoWeedIE,
CloudTimeIE,
)
from .nowness import (
NownessIE,
NownessPlaylistIE,
NownessSeriesIE,
)
from .nowtv import NowTVIE
from .nowvideo import NowVideoIE
from .nowtv import (
NowTVIE,
NowTVListIE,
)
from .npo import (
NPOIE,
NPOLiveIE,
@@ -456,10 +474,7 @@ from .orf import (
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .periscope import (
PeriscopeIE,
QuickscopeIE,
)
from .periscope import PeriscopeIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
@@ -551,6 +566,10 @@ from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
from .smotri import (
@@ -573,7 +592,8 @@ from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
SoundcloudPlaylistIE
SoundcloudPlaylistIE,
SoundcloudSearchIE
)
from .soundgasm import (
SoundgasmIE,
@@ -728,7 +748,6 @@ from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
@@ -786,6 +805,7 @@ from .wrzuta import WrzutaIE
from .wsj import WSJIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
@@ -829,6 +849,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE


@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
class ACastBaseIE(InfoExtractor):
_API_BASE_URL = 'https://www.acast.com/api/'
class ACastIE(ACastBaseIE):
IE_NAME = 'acast'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
_TEST = {
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
'md5': 'ada3de5a1e3a2a381327d749854788bb',
'info_dict': {
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
'ext': 'mp3',
'title': '"Where Are You?": Taipei 101, Taiwan',
'timestamp': 1196172000000,
'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
'duration': 211,
}
}
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
return {
'id': compat_str(cast_data['id']),
'display_id': display_id,
'url': cast_data['blings'][0]['audio'],
'title': cast_data['name'],
'description': cast_data.get('description'),
'thumbnail': cast_data.get('image'),
'timestamp': int_or_none(cast_data.get('publishingDate')),
'duration': int_or_none(cast_data.get('duration')),
}
class ACastChannelIE(ACastBaseIE):
IE_NAME = 'acast:channel'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
_TEST = {
'url': 'https://www.acast.com/condenasttraveler',
'info_dict': {
'id': '50544219-29bb-499e-a083-6087f4cb7797',
'title': 'Condé Nast Traveler Podcast',
'description': 'md5:98646dee22a5b386626ae31866638fbd',
},
'playlist_mincount': 20,
}
@classmethod
def suitable(cls, url):
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
def _real_extract(self, url):
display_id = self._match_id(url)
channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))


@@ -1,23 +1,32 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
int_or_none,
float_or_none,
ISO639Utils,
determine_ext,
)
class AdobeTVIE(InfoExtractor):
_VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
class AdobeTVBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tv.adobe.com/api/v4/'
class AdobeTVIE(AdobeTVBaseIE):
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': {
'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
'id': '10981',
'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
@@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
if not language:
language = 'en'
player = self._parse_json(
self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
video_id)
title = player.get('title') or self._search_regex(
r'data-title="([^"]+)"', webpage, 'title')
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration') or
self._search_regex(
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
webpage, 'view count'))
video_data = self._download_json(
self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
urlname)['data'][0]
formats = [{
'url': source['src'],
'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
'tbr': source.get('bitrate'),
} for source in player['sources']]
'url': source['url'],
'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('video_data_rate')),
} for source in video_data['videos']]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'id': compat_str(video_data['id']),
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnail'),
'upload_date': unified_strdate(video_data.get('start_date')),
'duration': parse_duration(video_data.get('duration')),
'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
}
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
def _parse_page_data(self, page_data):
return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
def _extract_playlist_entries(self, url, display_id):
page = self._download_json(url, display_id)
entries = self._parse_page_data(page['data'])
for page_num in range(2, page['paging']['pages'] + 1):
entries.extend(self._parse_page_data(
self._download_json(url + '&page=%d' % page_num, display_id)['data']))
return entries
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
_TEST = {
'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
'info_dict': {
'id': '36',
'title': 'The Complete Picture with Julieanne Kost',
'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
},
'playlist_mincount': 136,
}
def _get_element_url(self, element_data):
return element_data['urls'][0]
def _real_extract(self, url):
language, show_urlname = re.match(self._VALID_URL, url).groups()
if not language:
language = 'en'
query = 'language=%s&show_urlname=%s' % (language, show_urlname)
show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
return self.playlist_result(
self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
compat_str(show_data['id']),
show_data['show_name'],
show_data['show_description'])
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
_TEST = {
'url': 'http://tv.adobe.com/channel/development',
'info_dict': {
'id': 'development',
},
'playlist_mincount': 96,
}
def _get_element_url(self, element_data):
return element_data['url']
def _real_extract(self, url):
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
if not language:
language = 'en'
query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
if category_urlname:
query += '&category_urlname=%s' % category_urlname
return self.playlist_result(
self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
channel_urlname)
class AdobeTVVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
@@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_params = self._parse_json(self._search_regex(
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
video_id)
video_data = self._download_json(url + '?format=json', video_id)
formats = [{
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
'url': source['src'],
'width': source.get('width'),
'height': source.get('height'),
'tbr': source.get('bitrate'),
} for source in player_params['sources']]
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('bitrate')),
} for source in video_data['sources']]
self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats, so pick the maximum one
duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000)
for source in player_params['sources']]))
for source in video_data['sources']]))
subtitles = {}
for translation in player_params.get('translations', []):
for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles:
subtitles[lang_id] = []
@@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
'title': player_params['title'],
'description': self._og_search_description(webpage),
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': video_data['video'].get('poster'),
'duration': duration,
'subtitles': subtitles,
}
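The new AdobeTVPlaylistBaseIE pages through the API using the paging metadata of the first response and an explicit &page=N parameter for the rest. A hedged stand-alone sketch of that loop, with fetch_json standing in for _download_json (endpoint layout and field names follow the diff):

import json
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen         # Python 2

def fetch_json(url):
    return json.loads(urlopen(url).read().decode('utf-8'))

def iter_playlist_elements(url):
    # The first request tells us how many pages there are.
    first_page = fetch_json(url)
    for element in first_page['data']:
        yield element
    # Remaining pages are fetched with an explicit &page=N parameter.
    for page_num in range(2, first_page['paging']['pages'] + 1):
        for element in fetch_json(url + '&page=%d' % page_num)['data']:
            yield element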

View File

@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
'uploader': 'Al Jazeera English',
},
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'skip': 'Not accessible from Travis CI server',
}
@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
'&%40videoPlayer={0}'.format(brightcove_id)
),
'ie_key': 'Brightcove',
'ie_key': 'BrightcoveLegacy',
}

View File

@@ -7,11 +7,11 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
sanitized_Request,
xpath_text,
ExtractorError,
)
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
'j_password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
formats = []
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)
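The compat_urllib_request.Request to sanitized_Request swap above recurs in most files of this compare. The real wrapper lives in youtube_dl/utils.py; the stand-in below only illustrates the idea, and the exact sanitization there may differ:

try:
    from urllib.request import Request  # Python 3
except ImportError:
    from urllib2 import Request         # Python 2

def sanitized_Request(url, *args, **kwargs):
    # Normalize the URL before building the request, e.g. give
    # scheme-relative URLs an explicit scheme.
    if url.startswith('//'):
        url = 'http:' + url
    return Request(url, *args, **kwargs)

req = sanitized_Request('//example.com/video.mp4')
req.add_header('Referer', 'http://example.com/')
print(req.get_full_url())  # http://example.com/video.mp4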

View File

@@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
class AudiMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
'id': '1564',
'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941',
'upload_date': '20151124',
'timestamp': 1448354940,
'duration': 74022,
'view_count': int,
}
}
# extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
_AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
_, stage_mode, video_id, lang = raw_payload.split('-')
# TODO: handle the 's' and 'e' stage_mode values (live streams and ended live streams)
if stage_mode not in ('s', 'e'):
request = sanitized_Request(
'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
headers={'X-Auth-Token': self._AUTH_TOKEN})
json_data = self._download_json(request, video_id)['results']
formats = []
stream_url_hls = json_data.get('stream_url_hls')
if stream_url_hls:
m3u8_formats = self._extract_m3u8_formats(stream_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
stream_url_hds = json_data.get('stream_url_hds')
if stream_url_hds:
f4m_formats = self._extract_f4m_formats(json_data.get('stream_url_hds') + '?hdcore=3.4.0', video_id, -1, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
for video_version in json_data.get('video_versions'):
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
if not video_version_url:
continue
formats.append({
'url': video_version_url,
'width': int_or_none(video_version.get('width')),
'height': int_or_none(video_version.get('height')),
'abr': int_or_none(video_version.get('audio_bitrate')),
'vbr': int_or_none(video_version.get('video_bitrate')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': json_data['title'],
'description': json_data.get('subtitle'),
'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
'timestamp': parse_iso8601(json_data.get('publication_date')),
'duration': int_or_none(json_data.get('duration')),
'view_count': int_or_none(json_data.get('view_count')),
'formats': formats,
}
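The raw payload pulled from the amtv-embed element id above is a dash-separated tuple; the value below is made up, only the split mirrors the code:

raw_payload = 'player-v-1564-en'  # hypothetical element id
_, stage_mode, video_id, lang = raw_payload.split('-')
print(stage_mode, video_id, lang)  # v 1564 en
# 's' and 'e' stage modes (live and ended live streams) are skipped per the TODO above.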

View File

@@ -6,13 +6,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
'pass': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
req = sanitized_Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
data = self._download_json(

View File

@@ -22,12 +22,13 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>[\da-z]{8})'
_ID_REGEX = r'[pb][\da-z]{7}'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality than the pc mediaset but fails
# with geolocation in some cases even when it's not geo restricted at all (e.g.
# http://www.bbc.co.uk/programmes/b06bp7lf)
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
]
@@ -46,9 +47,8 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
'title': 'Kaleidoscope, Leonard Cohen',
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
'description': 'The Canadian poet and songwriter reflects on his musical career.',
'duration': 1740,
},
'params': {
# rtmp download
@@ -111,7 +111,8 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
'skip': 'Episode is no longer available on BBC iPlayer Radio',
}, {
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
'note': 'Audio',
@@ -334,7 +335,7 @@ class BBCCoUkIE(InfoExtractor):
return self._download_media_selector_url(
mediaselector_url % programme_id, programme_id)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation'):
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
@@ -345,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
media_selection = self._download_xml(
url, programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
else:
raise
@@ -453,6 +454,7 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = None
duration = None
tviplayer = self._search_regex(
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
@@ -465,14 +467,16 @@ class BBCCoUkIE(InfoExtractor):
if not programme_id:
programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
title = self._og_search_title(webpage)
description = self._search_regex(
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
webpage, 'description', fatal=False)
webpage, 'description', default=None)
if not description:
description = self._html_search_meta('description', webpage)
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
@@ -586,6 +590,7 @@ class BBCIE(BBCCoUkIE):
'ext': 'mp4',
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
'duration': 56,
'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
},
'params': {
'skip_download': True,
@@ -728,6 +733,7 @@ class BBCIE(BBCCoUkIE):
# article with multiple videos embedded with playlist.sxml (e.g.
# http://www.bbc.com/sport/0/football/34475836)
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
if playlists:
entries = [
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
@@ -780,8 +786,9 @@ class BBCIE(BBCCoUkIE):
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
[r'data-video-player-vpid="([\da-z]{8})"',
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
[r'data-video-player-vpid="(%s)"' % self._ID_REGEX,
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
webpage, 'vpid', default=None)
if programme_id:
@@ -816,7 +823,7 @@ class BBCIE(BBCCoUkIE):
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
entries = []
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
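A quick sanity check that the new _ID_REGEX covers the programme ids that appear elsewhere in this hunk (pattern copied from the diff; the trailing anchor is added only for the check):

import re

_ID_REGEX = r'[pb][\da-z]{7}'
for pid in ('b039d07m', 'p02frcc3', 'b06bp7lf'):
    assert re.match(r'%s$' % _ID_REGEX, pid)
print('all ids match')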

View File

@@ -1,6 +1,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import (
int_or_none,
parse_iso8601,
@@ -29,7 +34,24 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
'http://beeg.com/api/v1/video/%s' % video_id, video_id)
'http://beeg.com/api/v4/video/%s' % video_id, video_id)
def decrypt_key(key):
# Reverse engineered from http://static.beeg.com/cpl/1067.js
a = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo'
e = compat_urllib_parse_unquote(key)
return ''.join([
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 25)
for n in range(len(e))])
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(
encrypted_url.replace('{DATA_MARKERS}', ''), 'http:')
key = self._search_regex(
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
if not key:
return encrypted_url
return encrypted_url.replace(key, decrypt_key(key))
formats = []
for format_id, video_url in video.items():
@@ -40,7 +62,7 @@ class BeegIE(InfoExtractor):
if not height:
continue
formats.append({
'url': self._proto_relative_url(video_url.replace('{DATA_MARKERS}', ''), 'http:'),
'url': decrypt_url(video_url),
'format_id': format_id,
'height': int(height),
})
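A self-contained restatement of the key decryption above, using plain Python 3 names instead of the compat_* wrappers. Note the operator precedence: % 25 applies only to ord(a[n % len(a)]), not to the whole difference. The sample key is made up, so the output is not a meaningful URL fragment:

from urllib.parse import unquote  # Python 3; the extractor uses compat_urllib_parse_unquote

def decrypt_key(key):
    a = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo'
    e = unquote(key)
    return ''.join(
        chr(ord(e[n]) - ord(a[n % len(a)]) % 25)  # % binds tighter than -
        for n in range(len(e)))

print(repr(decrypt_key('hello%20world')))  # made-up input, just exercises the transform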

View File

@@ -2,143 +2,109 @@
from __future__ import unicode_literals
import re
import itertools
import json
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
)
from ..compat import compat_str
from ..utils import (
int_or_none,
unified_strdate,
unescapeHTML,
ExtractorError,
xpath_text,
)
class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
_TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86',
'info_dict': {
'id': '1074402_part1',
'id': '1554319',
'ext': 'flv',
'title': '【金坷垃】金泡沫',
'duration': 308,
'duration': 308313,
'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'timestamp': 1397983878,
'uploader': '菊子桑',
},
}, {
'url': 'http://www.bilibili.com/video/av1041170/',
'info_dict': {
'id': '1041170',
'title': '【BD1080P】刀语【诸神&异域】',
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
'uploader': '枫叶逝去',
'timestamp': 1396501299,
},
'playlist_count': 9,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page_num = mobj.group('page_num') or '1'
if '(此视频不存在或被删除)' in webpage:
raise ExtractorError(
'The video does not exist or was deleted', expected=True)
view_data = self._download_json(
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
video_id)
if 'error' in view_data:
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
raise ExtractorError(
'The video is not available in your region due to copyright reasons',
expected=True)
cid = view_data['cid']
title = unescapeHTML(view_data['title'])
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
doc = self._download_xml(
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
cid,
'Downloading page %s/%s' % (page_num, view_data['pages'])
)
title = self._html_search_meta(
'media:title', video_code, 'title', fatal=True)
duration_str = self._html_search_meta(
'duration', video_code, 'duration')
if duration_str is None:
duration = None
else:
duration_mobj = re.match(
r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
duration_str)
duration = (
int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
int(duration_mobj.group('minutes')) * 60 +
int(duration_mobj.group('seconds')))
upload_date = unified_strdate(self._html_search_meta(
'uploadDate', video_code, fatal=False))
thumbnail = self._html_search_meta(
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
if xpath_text(doc, './result') == 'error':
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
entries = []
lq_page = self._download_webpage(
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
video_id,
note='Downloading LQ video info'
)
try:
err_info = json.loads(lq_page)
raise ExtractorError(
'BiliBili said: ' + err_info['error_text'], expected=True)
except ValueError:
pass
lq_doc = compat_etree_fromstring(lq_page)
lq_durls = lq_doc.findall('./durl')
hq_doc = self._download_xml(
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
video_id,
note='Downloading HQ video info',
fatal=False,
)
if hq_doc is not False:
hq_durls = hq_doc.findall('./durl')
assert len(lq_durls) == len(hq_durls)
else:
hq_durls = itertools.repeat(None)
i = 1
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
for durl in doc.findall('./durl'):
size = xpath_text(durl, ['./filesize', './size'])
formats = [{
'format_id': 'lq',
'quality': 1,
'url': lq_durl.find('./url').text,
'filesize': int_or_none(
lq_durl.find('./size'), get_attr='text'),
'url': durl.find('./url').text,
'filesize': int_or_none(size),
'ext': 'flv',
}]
if hq_durl is not None:
formats.append({
'format_id': 'hq',
'quality': 2,
'ext': 'flv',
'url': hq_durl.find('./url').text,
'filesize': int_or_none(
hq_durl.find('./size'), get_attr='text'),
})
self._sort_formats(formats)
backup_urls = durl.find('./backup_url')
if backup_urls is not None:
for backup_url in backup_urls.findall('./url'):
formats.append({'url': backup_url.text})
formats.reverse()
entries.append({
'id': '%s_part%d' % (video_id, i),
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
'title': title,
'duration': int_or_none(xpath_text(durl, './length'), 1000),
'formats': formats,
'duration': duration,
'upload_date': upload_date,
'thumbnail': thumbnail,
})
i += 1
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': title
info = {
'id': compat_str(cid),
'title': title,
'description': view_data.get('description'),
'thumbnail': view_data.get('pic'),
'uploader': view_data.get('author'),
'timestamp': int_or_none(view_data.get('created')),
'view_count': int_or_none(view_data.get('play')),
'duration': int_or_none(xpath_text(doc, './timelength')),
}
if len(entries) == 1:
entries[0].update(info)
return entries[0]
else:
info.update({
'_type': 'multi_video',
'id': video_id,
'entries': entries,
})
return info
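How a single <durl> node from the playurl XML maps onto an entry's formats above; the XML snippet is a made-up reduction, element names follow the code:

import xml.etree.ElementTree as etree

durl = etree.fromstring('''<durl>
  <order>1</order>
  <length>308313</length>
  <size>12345678</size>
  <url>http://example.com/video-1.flv</url>
  <backup_url><url>http://backup.example.com/video-1.flv</url></backup_url>
</durl>''')

formats = [{
    'url': durl.find('./url').text,
    'filesize': int(durl.find('./size').text),
    'ext': 'flv',
}]
backup_urls = durl.find('./backup_url')
if backup_urls is not None:
    for backup_url in backup_urls.findall('./url'):
        formats.append({'url': backup_url.text})
formats.reverse()
print(formats)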

View File

@@ -4,14 +4,12 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
# The md5 checksum changes
'info_dict': {
@@ -17,22 +17,39 @@ class BloombergIE(InfoExtractor):
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
'description': 'md5:a8ba0302912d03d246979735c17d2761',
},
}
}, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True,
}, {
'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
'only_matching': True,
}]
def _real_extract(self, url):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
video_id = self._search_regex(
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
webpage, 'id', group='url')
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
formats = []
for stream in embed_info['streams']:
if stream["muxing_format"] == "TS":
formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
stream_url = stream.get('url')
if not stream_url:
continue
if stream['muxing_format'] == 'TS':
m3u8_formats = self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.extend(self._extract_f4m_formats(stream['url'], video_id))
f4m_formats = self._extract_f4m_formats(
stream_url, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
self._sort_formats(formats)
return {
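The new bmmrId regex is quote-agnostic: the first group captures the opening quote and the \1 backreference requires a matching closing quote. The sample markup below is made up:

import re

html = '<script>var config = {"bmmrId":"aaeae121-5949-481e-a1ce-4562db6f5df2"};</script>'
m = re.search(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', html)
print(m.group('url'))  # aaeae121-5949-481e-a1ce-4562db6f5df2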

View File

@@ -11,7 +11,6 @@ from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -20,12 +19,18 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
js_to_json,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
unsmuggle_url,
)
class BrightcoveIE(InfoExtractor):
class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -245,7 +250,7 @@ class BrightcoveIE(InfoExtractor):
def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info
class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
'info_dict': {
'id': '4463358922001',
'ext': 'mp4',
'title': 'Meet the man behind Popcorn Time',
'description': 'md5:eac376a4fe366edc70279bfb681aea16',
'duration': 165.768,
'timestamp': 1441391203,
'upload_date': '20150904',
'uploader_id': '929656772001',
'formats': 'mincount:22',
},
}, {
# with rtmp streams
'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
'info_dict': {
'id': '4279049078001',
'ext': 'mp4',
'title': 'Titansgrave: Chapter 0',
'description': 'Titansgrave: Chapter 0',
'duration': 1242.058,
'timestamp': 1433556729,
'upload_date': '20150606',
'uploader_id': '4036320279001',
'formats': 'mincount:41',
},
'params': {
'skip_download': True,
}
}]
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
entries = []
# Look for iframe embeds [1]
for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url)
# Look for embed_in_page embeds [2]
for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is
r'''(?sx)
<video[^>]+
data-video-id=["\'](\d+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
(\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
return entries
def _real_extract(self, url):
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
req = sanitized_Request(
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
% (account_id, video_id),
headers={'Accept': 'application/json;pk=%s' % policy_key})
json_data = self._download_json(req, video_id)
title = json_data['name']
formats = []
for source in json_data.get('sources', []):
source_type = source.get('type')
src = source.get('src')
if source_type == 'application/x-mpegURL':
if not src:
continue
m3u8_formats = self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
streaming_src = source.get('streaming_src')
stream_name, app_name = source.get('stream_name'), source.get('app_name')
if not src and not streaming_src and (not stream_name or not app_name):
continue
tbr = float_or_none(source.get('avg_bitrate'), 1000)
height = int_or_none(source.get('height'))
f = {
'tbr': tbr,
'width': int_or_none(source.get('width')),
'height': height,
'filesize': int_or_none(source.get('size')),
'container': source.get('container'),
'vcodec': source.get('codec'),
'ext': source.get('container').lower(),
}
def build_format_id(kind):
format_id = kind
if tbr:
format_id += '-%dk' % int(tbr)
if height:
format_id += '-%dp' % height
return format_id
if src or streaming_src:
f.update({
'url': src or streaming_src,
'format_id': build_format_id('http' if src else 'http-streaming'),
'preference': 2 if src else 1,
})
else:
f.update({
'url': app_name,
'play_path': stream_name,
'format_id': build_format_id('rtmp'),
})
formats.append(f)
self._sort_formats(formats)
description = json_data.get('description')
thumbnail = json_data.get('thumbnail')
timestamp = parse_iso8601(json_data.get('published_at'))
duration = float_or_none(json_data.get('duration'), 1000)
tags = json_data.get('tags', [])
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'uploader_id': account_id,
'formats': formats,
'tags': tags,
}
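build_format_id above closes over the tbr and height computed for each source; a flattened, self-contained version with made-up values:

def build_format_id(kind, tbr=None, height=None):
    format_id = kind
    if tbr:
        format_id += '-%dk' % int(tbr)
    if height:
        format_id += '-%dp' % height
    return format_id

print(build_format_id('http', tbr=1264.0, height=720))  # http-1264k-720p
print(build_format_id('rtmp', height=360))               # rtmp-360p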

View File

@@ -14,9 +14,10 @@ class BYUtvIE(InfoExtractor):
'info_dict': {
'id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
'description': 'md5:5438d33774b6bdc662f9485a340401cc',
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'title': 'Season 5 Episode 5',
'thumbnail': 're:^https?://.*\.jpg$'
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 1486.486,
},
'params': {
'skip_download': True,

View File

@@ -1,6 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
sanitized_Request,
smuggle_url,
)
class CBSIE(InfoExtractor):
@@ -46,13 +50,19 @@ class CBSIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
request = sanitized_Request(url)
# Requests with an Android UA are served higher quality (720p) streams (see
# https://github.com/rg3/youtube-dl/issues/7490)
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
webpage = self._download_webpage(request, display_id)
real_id = self._search_regex(
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
webpage, 'real video ID')
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': 'theplatform:%s' % real_id,
'url': smuggle_url(
'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
{'force_smil_url': True}),
'display_id': display_id,
}
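The force_smil_url flag rides along inside the URL itself. A simplified sketch of the smuggle_url/unsmuggle_url round trip (the real helpers in youtube_dl/utils.py encode the fragment slightly differently; the media id below is a placeholder):

import json
try:
    from urllib.parse import quote, unquote  # Python 3
except ImportError:
    from urllib import quote, unquote        # Python 2

def smuggle_url(url, data):
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle=' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(sdata))

url = smuggle_url(
    'http://link.theplatform.com/s/dJ5BDC/abcdefg?mbr=true&manifest=m3u',  # placeholder media id
    {'force_smil_url': True})
print(unsmuggle_url(url))  # (original URL, {'force_smil_url': True})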

View File

@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
'format_id': format_id,
}
if uri.startswith('rtmp'):
play_path = re.sub(
r'{slistFilePath}', '',
uri.split('<break>')[-1].split('{break}')[-1])
fmt.update({
'app': 'ondemand?auth=cbs',
'play_path': 'mp4:' + uri.split('<break>')[-1],
'play_path': 'mp4:' + play_path,
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
'page_url': 'http://www.cbsnews.com',
'ext': 'flv',
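A worked example of the rtmp play_path clean-up above; the URI is made up but follows the <break>/{break}/{slistFilePath} layout the substitution targets:

import re

uri = 'rtmp://cp98363.edgefcs.net/ondemand?auth=cbs<break>video/2015/video_700.mp4{slistFilePath}'
play_path = re.sub(
    r'{slistFilePath}', '',
    uri.split('<break>')[-1].split('{break}')[-1])
print('mp4:' + play_path)  # mp4:video/2015/video_700.mp4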

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
@@ -13,6 +12,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
)
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data))
@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist_title = self._og_search_title(webpage)

View File

@@ -1,14 +1,9 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
js_to_json,
parse_iso8601,
remove_end,
unified_strdate,
)
@@ -21,48 +16,47 @@ class ClipfishIE(InfoExtractor):
'id': '3966754',
'ext': 'mp4',
'title': 'FIFA 14 - E3 2013 Trailer',
'timestamp': 1370938118,
'description': 'Video zu FIFA 14: E3 2013 Trailer',
'upload_date': '20130611',
'duration': 82,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(
js_to_json(self._html_search_regex(
'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
video_id)
video_info = self._download_json(
'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
video_id)['items'][0]
formats = []
for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.append({
'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
'ext': 'mp4',
'format_id': 'hls',
})
else:
formats.append({
'url': video_url,
'format_id': ext,
})
self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - Video')
thumbnail = self._og_search_thumbnail(webpage)
duration = int_or_none(video_info.get('length'))
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))
m3u8_url = video_info.get('media_videourl_hls')
if m3u8_url:
formats.append({
'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
'ext': 'mp4',
'format_id': 'hls',
})
mp4_url = video_info.get('media_videourl')
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
'width': int_or_none(video_info.get('width')),
'height': int_or_none(video_info.get('height')),
'tbr': int_or_none(video_info.get('bitrate')),
})
return {
'id': video_id,
'title': title,
'title': video_info['title'],
'description': video_info.get('descr'),
'formats': formats,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
'duration': int_or_none(video_info.get('media_length')),
'upload_date': unified_strdate(video_info.get('pubDate')),
'view_count': int_or_none(video_info.get('media_views'))
}

View File

@@ -1,7 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import determine_ext
from ..utils import int_or_none
_translation_table = {
@@ -42,31 +42,26 @@ class CliphunterIE(InfoExtractor):
video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title')
fmts = {}
for fmt in ('mp4', 'flv'):
fmt_list = self._parse_json(self._search_regex(
r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
for f in fmt_list:
fmts[f['fname']] = _decode(f['sUrl'])
qualities = self._parse_json(self._search_regex(
r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
gexo_files = self._parse_json(
self._search_regex(
r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
video_id)
formats = []
for fname, url in fmts.items():
f = {
'url': url,
}
if fname in qualities:
qual = qualities[fname]
f.update({
'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
'width': qual['w'],
'height': qual['h'],
'tbr': qual['br'],
})
formats.append(f)
for format_id, f in gexo_files.items():
video_url = f.get('url')
if not video_url:
continue
fmt = f.get('fmt')
height = f.get('h')
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
formats.append({
'url': _decode(video_url),
'format_id': format_id,
'width': int_or_none(f.get('w')),
'height': int_or_none(height),
'tbr': int_or_none(f.get('br')),
})
self._sort_formats(formats)
thumbnail = self._search_regex(

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
sanitized_Request,
)
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
}
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json')

View File

@@ -19,7 +19,6 @@ from ..compat import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
compat_str,
compat_etree_fromstring,
@@ -37,6 +36,7 @@ from ..utils import (
int_or_none,
RegexNotFoundError,
sanitize_filename,
sanitized_Request,
unescapeHTML,
unified_strdate,
url_basename,
@@ -167,7 +167,7 @@ class InfoExtractor(object):
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer.
duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
@@ -891,6 +891,11 @@ class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@ -898,7 +903,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1280,7 +1285,7 @@ class InfoExtractor(object):
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
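A worked example of the f4m base URL handling added above: when the manifest carries a <baseURL> element, relative media hrefs are joined against it, otherwise against the manifest's own directory. Values are illustrative:

def resolve_media_url(manifest_url, media_url, base_url=None):
    if media_url.startswith('http://') or media_url.startswith('https://'):
        return media_url
    return (base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url

print(resolve_media_url('http://cdn.example.com/hds/manifest.f4m', 'video_1500.f4m'))
# http://cdn.example.com/hds/video_1500.f4m
print(resolve_media_url('http://cdn.example.com/hds/manifest.f4m', 'video_1500.f4m',
                        base_url='http://edge.example.com/live'))
# http://edge.example.com/live/video_1500.f4m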

View File

@@ -23,6 +23,7 @@ from ..utils import (
int_or_none,
lowercase_escape,
remove_end,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_text,
@@ -46,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
'name': username,
'password': password,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request = sanitized_Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
@@ -55,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request))
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
@@ -307,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req = sanitized_Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
@@ -319,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request(
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality),
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

View File

@@ -9,6 +9,7 @@ from ..utils import (
find_xpath_attr,
smuggle_url,
determine_ext,
ExtractorError,
)
from .senateisvp import SenateISVPIE
@@ -18,33 +19,32 @@ class CSpanIE(InfoExtractor):
IE_DESC = 'C-SPAN'
_TESTS = [{
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
'md5': '8e44ce11f0f725527daccc453f553eb0',
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
'info_dict': {
'id': '315139',
'ext': 'mp4',
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
},
'skip': 'Regularly fails on travis, for unknown reasons',
}, {
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
# For whatever reason, the served video alternates between
# two different ones
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
'info_dict': {
'id': '340723',
'id': 'c4486943',
'ext': 'mp4',
'title': 'International Health Care Models',
'title': 'CSPAN - International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
}, {
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
'md5': '446562a736c6bf97118e389433ed88d4',
'md5': '2ae5051559169baadba13fc35345ae74',
'info_dict': {
'id': '342759',
'ext': 'mp4',
'title': 'General Motors Ignition Switch Recall',
'duration': 14848,
'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
},
}, {
# Video from senate.gov
@@ -57,67 +57,77 @@ class CSpanIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
webpage = self._download_webpage(url, page_id)
video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
if matches:
video_type, video_id = matches.groups()
if video_type == 'prog':
video_type = 'program'
else:
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
if senate_isvp_url:
title = self._og_search_title(webpage)
surl = smuggle_url(senate_isvp_url, {'force_title': title})
return self.url_result(surl, 'SenateISVP', video_id, title)
description = self._html_search_regex(
[
# The full description
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
# If the description is small enough the other div is not
# present, otherwise this is a stripped version
r'<p class=\'initial\'>(.*?)</p>'
],
webpage, 'description', flags=re.DOTALL, default=None)
def get_text_attr(d, attr):
return d.get(attr, {}).get('#text')
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data = self._download_json(info_url, video_id)
data = self._download_json(
'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id),
video_id)['video']
if data['@status'] != 'Success':
raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True)
doc = self._download_xml(
'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id),
video_id)
description = self._html_search_meta('description', webpage)
title = find_xpath_attr(doc, './/string', 'name', 'title').text
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
if senate_isvp_url:
surl = smuggle_url(senate_isvp_url, {'force_title': title})
return self.url_result(surl, 'SenateISVP', video_id, title)
files = data['files']
capfile = get_text_attr(data, 'capfile')
files = data['video']['files']
try:
capfile = data['video']['capfile']['#text']
except KeyError:
capfile = None
entries = [{
'id': '%s_%d' % (video_id, partnum + 1),
'title': (
title if len(files) == 1 else
'%s part %d' % (title, partnum + 1)),
'url': unescapeHTML(f['path']['#text']),
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(f.get('length', {}).get('#text')),
'subtitles': {
'en': [{
'url': capfile,
'ext': determine_ext(capfile, 'dfxp')
}],
} if capfile else None,
} for partnum, f in enumerate(files)]
entries = []
for partnum, f in enumerate(files):
formats = []
for quality in f['qualities']:
formats.append({
'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
'url': unescapeHTML(get_text_attr(quality, 'file')),
'height': int_or_none(get_text_attr(quality, 'height')),
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
})
self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),
'title': (
title if len(files) == 1 else
'%s part %d' % (title, partnum + 1)),
'formats': formats,
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(get_text_attr(f, 'length')),
'subtitles': {
'en': [{
'url': capfile,
'ext': determine_ext(capfile, 'dfxp')
}],
} if capfile else None,
})
if len(entries) == 1:
entry = dict(entries[0])
entry['id'] = video_id
entry['id'] = 'c' + video_id if video_type == 'clip' else video_id
return entry
else:
return {
'_type': 'playlist',
'entries': entries,
'title': title,
'id': video_id,
'id': 'c' + video_id if video_type == 'clip' else video_id,
}
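The ajax-player.php response wraps scalar values in '#text' nodes, which is what get_text_attr above unpacks; the data below is a made-up reduction of that shape:

def get_text_attr(d, attr):
    return d.get(attr, {}).get('#text')

data = {
    '@status': 'Success',
    'capfile': {'#text': 'http://example.com/captions.dfxp'},
    'files': {'file': [{'length': {'#text': '14848'}}]},
}

print(get_text_attr(data, 'capfile'))                     # http://example.com/captions.dfxp
print(get_text_attr(data['files']['file'][0], 'length'))  # 14848
print(get_text_attr(data, 'missing'))                     # None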

View File

@@ -7,15 +7,13 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_request,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
str_to_int,
unescapeHTML,
)
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Cookie', 'family_filter=off; ff=off')
return request

View File

@@ -13,8 +13,8 @@ from ..utils import (
class DBTVIE(InfoExtractor):
_VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
_TESTS = [{
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
'info_dict': {
@@ -30,12 +30,18 @@ class DBTVIE(InfoExtractor):
'view_count': int,
'categories': list,
}
}
}, {
'url': 'http://dbtv.no/3649835190001',
'only_matching': True,
}, {
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or video_id
data = self._download_json(
'http://api.dbtv.no/discovery/%s' % video_id, display_id)

View File

@@ -2,13 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
headers={'Origin': 'http://www.dcndigital.ae'})

View File

@@ -0,0 +1,51 @@
# encoding: utf-8
from __future__ import unicode_literals
import time
from .common import InfoExtractor
from ..utils import int_or_none
class DPlayIE(InfoExtractor):
_VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
'info_dict': {
'id': '3172',
'ext': 'mp4',
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
'title': 'Svensken lär sig njuta av livet',
'duration': 2650,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-video-id="(\d+)"', webpage, 'video id')
info = self._download_json(
'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
video_id)['data'][0]
self._set_cookie(
'secure.dplay.se', 'dsc-geo',
'{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
# TODO: consider adding support for 'stream_type=hds'; it seems to
# require setting some cookies
manifest_url = self._download_json(
'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
video_id, 'Getting manifest url for hls stream')['hls']
formats = self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
return {
'id': video_id,
'display_id': display_id,
'title': info['title'],
'formats': formats,
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
}
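The dsc-geo cookie set above carries a millisecond expiry 20 minutes in the future; a quick check of the value format:

import time

cookie_value = '{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000)
print(cookie_value)  # e.g. {"countryCode":"NL","expiry":1449698662000}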

View File

@@ -7,7 +7,6 @@ from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -16,6 +15,7 @@ from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)

View File

@@ -2,14 +2,17 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import qualities
from ..utils import (
qualities,
sanitized_Request,
)
class DumpertIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -26,10 +29,12 @@ class DumpertIE(InfoExtractor):
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
protocol = mobj.group('protocol')
url = 'https://www.dumpert.nl/mediabase/' + video_id
req = compat_urllib_request.Request(url)
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
req = sanitized_Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)

View File

@@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -57,7 +57,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL')
if hls_url:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url})
token_data = self._download_json(

View File

@@ -3,13 +3,12 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
sanitized_Request,
)
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID']
key = ims_video['hash']
config_req = compat_urllib_request.Request(
config_req = sanitized_Request(
'http://www.escapistmagazine.com/videos/'
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
config_req.add_header('Referer', url)

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
playlist_id = mobj.group('id')
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
pllist_req = compat_urllib_request.Request(pllist_url)
pllist_req = sanitized_Request(pllist_url)
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist_list = self._download_json(
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
raise ExtractorError('Playlist id not found')
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
pl_req = compat_urllib_request.Request(pl_url)
pl_req = sanitized_Request(pl_url)
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
playlist = self._download_json(
pl_req, playlist_id, note='Downloading playlist info')

View File

@@ -3,9 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -10,11 +10,11 @@ from ..compat import (
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
limit_length,
sanitized_Request,
urlencode_postdata,
get_element_by_id,
clean_html,
@@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor):
if useremail is None:
return
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req = sanitized_Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
@@ -94,7 +94,7 @@ class FacebookIE(InfoExtractor):
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
login_results = self._download_webpage(request, None,
@@ -109,7 +109,7 @@ class FacebookIE(InfoExtractor):
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
'name_action_selected': 'dont_save',
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None,
note='Confirming login')
@@ -164,7 +164,7 @@ class FacebookIE(InfoExtractor):
if not video_title:
video_title = self._html_search_regex(
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
webpage, 'alternative title', fatal=False)
webpage, 'alternative title', default=None)
video_title = limit_length(video_title, 80)
if not video_title:
video_title = 'Facebook video #%s' % video_id

View File

@@ -12,6 +12,7 @@ from ..compat import (
from ..utils import (
encode_dict,
ExtractorError,
sanitized_Request,
)
@@ -36,8 +37,8 @@ class FC2IE(InfoExtractor):
'params': {
'username': 'ytdl@yt-dl.org',
'password': '(snip)',
'skip': 'requires actual password'
}
},
'skip': 'requires actual password',
}, {
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
'only_matching': True,
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
return False
# this is also needed
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed')

View File

@@ -3,10 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
ExtractorError,
find_xpath_attr,
sanitized_Request,
)
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
req.add_header(
'User-Agent',
# it needs a more recent version

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
parse_iso8601,
sanitized_Request,
str_to_int,
)
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
b'Content-Type': b'application/x-www-form-urlencoded',
b'Origin': b'http://www.4tube.com',
}
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
token_req = sanitized_Request(token_url, b'{}', headers)
tokens = self._download_json(token_req, video_id)
formats = [{
'url': tokens[format]['token'],

View File

@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
bitrates.sort()
m3u8_url = self._search_regex(
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
webpage, 'm3u8 url', default=None, group='url')
formats = []
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
bitrates.sort()
for bitrate in bitrates:
for link in links:
formats.append({

View File
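The FunnyOrDie hunk above stops scraping <source> bitrates from the page and instead reads them out of the HLS master URL. A standalone check of that regex, with a URL invented purely to show the matching behaviour:

import re

# Hypothetical master.m3u8 URL shaped like the ',v<bitrate>,' segments the pattern expects.
m3u8_url = 'http://example.com/funnyordie/videos/abc/v400,/v800,/v1500,/master.m3u8'
bitrates = sorted(int(b) for b in re.findall(r'[,/]v(\d+)[,/]', m3u8_url))
print(bitrates)  # [400, 800, 1500]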

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
class GameInformerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
_TEST = {
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
'info_dict': {
'id': '4515472681001',
'ext': 'm3u8',
'title': 'Replay - Animal Crossing',
'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
'timestamp': 1443457610706,
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
json_data = self._download_json(
bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
display_id)
return {
'id': compat_str(json_data['id']),
'display_id': display_id,
'url': json_data['IOSRenditions'][0]['url'],
'title': json_data['name'],
'description': json_data.get('shortDescription'),
'timestamp': int_or_none(json_data.get('publishedDate')),
'duration': int_or_none(json_data.get('length')),
}
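The new GameInformer extractor pulls the Brightcove API URL straight out of a getVideo('...') call on the page before requesting the JSON with the extra video_fields. A minimal illustration of that first step (the page snippet below is invented):

import re

webpage = "<script>getVideo('http://c.brightcove.com/services/viewer/federated_f9?videoId=4515472681001')</script>"
bc_api_url = re.search(r"getVideo\('([^']+)'", webpage).group(1)
print(bc_api_url)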

View File

@@ -1,19 +1,62 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_age_limit,
url_basename,
)
class GametrailersIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
class GametrailersIE(InfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
_TEST = {
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
'info_dict': {
'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d',
'id': '2983958',
'ext': 'mp4',
'title': 'E3 2013: Debut Trailer',
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
'display_id': '116437-Just-Cause-3-Review',
'title': 'Just Cause 3 - Review',
'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
},
}
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'<title>(.+?)\|', webpage, 'title').strip()
embed_url = self._proto_relative_url(
self._search_regex(
r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
'embed url'),
scheme='http:')
video_id = url_basename(embed_url)
embed_page = self._download_webpage(embed_url, video_id)
embed_vars_json = self._search_regex(
r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
'embed vars')
info = self._parse_json(embed_vars_json, video_id)
formats = []
for media in info['media']:
if media['mediaPurpose'] == 'play':
formats.append({
'url': media['uri'],
'height': media['height'],
'width': media['width'],
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': info.get('thumbUri'),
'description': self._og_search_description(webpage),
'duration': int_or_none(info.get('videoLengthInSeconds')),
'age_limit': parse_age_limit(info.get('audienceRating')),
}
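The rewritten Gametrailers extractor parses an embedVars JSON blob from the embed page and keeps only the media entries whose mediaPurpose is 'play'. Roughly, with a stand-in embed page:

import json
import re

embed_page = """<script>var embedVars = {"media": [
    {"mediaPurpose": "play", "uri": "http://example.com/video_720.mp4", "width": 1280, "height": 720},
    {"mediaPurpose": "image", "uri": "http://example.com/thumb.jpg", "width": 640, "height": 360}
]} </script>"""

info = json.loads(re.search(r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page).group(1))
formats = [{'url': m['uri'], 'width': m['width'], 'height': m['height']}
           for m in info['media'] if m['mediaPurpose'] == 'play']
print(formats)  # one mp4 format dict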

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
remove_end,
HEADRequest,
sanitized_Request,
)
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')

View File

@@ -11,7 +11,6 @@ from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
@@ -22,6 +21,7 @@ from ..utils import (
HEADRequest,
is_html,
orderedSet,
sanitized_Request,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -30,7 +30,10 @@ from ..utils import (
url_basename,
xpath_text,
)
from .brightcove import BrightcoveIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
@@ -51,6 +54,7 @@ from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
class GenericIE(InfoExtractor):
@@ -275,7 +279,7 @@ class GenericIE(InfoExtractor):
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
'id': '2765128793001',
@@ -299,7 +303,7 @@ class GenericIE(InfoExtractor):
'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -314,7 +318,7 @@ class GenericIE(InfoExtractor):
},
{
# https://github.com/rg3/youtube-dl/issues/3541
'add_ie': ['Brightcove'],
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
'id': '3866516442001',
@@ -336,6 +340,7 @@ class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
'duration': 238.231,
},
'add_ie': ['Ooyala'],
},
@@ -347,6 +352,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
'duration': 135.427,
},
'params': {
'skip_download': True,
@@ -820,6 +826,19 @@ class GenericIE(InfoExtractor):
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
},
},
# Kaltura embed protected with referrer
{
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
'info_dict': {
'id': '1_g4fbemnq',
'ext': 'mp4',
'title': 'Violetta - Achter De Schermen - Ruggero',
'description': 'Achter de schermen met Ruggero',
'timestamp': 1435133761,
'upload_date': '20150624',
'uploader_id': 'echojecka',
},
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -944,8 +963,9 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
'description': 'VIDEO: Index/Match versus VLOOKUP.',
'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
'duration': 191.933,
},
'params': {
# m3u8 downloads
@@ -1031,6 +1051,31 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'cinemasnob',
},
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'info_dict': {
'id': '4238694884001',
'ext': 'flv',
'title': 'Tabletop: Dread, Last Thoughts',
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
},
# JWPlayer with M3U8
{
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
}
]
@@ -1174,7 +1219,7 @@ class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id)
head_response = full_response
@@ -1203,7 +1248,7 @@ class GenericIE(InfoExtractor):
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
request = compat_urllib_request.Request(url)
request = sanitized_Request(url)
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only a chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1290,14 +1335,14 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for BrightCove:
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls]
return {
@@ -1307,6 +1352,11 @@ class GenericIE(InfoExtractor):
'entries': entries,
}
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(webpage)
if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1455,7 +1505,7 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec'))
return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -1463,7 +1513,7 @@ class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -1675,7 +1725,9 @@ class GenericIE(InfoExtractor):
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
# Look for Eagle.Platform embeds
mobj = re.search(
@@ -1690,10 +1742,9 @@ class GenericIE(InfoExtractor):
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
pladform_url = PladformIE._extract_url(webpage)
if pladform_url:
return self.url_result(pladform_url)
# Look for Playwire embeds
mobj = re.search(
@@ -1720,7 +1771,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -1840,6 +1891,7 @@ class GenericIE(InfoExtractor):
entries = []
for video_url in found:
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -1851,25 +1903,24 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
entry_info_dict = {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
}
ext = determine_ext(video_url)
if ext == 'smil':
entries.append({
'id': video_id,
'formats': self._extract_smil_formats(video_url, video_id),
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else:
entries.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
entry_info_dict['url'] = video_url
entries.append(entry_info_dict)
if len(entries) == 1:
return entries[0]
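Several generic.py hunks above (Brightcove legacy, Ooyala, Kaltura) now smuggle the embedding page's URL along with the extracted ID so the target extractor can later send a proper Referer or domain. A simplified, non-authoritative sketch of the smuggle/unsmuggle idea -- the real smuggle_url/unsmuggle_url live in youtube_dl/utils.py and encode the payload slightly differently:

import json
try:
    from urllib.parse import quote, unquote
except ImportError:
    from urllib import quote, unquote  # Python 2

def smuggle_url(url, data):
    # Piggyback extra data on the URL as a JSON payload in the fragment.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle=' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))

url, data = unsmuggle_url(
    smuggle_url('kaltura:1234:1_g4fbemnq', {'source_url': 'http://example.com/embedding-page'}), {})
print(url, data['source_url'])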

View File

@@ -18,6 +18,8 @@ class GrouponIE(InfoExtractor):
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
'ext': 'mp4',
'title': 'Bikram Yoga Huntington Beach | Orange County',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 44.961,
},
}],
'params': {

View File

@@ -4,12 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
HEADRequest,
sanitized_Request,
str_to_int,
urlencode_postdata,
urlhandle_detect_ext,
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
payload = urlencode_postdata({'tracks[]': track_id})
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
req = sanitized_Request(self._PLAYLIST_URL, payload)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
track = self._download_json(req, track_id, 'Downloading playlist')[0]

View File

@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
HEADRequest,
sanitized_Request,
)
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'),
('mediaId', video_id),
])
r = compat_urllib_request.Request(
r = sanitized_Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
mkd = self._download_json(

View File

@@ -16,6 +16,7 @@ class HowcastIE(InfoExtractor):
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
'timestamp': 1276081287,
'upload_date': '20100609',
'duration': 56.823,
},
'params': {
# m3u8 download

View File

@@ -4,12 +4,10 @@ import json
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -30,15 +28,12 @@ class HypemIE(InfoExtractor):
track_id = self._match_id(url)
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
request = sanitized_Request(url + '?' + compat_urllib_parse.urlencode(data))
response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
html_tracks = self._html_search_regex(
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
response, 'tracks')
try:
track_list = json.loads(html_tracks)
@@ -48,15 +43,14 @@ class HypemIE(InfoExtractor):
key = track['key']
track_id = track['id']
artist = track['artist']
title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = compat_urllib_request.Request(
serve_url, '', {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
request = sanitized_Request(
'http://hypem.com/serve/source/%s/%s' % (track_id, key),
'', {'Content-Type': 'application/json'})
song_data = self._download_json(request, track_id, 'Downloading metadata')
final_url = song_data["url"]
final_url = song_data['url']
artist = track.get('artist')
return {
'id': track_id,

View File

@@ -10,8 +10,8 @@ from ..utils import (
class InstagramIE(InfoExtractor):
_VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
'title': 'Video by naomipq',
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -6,12 +6,10 @@ from random import random
from math import floor
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
remove_end,
sanitized_Request,
)
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
(floor(random() * 1073741824), floor(random() * 1073741824))
)
req = compat_urllib_request.Request(player_url)
req = sanitized_Request(player_url)
req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id)

View File

@@ -205,9 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction
# last update at 2015-10-22 for Zombie::bite
# '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2]
enc_key = '2c76de15dcb44bd28ff0927d50d31620'
# last update at 2015-12-06 for Zombie::bite
enc_key = '3719f6a1da83ee0aee3488d8802d7696'[::-1]
return enc_key
def _real_extract(self, url):

View File

@@ -5,11 +5,9 @@ import re
import json
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
request = sanitized_Request(api_url, json.dumps(data))
video_json_page = self._download_webpage(
request, video_id, 'Downloading video JSON')

View File

@@ -2,12 +2,18 @@
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
unsmuggle_url,
)
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
info, source_data = self._get_video_info(entry_id, partner_id)
formats = [{
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f['fileExt'],
'tbr': f['bitrate'],
'fps': f.get('frameRate'),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': f.get('height'),
'width': f.get('width'),
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
} for f in source_data['flavorAssets']]
source_url = smuggled_data.get('source_url')
if source_url:
referrer = base64.b64encode(
'://'.join(compat_urlparse.urlparse(source_url)[:2])
.encode('utf-8')).decode('utf-8')
else:
referrer = None
formats = []
for f in source_data['flavorAssets']:
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
if referrer:
video_url += '?referrer=%s' % referrer
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
'container': f.get('containerFormat'),
'vcodec': f.get('videoCodecId'),
'height': int_or_none(f.get('height')),
'width': int_or_none(f.get('width')),
'url': video_url,
})
self._check_formats(formats, entry_id)
self._sort_formats(formats)
return {
'id': entry_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),
'description': clean_html(info.get('description')),
'thumbnail': info.get('thumbnailUrl'),
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),

View File
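In the Kaltura hunk above, the smuggled source_url is reduced to scheme plus host, base64-encoded, and appended to every flavor URL as a referrer parameter. A small standalone illustration (all URLs here are examples):

import base64
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse  # Python 2

source_url = 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen'
referrer = base64.b64encode(
    '://'.join(urlparse(source_url)[:2]).encode('utf-8')).decode('utf-8')  # b64 of 'http://www.disney.nl'
video_url = '%s/flavorId/%s?referrer=%s' % ('http://example.com/dataUrl', '1_abc123', referrer)
print(video_url)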

@@ -1,12 +1,11 @@
from __future__ import unicode_literals
import os
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
from ..utils import (
sanitized_Request,
url_basename,
)
@@ -14,19 +13,20 @@ class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'md5': '6e297b7e789329923fcf83abb67c9289',
'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
'info_dict': {
'id': '1214711',
'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18,
'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
@@ -38,21 +38,29 @@ class KeezMoviesIE(InfoExtractor):
video_title = self._html_search_regex(
r'<h1 [^>]*>([^<]+)', webpage, 'title')
video_url = self._html_search_regex(
r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
flashvars = self._parse_json(self._search_regex(
r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
formats = []
for height in (180, 240, 480):
if flashvars.get('quality_%dp' % height):
video_url = flashvars['quality_%dp' % height]
a_format = {
'url': video_url,
'height': height,
'format_id': '%dp' % height,
}
filename_parts = url_basename(video_url).split('_')
if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
a_format['tbr'] = int(filename_parts[1][:-1])
formats.append(a_format)
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'formats': formats,
'age_limit': age_limit,
'thumbnail': flashvars.get('image_url')
}
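The KeezMovies rewrite above derives an approximate total bitrate from the second underscore-separated token of the file name. For example (url_basename simplified, file name invented):

import re
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse  # Python 2

def url_basename(url):
    # Simplified stand-in for youtube_dl.utils.url_basename.
    return urlparse(url).path.rstrip('/').rpartition('/')[2]

video_url = 'http://cdn.example.com/videos/1214711_720k_480p.mp4'
parts = url_basename(video_url).split('_')
tbr = int(parts[1][:-1]) if len(parts) >= 2 and re.match(r'\d+[Kk]', parts[1]) else None
print(tbr)  # 720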

View File

@@ -8,13 +8,13 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_ord,
)
from ..utils import (
determine_ext,
ExtractorError,
parse_iso8601,
sanitized_Request,
int_or_none,
encode_data_uri,
)
@@ -114,7 +114,7 @@ class LetvIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
play_json_req = compat_urllib_request.Request(
play_json_req = sanitized_Request(
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')

View File

@@ -7,12 +7,12 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
sanitized_Request,
)
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false'
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
'remember': 'false',
'stayPut': 'false',
}
request = compat_urllib_request.Request(
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
login_page = self._download_webpage(
request, None,
@@ -83,6 +83,10 @@ class LyndaBaseIE(InfoExtractor):
raise ExtractorError('Unable to log in')
def _logout(self):
username, _ = self._get_login_info()
if username is None:
return
self._download_webpage(
'http://www.lynda.com/ajax/logout.aspx', None,
'Logging out', 'Unable to log out', fatal=False)

View File

@@ -7,12 +7,12 @@ from ..compat import (
compat_parse_qs,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0',
'submit': "Continue - I'm over 18",
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.report_age_confirmation()
self._download_webpage(request, None, False, 'Unable to confirm age')
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file
@@ -154,10 +154,10 @@ class MetacafeIE(InfoExtractor):
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
video_url = None
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)
if mobj is not None:
mediaURL = compat_urllib_parse_unquote(mobj.group(1))
video_ext = mediaURL[-3:]
video_ext = determine_ext(mediaURL)
# Extract gdaKey if available
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
@@ -229,7 +229,7 @@ class MetacafeIE(InfoExtractor):
age_limit = (
18
if re.search(r'"contentRating":"restricted"', webpage)
if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
else 0)
if isinstance(video_url, list):

View File

@@ -2,14 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
sanitized_Request,
)
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
req = sanitized_Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -4,11 +4,11 @@ from __future__ import unicode_literals
import random
from .common import InfoExtractor
from ..compat import compat_urllib_request
from ..utils import (
xpath_text,
int_or_none,
ExtractorError,
sanitized_Request,
)
@@ -63,7 +63,7 @@ class MioMioIE(InfoExtractor):
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
video_id)
vid_config_request = compat_urllib_request.Request(
vid_config_request = sanitized_Request(
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
headers=http_headers)

View File

@@ -64,7 +64,8 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
song_url = song_url.replace('/previews/', '/c/originals/')
if not self._check_url(song_url, track_id, 'mp3'):
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
if not self._check_url(song_url, track_id, 'm4a'):

View File
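The Mixcloud fix above rewrites the CDN host as well as the path when turning a preview URL into the original stream URL, for example (path invented for illustration):

import re

preview_url = 'https://audiocdn07.mixcloud.com/previews/5/a/b/c/some-track.mp3'
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url).replace('/previews/', '/c/originals/')
print(song_url)  # https://stream07.mixcloud.com/c/originals/5/a/b/c/some-track.mp3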

@@ -5,13 +5,11 @@ import json
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
)
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
]
r_json = json.dumps(r)
post = compat_urllib_parse.urlencode({'r': r_json})
req = compat_urllib_request.Request(self._API_URL, post)
req = sanitized_Request(self._API_URL, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
response = self._download_json(req, video_id)

View File

@@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..utils import sanitized_Request
class MofosexIE(InfoExtractor):
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)

View File

@@ -5,13 +5,11 @@ import os.path
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
remove_start,
sanitized_Request,
)
@@ -81,7 +79,7 @@ class MonikerIE(InfoExtractor):
orig_webpage, 'builtin URL', default=None, group='url')
if builtin_url:
req = compat_urllib_request.Request(builtin_url)
req = sanitized_Request(builtin_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
title = self._og_search_title(orig_webpage).strip()
@@ -94,7 +92,7 @@ class MonikerIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

View File

@@ -3,12 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
'hash': hash_key,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import sanitized_Request
class MovieClipsIE(InfoExtractor):
@@ -25,7 +23,7 @@ class MovieClipsIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
req = compat_urllib_request.Request(url)
req = sanitized_Request(url)
# it doesn't work if it thinks the browser is too old
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
webpage = self._download_webpage(req, display_id)

View File

@@ -1,27 +0,0 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class MovShareIE(NovaMovIE):
IE_NAME = 'movshare'
IE_DESC = 'MovShare'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
_HOST = 'www.movshare.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.movshare.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_str,
)
from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
sanitized_Request,
unescapeHTML,
url_basename,
RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
req = sanitized_Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,

View File

@@ -11,10 +11,10 @@ from ..compat import (
compat_ord,
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))

View File

@@ -1,63 +1,102 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
remove_end,
parse_duration,
int_or_none,
xpath_text,
xpath_attr,
)
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
'info_dict': {
'id': '0021200253-okc-bkn-recap.nba',
'ext': 'mp4',
'id': '0021200253-okc-bkn-recap',
'ext': 'flv',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
'timestamp': 1354638466,
'upload_date': '20121204',
},
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}, {
'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': {
'id': '0041400301-cle-atl-recap.nba',
'id': '0041400301-cle-atl-recap',
'ext': 'mp4',
'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228,
},
'params': {
'skip_download': True,
'timestamp': 1432134543,
'upload_date': '20150520',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
path, video_id = re.match(self._VALID_URL, url).groups()
if path.startswith('nba/'):
path = path[3:]
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
video_id = xpath_text(video_info, 'slug')
title = xpath_text(video_info, 'headline')
description = xpath_text(video_info, 'description')
duration = parse_duration(xpath_text(video_info, 'length'))
timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
thumbnails = []
for image in video_info.find('images'):
thumbnails.append({
'id': image.attrib.get('cut'),
'url': image.text,
'width': int_or_none(image.attrib.get('width')),
'height': int_or_none(image.attrib.get('height')),
})
shortened_video_id = video_id.rpartition('/')[2]
title = remove_end(
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
description = self._og_search_description(webpage)
duration_str = self._html_search_meta(
'duration', webpage, 'duration', default=None)
if not duration_str:
duration_str = self._html_search_regex(
r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
duration = parse_duration(duration_str)
formats = []
for video_file in video_info.findall('.//file'):
video_url = video_file.text
if video_url.startswith('/'):
continue
if video_url.endswith('.m3u8'):
m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif video_url.endswith('.f4m'):
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
key = video_file.attrib.get('bitrate')
format_info = {
'format_id': key,
'url': video_url,
}
mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
if mobj:
format_info.update({
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
'tbr': int_or_none(mobj.group(3)),
})
formats.append(format_info)
self._sort_formats(formats)
return {
'id': shortened_video_id,
'url': video_url,
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'thumbnails': thumbnails,
'formats': formats,
}
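The reworked NBA extractor above parses width, height and an optional bitrate out of each file's bitrate attribute. The regex behaves like this (the key value is an illustrative example):

import re

key = '1280x720_2400'  # example bitrate attribute value
mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
width, height = int(mobj.group(1)), int(mobj.group(2))
tbr = int(mobj.group(3)) if mobj.group(3) else None
print(width, height, tbr)  # 1280 720 2400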

View File

@@ -11,6 +11,7 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
lowercase_escape,
smuggle_url,
unescapeHTML,
)
@@ -62,12 +63,13 @@ class NBCIE(InfoExtractor):
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
return self.url_result(theplatform_url)
return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
class NBCSportsVPlayerIE(InfoExtractor):

View File

@@ -8,11 +8,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_str,
compat_itertools_count,
)
from ..utils import sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor):
@@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
if not details:
continue
formats.append({
'url': 'http://m1.music.126.net/%s/%s.%s' %
'url': 'http://m5.music.126.net/%s/%s.%s' %
(cls._encrypt(details['dfsId']), details['dfsId'],
details['extension']),
'ext': details.get('extension'),
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
return int(round(ms / 1000.0))
def query_api(self, endpoint, video_id, note):
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
req.add_header('Referer', self._API_BASE)
return self._download_json(req, video_id, note)

View File

@@ -1,10 +1,8 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
from ..compat import compat_urllib_parse
from ..utils import sanitized_Request
class NFBIE(InfoExtractor):
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request = sanitized_Request(
'https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

View File

@@ -8,7 +8,6 @@ import datetime
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
sanitized_Request,
xpath_text,
determine_ext,
)
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
'password': password,
}
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, None, note='Logging in', errnote='Unable to log in')
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
'k': thumb_play_key,
'v': video_id
})
flv_info_request = compat_urllib_request.Request(
flv_info_request = sanitized_Request(
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
{'Content-Type': 'application/x-www-form-urlencoded'})
flv_info_webpage = self._download_webpage(

View File

@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
clean_html,
@@ -17,6 +16,7 @@ from ..utils import (
int_or_none,
float_or_none,
parse_iso8601,
sanitized_Request,
)
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
'username': username,
'password': password,
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
login = self._download_json(request, None, 'Logging in as %s' % username)

View File

@@ -4,11 +4,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
ExtractorError,
sanitized_Request,
urlencode_postdata,
xpath_text,
xpath_with_ns,
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
'op': 'download1',
'method_free': 'Continue to Video',
}
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
req = sanitized_Request(url, urlencode_postdata(fields))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(req, video_id,
'Downloading download page')

View File

@@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
NO_DEFAULT,
encode_dict,
sanitized_Request,
urlencode_postdata,
)
@@ -38,19 +40,40 @@ class NovaMovIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
page = self._download_webpage(
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
url = 'http://%s/video/%s' % (self._HOST, video_id)
if re.search(self._FILE_DELETED_REGEX, page) is not None:
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
def extract_filekey(default=NO_DEFAULT):
return self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
filekey = extract_filekey(default=None)
if not filekey:
fields = self._hidden_inputs(webpage)
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
'post url', default=url, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(encode_dict(fields)))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
filekey = extract_filekey()
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
api_response = self._download_webpage(
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
@@ -69,3 +92,89 @@ class NovaMovIE(InfoExtractor):
'title': title,
'description': description
}
class WholeCloudIE(NovaMovIE):
IE_NAME = 'wholecloud'
IE_DESC = 'WholeCloud'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
_HOST = 'www.wholecloud.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.wholecloud.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}
class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.nowvideo.to/video/0mw0yow7b6dxa',
'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
'info_dict': {
'id': '0mw0yow7b6dxa',
'ext': 'flv',
'title': 'youtubedl test video _BaW_jenozKc.mp4',
'description': 'Description',
}
}
class VideoWeedIE(NovaMovIE):
IE_NAME = 'videoweed'
IE_DESC = 'VideoWeed'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
_HOST = 'www.videoweed.es'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
_TEST = {
'url': 'http://www.videoweed.es/file/b42178afbea14',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': 'b42178afbea14',
'ext': 'flv',
'title': 'optical illusion dissapeared image magic illusion',
'description': ''
},
}
class CloudTimeIE(NovaMovIE):
IE_NAME = 'cloudtime'
IE_DESC = 'CloudTime'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'}
_HOST = 'www.cloudtime.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
_TEST = None
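When the filekey is missing from the first page, the reworked NovaMov base extractor above re-submits the page's hidden form and retries the extraction on the resulting page. A rough standalone sketch of that step (the hidden-inputs helper is a simplified stand-in, and the HTML is invented):

import re
try:
    from urllib.parse import urlencode, urljoin
except ImportError:
    from urllib import urlencode  # Python 2
    from urlparse import urljoin

def hidden_inputs(webpage):
    # Simplified stand-in for InfoExtractor._hidden_inputs.
    return dict(re.findall(
        r'<input[^>]+type=["\']hidden["\'][^>]+name=["\']([^"\']+)["\'][^>]+value=["\']([^"\']*)["\']',
        webpage))

page_url = 'http://www.wholecloud.net/video/559e28be54d96'
webpage = '<form action="/continue.php"><input type="hidden" name="fkey" value="abc123"></form>'
post_url = urljoin(page_url, re.search(r'<form[^>]+action=(["\'])(.+?)\1', webpage).group(2))
post_data = urlencode(hidden_inputs(webpage))
print(post_url, post_data)  # http://www.wholecloud.net/continue.php fkey=abc123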

View File

@@ -1,12 +1,12 @@
# encoding: utf-8
from __future__ import unicode_literals
from .brightcove import BrightcoveIE
from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import (
compat_str,
compat_urllib_request,
from ..compat import compat_str
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'Brightcove')
return self.url_result(bc_url, 'BrightcoveLegacy')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
def _api_request(self, url, request_path):
display_id = self._match_id(url)
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://api.nowness.com/api/' + request_path % display_id,
headers={
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -13,8 +15,63 @@ from ..utils import (
)
class NowTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
class NowTVBaseIE(InfoExtractor):
_VIDEO_FIELDS = (
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
'broadcastStartDate', 'seoUrl', 'duration', 'files',
'format.defaultImage169Format', 'format.defaultImage169Logo')
def _extract_video(self, info, display_id=None):
video_id = compat_str(info['id'])
files = info['files']
if not files:
if info.get('geoblocked', False):
raise ExtractorError(
'Video %s is not available from your location due to geo restriction' % video_id,
expected=True)
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
formats = []
for item in files['items']:
if determine_ext(item['path']) != 'f4v':
continue
app, play_path = remove_start(item['path'], '/').split('/', 1)
formats.append({
'url': 'rtmpe://fms.rtl.de',
'app': app,
'play_path': 'mp4:%s' % play_path,
'ext': 'flv',
'page_url': 'http://rtlnow.rtl.de',
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
'tbr': int_or_none(item.get('bitrate')),
})
self._sort_formats(formats)
title = info['title']
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
return {
'id': video_id,
'display_id': display_id or info.get('seoUrl'),
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class NowTVIE(NowTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
# rtl
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
'id': '203519',
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'ext': 'flv',
'title': 'Die neuen Bauern und eine Hochzeit',
'title': 'Inka Bause stellt die neuen Bauern vor',
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1432580700,
@@ -133,61 +190,71 @@ class NowTVIE(InfoExtractor):
}, {
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
'only_matching': True,
}, {
'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id_split = display_id.split('/')
if len(display_id) > 2:
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
mobj = re.match(self._VALID_URL, url)
display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
info = self._download_json(
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
display_id)
'https://api.nowtv.de/v3/movies/%s?fields=%s'
% (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
video_id = compat_str(info['id'])
return self._extract_video(info, display_id)
files = info['files']
if not files:
if info.get('geoblocked', False):
raise ExtractorError(
'Video %s is not available from your location due to geo restriction' % video_id,
expected=True)
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
formats = []
for item in files['items']:
if determine_ext(item['path']) != 'f4v':
continue
app, play_path = remove_start(item['path'], '/').split('/', 1)
formats.append({
'url': 'rtmpe://fms.rtl.de',
'app': app,
'play_path': 'mp4:%s' % play_path,
'ext': 'flv',
'page_url': 'http://rtlnow.rtl.de',
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
'tbr': int_or_none(item.get('bitrate')),
})
self._sort_formats(formats)
class NowTVListIE(NowTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
title = info['title']
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
duration = parse_duration(info.get('duration'))
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
f = info.get('format', {})
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
_TESTS = [{
'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
'info_dict': {
'id': '17006',
'title': 'stern TV - Aktuell',
},
'playlist_count': 1,
}, {
'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
'info_dict': {
'id': '20716',
'title': 'Das Supertalent - FREE Staffel 8',
},
'playlist_count': 14,
}]
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_id = mobj.group('show_id')
season_id = mobj.group('id')
fields = []
fields.extend(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
list_info = self._download_json(
'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
% (','.join(fields), show_id),
season_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
title = '%s - %s' % (list_info['title'], season['headline'])
entries = []
for container in season['formatTabPages']['items']:
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
entries.append(self._extract_video(info))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)
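The rewritten NowTVListIE builds a single fields= query for the api.nowtv.de/v3/formats/seo endpoint out of the show, season and nested movie field names shown above. A standalone sketch of that string assembly (field names and the das-supertalent slug are taken from the hunk and its tests; nothing is fetched here):

SHOW_FIELDS = ('title',)
SEASON_FIELDS = ('id', 'headline', 'seoheadline')
VIDEO_FIELDS = (
    'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
    'broadcastStartDate', 'seoUrl', 'duration', 'files',
    'format.defaultImage169Format', 'format.defaultImage169Logo')

# Prefix season fields with formatTabs. and movie fields with the full
# container path, exactly as the extractor does above.
fields = []
fields.extend(SHOW_FIELDS)
fields.extend('formatTabs.%s' % f for f in SEASON_FIELDS)
fields.extend(
    'formatTabs.formatTabPages.container.movies.%s' % f
    for f in VIDEO_FIELDS)

show_id = 'das-supertalent'  # slug from the test URL above
url = ('https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
       % (','.join(fields), show_id))
print(url)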

View File

@@ -1,28 +0,0 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.ch'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
'info_dict': {
'id': '0mw0yow7b6dxa',
'ext': 'flv',
'title': 'youtubedl test video _BaW_jenozKc.mp4',
'description': 'Description',
}
}

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
parse_duration,
@@ -48,12 +49,22 @@ class NRKIE(InfoExtractor):
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
media_url = data.get('mediaUrl')
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
if not media_url:
if data['usageRights']['isGeoBlocked']:
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
if determine_ext(media_url) == 'f4m':
formats = self._extract_f4m_formats(
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
else:
formats = [{
'url': media_url,
'ext': 'flv',
}]
duration = parse_duration(data.get('duration'))
@@ -67,12 +78,11 @@ class NRKIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': data['title'],
'description': data['description'],
'duration': duration,
'thumbnail': thumbnail,
'formats': formats,
}
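The NRK hunk now branches on the media URL's extension: .f4m manifests go through the HDS formats extractor with the hdcore query string appended, anything else is kept as a single flv download. A minimal illustration of the same dispatch — determine_ext is a simplified stand-in for the project's helper and the f4m extractor is stubbed so the snippet runs on its own:

import os
from urllib.parse import urlparse


def determine_ext(url, default_ext='unknown_video'):
    # Simplified stand-in for youtube_dl.utils.determine_ext.
    ext = os.path.splitext(urlparse(url).path)[1][1:]
    return ext if ext.isalnum() else default_ext


def extract_f4m_formats(manifest_url, video_id, f4m_id='hds'):
    # Stub for InfoExtractor._extract_f4m_formats.
    return [{'format_id': f4m_id, 'manifest_url': manifest_url}]


def build_formats(media_url, video_id):
    if determine_ext(media_url) == 'f4m':
        return extract_f4m_formats(
            media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81',
            video_id, f4m_id='hds')
    return [{'url': media_url, 'ext': 'flv'}]


# Hypothetical URLs for illustration.
print(build_formats('http://example.invalid/manifest.f4m', '12345'))
print(build_formats('http://example.invalid/clip.flv', '12345'))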

View File

@@ -3,11 +3,9 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
parse_duration,
sanitized_Request,
unified_strdate,
)
@@ -33,7 +31,7 @@ class NuvidIE(InfoExtractor):
formats = []
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
request = compat_urllib_request.Request(
request = sanitized_Request(
'http://m.nuvid.com/play/%s' % video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
webpage = self._download_webpage(

View File

@@ -1,108 +1,69 @@
from __future__ import unicode_literals
import re
import json
import base64
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
ExtractorError,
determine_ext,
int_or_none,
float_or_none,
ExtractorError,
unsmuggle_url,
)
from ..compat import compat_urllib_parse
class OoyalaBaseIE(InfoExtractor):
def _extract_result(self, info, more_info):
embedCode = info['embedCode']
video_url = info.get('ipad_url') or info['url']
if determine_ext(video_url) == 'm3u8':
formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
else:
formats = [{
'url': video_url,
'ext': 'mp4',
}]
return {
'id': embedCode,
'title': unescapeHTML(info['title']),
'formats': formats,
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
def _extract(self, content_tree_url, video_id, domain='example.org'):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
pcode = metadata.get('asset_pcode') or embed_code
video_info = {
'id': embed_code,
'title': metadata['title'],
'description': metadata.get('description'),
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
'duration': float_or_none(metadata.get('duration'), 1000),
}
def _extract(self, player_url, video_id):
player = self._download_webpage(player_url, video_id)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, 'mobile player url')
# Looks like some videos are only available for particular devices
# (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
# is only available for ipad)
# Working around with fetching URLs for all the devices found starting with 'unknown'
# until we succeed or eventually fail for each device.
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
devices.remove('unknown')
devices.insert(0, 'unknown')
for device in devices:
mobile_player = self._download_webpage(
'%s&device=%s' % (mobile_url, device), video_id,
'Downloading mobile player JS for %s device' % device)
videos_info = self._search_regex(
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
mobile_player, 'info', fatal=False, default=None)
if videos_info:
break
if not videos_info:
formats = []
formats = []
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
auth_data = self._download_json(
'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id),
video_id)
'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}),
video_id, 'Downloading %s JSON' % supported_format)
cur_auth_data = auth_data['authorization_data'][video_id]
cur_auth_data = auth_data['authorization_data'][embed_code]
for stream in cur_auth_data['streams']:
formats.append({
'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
'ext': stream.get('delivery_type'),
'format': stream.get('video_codec'),
'format_id': stream.get('profile'),
'width': int_or_none(stream.get('width')),
'height': int_or_none(stream.get('height')),
'abr': int_or_none(stream.get('audio_bitrate')),
'vbr': int_or_none(stream.get('video_bitrate')),
})
if formats:
return {
'id': video_id,
'formats': formats,
'title': 'Ooyala video',
}
if cur_auth_data['authorized']:
for stream in cur_auth_data['streams']:
url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
delivery_type = stream['delivery_type']
if delivery_type == 'remote_asset':
video_info['url'] = url
return video_info
if delivery_type == 'hls':
formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif delivery_type == 'hds':
formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False))
else:
formats.append({
'url': url,
'ext': stream.get('delivery_type'),
'vcodec': stream.get('video_codec'),
'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')),
'width': int_or_none(stream.get('width')),
'height': int_or_none(stream.get('height')),
'abr': int_or_none(stream.get('audio_bitrate')),
'vbr': int_or_none(stream.get('video_bitrate')),
'fps': float_or_none(stream.get('framerate')),
})
else:
raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True)
self._sort_formats(formats)
if not cur_auth_data['authorized']:
raise ExtractorError(cur_auth_data['message'], expected=True)
if not videos_info:
raise ExtractorError('Unable to extract info')
videos_info = videos_info.replace('\\"', '"')
videos_more_info = self._search_regex(
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
videos_info = json.loads(videos_info)
videos_more_info = json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
return {
'_type': 'playlist',
'id': video_id,
'title': unescapeHTML(videos_more_info['title']),
'entries': videos,
}
else:
return self._extract_result(videos_info[0], videos_more_info)
video_info['formats'] = formats
return video_info
class OoyalaIE(OoyalaBaseIE):
@@ -117,6 +78,7 @@ class OoyalaIE(OoyalaBaseIE):
'ext': 'mp4',
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
'duration': 853.386,
},
}, {
# Only available for ipad
@@ -125,7 +87,7 @@ class OoyalaIE(OoyalaBaseIE):
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation',
'description': '',
'duration': 194.948,
},
},
{
@@ -136,7 +98,8 @@ class OoyalaIE(OoyalaBaseIE):
'info_dict': {
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
'ext': 'mp4',
'title': 'Ooyala video',
'title': 'Divide Tool Path.mp4',
'duration': 204.405,
}
}
]
@@ -151,9 +114,11 @@ class OoyalaIE(OoyalaBaseIE):
ie=cls.ie_key())
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
return self._extract(player_url, embed_code)
domain = smuggled_data.get('domain')
content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain)
class OoyalaExternalIE(OoyalaBaseIE):
@@ -170,7 +135,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
.*?&pcode=
)
(?P<pcode>.+?)
(&|$)
(?:&|$)
'''
_TEST = {
@@ -179,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4',
'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
'description': '',
'duration': 1302000,
},
'params': {
# m3u8 download
@@ -188,9 +153,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
pcode = mobj.group('pcode')
player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
return self._extract(player_url, video_id)
partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
return self._extract(content_tree_url, video_id)
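The rewritten Ooyala base extractor asks the sas/player_api/v1/authorization endpoint once per supported format and base64-decodes each stream URL the response carries. A small sketch of just the URL construction and the decoding step, mirroring the loop above (the embed code is the one from the test; no request is actually made):

import base64
from urllib.parse import urlencode

AUTH_URL = ('http://player.ooyala.com/sas/player_api/v1/'
            'authorization/embed_code/%s/%s?')


def authorization_url(pcode, embed_code, domain='example.org',
                      supported_format='mp4'):
    # One URL per supported format, as in the loop above.
    return AUTH_URL % (pcode, embed_code) + urlencode(
        {'domain': domain, 'supportedFormats': supported_format})


embed_code = 'FiOG81ZTrvckcchQxmalf4aQj590qTEx'  # embed code from the test above
print(authorization_url(embed_code, embed_code))  # pcode falls back to the embed code

# Stream URLs come back base64-encoded and are decoded like this:
encoded = base64.b64encode(b'http://example.org/video.mp4').decode('ascii')
stream = {'url': {'data': encoded}}
print(base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'))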

View File

@@ -2,9 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
)
from ..utils import js_to_json
class PatreonIE(InfoExtractor):
@@ -65,7 +63,7 @@ class PatreonIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(
request = sanitized_Request(
'https://www.patreon.com/processLogin',
compat_urllib_parse.urlencode(login_form).encode('utf-8')
)

View File

@@ -15,16 +15,489 @@ from ..utils import (
class PBSIE(InfoExtractor):
_STATIONS = (
('pbs.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # AL, http://aptv.org/
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # AL, http://www.gpb.org/
('mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # AL, http://www.mpbonline.org
('wnpt.org', 'Nashville Public Television (WNPT)'), # AL, http://www.wnpt.org
('wfsu.org', 'WFSU-TV (WFSU)'), # AL, http://wfsu.org/
('wsre.org', 'WSRE (WSRE)'), # AL, http://www.wsre.org
('wtcitv.org', 'WTCI (WTCI)'), # AL, http://www.wtcitv.org
('pba.org', 'WPBA/Channel 30 (WPBA)'), # AL, http://pba.org/
('alaskapublic.org', 'Alaska Public Media (KAKM)'), # AK, http://alaskapublic.org/kakm
('kuac.org', 'KUAC (KUAC)'), # AK, http://kuac.org/kuac-tv/
('ktoo.org', '360 North (KTOO)'), # AK, http://www.ktoo.org/
('azpm.org', 'KUAT 6 (KUAT)'), # AZ, http://www.azpm.org/
('azpbs.org', 'Arizona PBS (KAET)'), # AZ, http://www.azpbs.org
('newmexicopbs.org', 'KNME-TV/Channel 5 (KNME)'), # AZ, http://www.newmexicopbs.org/
('vegaspbs.org', 'Vegas PBS (KLVX)'), # AZ, http://vegaspbs.org/
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # AR, http://www.aetn.org/
('ket.org', 'KET (WKLE)'), # AR, http://www.ket.org/
('wkno.org', 'WKNO/Channel 10 (WKNO)'), # AR, http://www.wkno.org/
('lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # AR, http://www.lpb.org/
('mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # AR, http://www.mpbonline.org
('oeta.tv', 'OETA (KETA)'), # AR, http://www.oeta.tv
('optv.org', 'Ozarks Public Television (KOZK)'), # AR, http://www.optv.org/
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # AR, http://www.wsiu.org/
('keet.org', 'KEET TV (KEET)'), # CA, http://www.keet.org
('kixe.org', 'KIXE/Channel 9 (KIXE)'), # CA, http://kixe.org/
('kpbs.org', 'KPBS San Diego (KPBS)'), # CA, http://www.kpbs.org/
('kqed.org', 'KQED (KQED)'), # CA, http://www.kqed.org
('kvie.org', 'KVIE Public Television (KVIE)'), # CA, http://www.kvie.org
('pbssocal.org', 'PBS SoCal/KOCE (KOCE)'), # CA, http://www.pbssocal.org/
('valleypbs.org', 'ValleyPBS (KVPT)'), # CA, http://www.valleypbs.org/
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # CA, http://aptv.org/
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # CA, http://cptv.org
('azpbs.org', 'Arizona PBS (KAET)'), # CA, http://www.azpbs.org
('knpb.org', 'KNPB Channel 5 (KNPB)'), # CA, http://www.knpb.org/
('soptv.org', 'SOPTV (KSYS)'), # CA, http://www.soptv.org
('klcs.org', 'KLCS/Channel 58 (KLCS)'), # CA, http://www.klcs.org
('krcb.org', 'KRCB Television & Radio (KRCB)'), # CA, http://www.krcb.org
('kvcr.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # CA, http://kvcr.org
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # CO, http://www.rmpbs.org
('kenw.org', 'KENW-TV3 (KENW)'), # CO, http://www.kenw.org
('kued.org', 'KUED Channel 7 (KUED)'), # CO, http://www.kued.org
('newmexicopbs.org', 'KNME-TV/Channel 5 (KNME)'), # CO, http://www.newmexicopbs.org/
('wyomingpbs.org', 'Wyoming PBS (KCWC)'), # CO, http://www.wyomingpbs.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # CO, http://www.cpt12.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # CO, http://www.kbyutv.org/
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # CT, http://cptv.org
('thirteen.org', 'Thirteen/WNET New York (WNET)'), # CT, http://www.thirteen.org
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # CT, http://wgbh.org
('wgby.org', 'WGBY (WGBY)'), # CT, http://www.wgby.org
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # CT, http://www.njtvonline.org/
('ripbs.org', 'Rhode Island PBS (WSBE)'), # CT, http://www.ripbs.org/home/
('wliw.org', 'WLIW21 (WLIW)'), # CT, http://www.wliw.org/
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # DE, http://www.mpt.org
('weta.org', 'WETA Television and Radio (WETA)'), # DE, http://www.weta.org
('whyy.org', 'WHYY (WHYY)'), # DE, http://www.whyy.org
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # DE, http://www.njtvonline.org/
('wlvt.org', 'PBS 39 (WLVT)'), # DE, http://www.wlvt.org/
('wliw.org', 'WLIW21 (WLIW)'), # DE, http://www.wliw.org/
('weta.org', 'WETA Television and Radio (WETA)'), # DC, http://www.weta.org
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # DC, http://www.mpt.org
('wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # DC, http://www.wvpt.net
('whut.org', 'Howard University Television (WHUT)'), # DC, http://www.whut.org
('wedu.org', 'WEDU PBS (WEDU)'), # FL, http://www.wedu.org
('wfsu.org', 'WFSU-TV (WFSU)'), # FL, http://wfsu.org/
('wgcu.org', 'WGCU Public Media (WGCU)'), # FL, http://www.wgcu.org/
('wjct.org', 'WJCT Public Broadcasting (WJCT)'), # FL, http://www.wjct.org
('wpbt2.org', 'WPBT2 (WPBT)'), # FL, http://www.wpbt2.org
('wsre.org', 'WSRE (WSRE)'), # FL, http://www.wsre.org
('wucftv.org', 'WUCF TV (WUCF)'), # FL, http://wucftv.org
('wuft.org', 'WUFT/Channel 5 (WUFT)'), # FL, http://www.wuft.org
('wxel.org', 'WXEL/Channel 42 (WXEL)'), # FL, http://www.wxel.org/home/
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # FL, http://aptv.org/
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # FL, http://www.gpb.org/
('wlrn.org', 'WLRN/Channel 17 (WLRN)'), # FL, http://www.wlrn.org/
('wusf.org', 'WUSF Public Broadcasting (WUSF)'), # FL, http://wusf.org/
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # FL, http://www.njtvonline.org/
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # GA, http://www.gpb.org/
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # GA, http://aptv.org/
('scetv.org', 'ETV (WRLK)'), # GA, http://www.scetv.org
('unctv.org', 'UNC-TV (WUNC)'), # GA, http://www.unctv.org/
('wfsu.org', 'WFSU-TV (WFSU)'), # GA, http://wfsu.org/
('wjct.org', 'WJCT Public Broadcasting (WJCT)'), # GA, http://www.wjct.org
('wtcitv.org', 'WTCI (WTCI)'), # GA, http://www.wtcitv.org
('pba.org', 'WPBA/Channel 30 (WPBA)'), # GA, http://pba.org/
('pbsguam.org', 'PBS Guam (KGTF)'), # GU, http://www.pbsguam.org/
('pbshawaii.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # HI, http://www.pbshawaii.org/
('idahoptv.org', 'Idaho Public Television (KAID)'), # ID, http://idahoptv.org
('ksps.org', 'KSPS (KSPS)'), # ID, http://www.ksps.org/home/
('kued.org', 'KUED Channel 7 (KUED)'), # ID, http://www.kued.org
('opb.org', 'OPB (KOPB)'), # ID, http://www.opb.org
('soptv.org', 'SOPTV (KSYS)'), # ID, http://www.soptv.org
('wyomingpbs.org', 'Wyoming PBS (KCWC)'), # ID, http://www.wyomingpbs.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # ID, http://www.cpt12.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # ID, http://www.kbyutv.org/
('kwsu.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # ID, http://www.kwsu.org
('illinois.edu', 'WILL-TV (WILL)'), # IL, http://will.illinois.edu/
('wsec.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # IL, http://www.wsec.tv
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # IL, http://www.wsiu.org/
('wttw.com', 'WTTW11 (WTTW)'), # IL, http://www.wttw.com/
('wtvp.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # IL, http://www.wtvp.org/
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # IL, http://www.iptv.org/
('ket.org', 'KET (WKLE)'), # IL, http://www.ket.org/
('ninenet.org', 'Nine Network (KETC)'), # IL, http://www.ninenet.org
('wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # IL, http://wfwa.org/
('wfyi.org', 'WFYI Indianapolis (WFYI)'), # IL, http://www.wfyi.org
('mptv.org', 'Milwaukee Public Television (WMVS)'), # IL, http://www.mptv.org
('wnin.org', 'WNIN (WNIN)'), # IL, http://www.wnin.org/
('wnit.org', 'WNIT Public Television (WNIT)'), # IL, http://www.wnit.org/
('wpt.org', 'WPT (WPNE)'), # IL, http://www.wpt.org/
('wvut.org', 'WVUT/Channel 22 (WVUT)'), # IL, http://wvut.org/
('weiu.net', 'WEIU/Channel 51 (WEIU)'), # IL, http://www.weiu.net
('wqpt.org', 'WQPT-TV (WQPT)'), # IL, http://www.wqpt.org
('wycc.org', 'WYCC PBS Chicago (WYCC)'), # IL, http://www.wycc.org
('lakeshorepublicmedia.org', 'Lakeshore Public Television (WYIN)'), # IL, http://lakeshorepublicmedia.org/
('wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # IN, http://wfwa.org/
('wfyi.org', 'WFYI Indianapolis (WFYI)'), # IN, http://www.wfyi.org
('wipb.org', 'WIPB-TV (WIPB)'), # IN, http://wipb.org
('wnin.org', 'WNIN (WNIN)'), # IN, http://www.wnin.org/
('wnit.org', 'WNIT Public Television (WNIT)'), # IN, http://www.wnit.org/
('indianapublicmedia.org', 'WTIU (WTIU)'), # IN, http://indianapublicmedia.org/tv/
('wvut.org', 'WVUT/Channel 22 (WVUT)'), # IN, http://wvut.org/
('cetconnect.org', 'CET (WCET)'), # IN, http://www.cetconnect.org
('ket.org', 'KET (WKLE)'), # IN, http://www.ket.org/
('thinktv.org', 'ThinkTVNetwork (WPTD)'), # IN, http://www.thinktv.org
('wbgu.org', 'WBGU-TV (WBGU)'), # IN, http://wbgu.org
('wgvu.org', 'WGVU TV (WGVU)'), # IN, http://www.wgvu.org/
('illinois.edu', 'WILL-TV (WILL)'), # IN, http://will.illinois.edu/
('wsec.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # IN, http://www.wsec.tv
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # IN, http://www.wsiu.org/
('wttw.com', 'WTTW11 (WTTW)'), # IN, http://www.wttw.com/
('lakeshorepublicmedia.org', 'Lakeshore Public Television (WYIN)'), # IN, http://lakeshorepublicmedia.org/
('weiu.net', 'WEIU/Channel 51 (WEIU)'), # IN, http://www.weiu.net
('wycc.org', 'WYCC PBS Chicago (WYCC)'), # IN, http://www.wycc.org
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # IA, http://www.iptv.org/
('netnebraska.org', 'NET1 (KUON)'), # IA, http://netnebraska.org
('pioneer.org', 'Pioneer Public Television (KWCM)'), # IA, http://www.pioneer.org
('sdpb.org', 'SDPB Television (KUSD)'), # IA, http://www.sdpb.org
('tpt.org', 'TPT (KTCA)'), # IA, http://www.tpt.org
('wsec.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # IA, http://www.wsec.tv
('wpt.org', 'WPT (WPNE)'), # IA, http://www.wpt.org/
('ksmq.org', 'KSMQ (KSMQ)'), # IA, http://www.ksmq.org/
('wqpt.org', 'WQPT-TV (WQPT)'), # IA, http://www.wqpt.org
('kpts.org', 'KPTS/Channel 8 (KPTS)'), # KS, http://www.kpts.org/
('ktwu.org', 'KTWU/Channel 11 (KTWU)'), # KS, http://ktwu.org
('shptv.org', 'Smoky Hills Public Television (KOOD)'), # KS, http://www.shptv.org
('kcpt.org', 'KCPT Kansas City Public Television (KCPT)'), # KS, http://kcpt.org/
('netnebraska.org', 'NET1 (KUON)'), # KS, http://netnebraska.org
('oeta.tv', 'OETA (KETA)'), # KS, http://www.oeta.tv
('optv.org', 'Ozarks Public Television (KOZK)'), # KS, http://www.optv.org/
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # KS, http://www.rmpbs.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # KS, http://www.cpt12.org/
('ket.org', 'KET (WKLE)'), # KY, http://www.ket.org/
('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # KY, http://www.blueridgepbs.org/
('cetconnect.org', 'CET (WCET)'), # KY, http://www.cetconnect.org
('easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # KY, http://easttennesseepbs.org
('wnpt.org', 'Nashville Public Television (WNPT)'), # KY, http://www.wnpt.org
('thinktv.org', 'ThinkTVNetwork (WPTD)'), # KY, http://www.thinktv.org
('wcte.org', 'WCTE-TV (WCTE)'), # KY, http://www.wcte.org
('wljt.org', 'WLJT, Channel 11 (WLJT)'), # KY, http://wljt.org/
('wnin.org', 'WNIN (WNIN)'), # KY, http://www.wnin.org/
('wosu.org', 'WOSU TV (WOSU)'), # KY, http://wosu.org/
('woub.org', 'WOUB/WOUC (WOUB)'), # KY, http://woub.org/tv/index.php?section=5
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # KY, http://www.wsiu.org/
('wvpublic.org', 'WVPB (WVPB)'), # KY, http://wvpublic.org/
('wkyupbs.org', 'WKYU-PBS (WKYU)'), # KY, http://www.wkyupbs.org
('lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # LA, http://www.lpb.org/
('wyes.org', 'WYES-TV/New Orleans (WYES)'), # LA, http://www.wyes.org
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # LA, http://www.aetn.org/
('kera.org', 'KERA 13 (KERA)'), # LA, http://www.kera.org/
('mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # LA, http://www.mpbonline.org
('mpbn.net', 'MPBN (WCBB)'), # ME, http://www.mpbn.net/
('mountainlake.org', 'Mountain Lake PBS (WCFE)'), # ME, http://www.mountainlake.org/
('nhptv.org', 'NHPTV (WENH)'), # ME, http://nhptv.org/
('vpt.org', 'Vermont PBS (WETK)'), # ME, http://www.vpt.org
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # ME, http://wgbh.org
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # MD, http://www.mpt.org
('thirteen.org', 'Thirteen/WNET New York (WNET)'), # MD, http://www.thirteen.org
('weta.org', 'WETA Television and Radio (WETA)'), # MD, http://www.weta.org
('whyy.org', 'WHYY (WHYY)'), # MD, http://www.whyy.org
('witf.org', 'witf (WITF)'), # MD, http://www.witf.org
('wqed.org', 'WQED Multimedia (WQED)'), # MD, http://www.wqed.org/
('wvpublic.org', 'WVPB (WVPB)'), # MD, http://wvpublic.org/
('wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # MD, http://www.wvpt.net
('whut.org', 'Howard University Television (WHUT)'), # MD, http://www.whut.org
('wliw.org', 'WLIW21 (WLIW)'), # MD, http://www.wliw.org/
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # MA, http://wgbh.org
('wgby.org', 'WGBY (WGBY)'), # MA, http://www.wgby.org
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # MA, http://cptv.org
('nhptv.org', 'NHPTV (WENH)'), # MA, http://nhptv.org/
('vpt.org', 'Vermont PBS (WETK)'), # MA, http://www.vpt.org
('wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # MA, http://www.wmht.org/home/
('ripbs.org', 'Rhode Island PBS (WSBE)'), # MA, http://www.ripbs.org/home/
('deltabroadcasting.org', 'Q-TV (WDCQ)'), # MI, http://www.deltabroadcasting.org
('dptv.org', 'WTVS Detroit Public TV (WTVS)'), # MI, http://www.dptv.org/
('wcmu.org', 'CMU Public Television (WCMU)'), # MI, http://www.wcmu.org
('wgvu.org', 'WGVU TV (WGVU)'), # MI, http://www.wgvu.org/
('wkar.org', 'WKAR-TV (WKAR)'), # MI, http://wkar.org/
('nmu.edu', 'WNMU-TV Public TV 13 (WNMU)'), # MI, http://wnmutv.nmu.edu
('wdse.org', 'WDSE - WRPT (WDSE)'), # MI, http://www.wdse.org/
('wbgu.org', 'WBGU-TV (WBGU)'), # MI, http://wbgu.org
('wgte.org', 'WGTE TV (WGTE)'), # MI, http://www.wgte.org
('wnit.org', 'WNIT Public Television (WNIT)'), # MI, http://www.wnit.org/
('wpt.org', 'WPT (WPNE)'), # MI, http://www.wpt.org/
('wttw.com', 'WTTW11 (WTTW)'), # MI, http://www.wttw.com/
('wycc.org', 'WYCC PBS Chicago (WYCC)'), # MI, http://www.wycc.org
('lakelandptv.org', 'Lakeland Public Television (KAWE)'), # MN, http://www.lakelandptv.org
('wdse.org', 'WDSE - WRPT (WDSE)'), # MN, http://www.wdse.org/
('pioneer.org', 'Pioneer Public Television (KWCM)'), # MN, http://www.pioneer.org
('tpt.org', 'TPT (KTCA)'), # MN, http://www.tpt.org
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # MN, http://www.iptv.org/
('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # MN, http://www.prairiepublic.org/
('ninenet.org', 'Nine Network (KETC)'), # MN, http://www.ninenet.org
('sdpb.org', 'SDPB Television (KUSD)'), # MN, http://www.sdpb.org
('wpt.org', 'WPT (WPNE)'), # MN, http://www.wpt.org/
('ksmq.org', 'KSMQ (KSMQ)'), # MN, http://www.ksmq.org/
('mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # MS, http://www.mpbonline.org
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # MS, http://www.aetn.org/
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # MS, http://aptv.org/
('wkno.org', 'WKNO/Channel 10 (WKNO)'), # MS, http://www.wkno.org/
('lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # MS, http://www.lpb.org/
('wsre.org', 'WSRE (WSRE)'), # MS, http://www.wsre.org
('wyes.org', 'WYES-TV/New Orleans (WYES)'), # MS, http://www.wyes.org
('kcpt.org', 'KCPT Kansas City Public Television (KCPT)'), # MO, http://kcpt.org/
('kmos.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # MO, http://www.kmos.org/
('ninenet.org', 'Nine Network (KETC)'), # MO, http://www.ninenet.org
('optv.org', 'Ozarks Public Television (KOZK)'), # MO, http://www.optv.org/
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # MO, http://www.aetn.org/
('dptv.org', 'WTVS Detroit Public TV (WTVS)'), # MO, http://www.dptv.org/
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # MO, http://www.iptv.org/
('ket.org', 'KET (WKLE)'), # MO, http://www.ket.org/
('wkno.org', 'WKNO/Channel 10 (WKNO)'), # MO, http://www.wkno.org/
('ktwu.org', 'KTWU/Channel 11 (KTWU)'), # MO, http://ktwu.org
('netnebraska.org', 'NET1 (KUON)'), # MO, http://netnebraska.org
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # MO, http://www.rmpbs.org
('wsec.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # MO, http://www.wsec.tv
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # MO, http://www.wsiu.org/
('montanapbs.org', 'MontanaPBS (KUSM)'), # MT, http://montanapbs.org
('azpbs.org', 'Arizona PBS (KAET)'), # MT, http://www.azpbs.org
('idahoptv.org', 'Idaho Public Television (KAID)'), # MT, http://idahoptv.org
('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # MT, http://www.prairiepublic.org/
('ksps.org', 'KSPS (KSPS)'), # MT, http://www.ksps.org/home/
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # MT, http://www.rmpbs.org
('sdpb.org', 'SDPB Television (KUSD)'), # MT, http://www.sdpb.org
('netnebraska.org', 'NET1 (KUON)'), # NE, http://netnebraska.org
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # NE, http://www.iptv.org/
('ktwu.org', 'KTWU/Channel 11 (KTWU)'), # NE, http://ktwu.org
('pioneer.org', 'Pioneer Public Television (KWCM)'), # NE, http://www.pioneer.org
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # NE, http://www.rmpbs.org
('sdpb.org', 'SDPB Television (KUSD)'), # NE, http://www.sdpb.org
('wyomingpbs.org', 'Wyoming PBS (KCWC)'), # NE, http://www.wyomingpbs.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # NE, http://www.cpt12.org/
('knpb.org', 'KNPB Channel 5 (KNPB)'), # NV, http://www.knpb.org/
('vegaspbs.org', 'Vegas PBS (KLVX)'), # NV, http://vegaspbs.org/
('azpbs.org', 'Arizona PBS (KAET)'), # NV, http://www.azpbs.org
('kued.org', 'KUED Channel 7 (KUED)'), # NV, http://www.kued.org
('pbssocal.org', 'PBS SoCal/KOCE (KOCE)'), # NV, http://www.pbssocal.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # NV, http://www.kbyutv.org/
('nhptv.org', 'NHPTV (WENH)'), # NH, http://nhptv.org/
('scetv.org', 'ETV (WRLK)'), # NH, http://www.scetv.org
('mountainlake.org', 'Mountain Lake PBS (WCFE)'), # NH, http://www.mountainlake.org/
('mpbn.net', 'MPBN (WCBB)'), # NH, http://www.mpbn.net/
('vpt.org', 'Vermont PBS (WETK)'), # NH, http://www.vpt.org
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # NH, http://wgbh.org
('wgby.org', 'WGBY (WGBY)'), # NH, http://www.wgby.org
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # NJ, http://cptv.org
('thirteen.org', 'Thirteen/WNET New York (WNET)'), # NJ, http://www.thirteen.org
('whyy.org', 'WHYY (WHYY)'), # NJ, http://www.whyy.org
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # NJ, http://www.njtvonline.org/
('wlvt.org', 'PBS 39 (WLVT)'), # NJ, http://www.wlvt.org/
('wliw.org', 'WLIW21 (WLIW)'), # NJ, http://www.wliw.org/
('kenw.org', 'KENW-TV3 (KENW)'), # NM, http://www.kenw.org
('krwg.org', 'KRWG/Channel 22 (KRWG)'), # NM, http://www.krwg.org
('newmexicopbs.org', 'KNME-TV/Channel 5 (KNME)'), # NM, http://www.newmexicopbs.org/
('azpm.org', 'KUAT 6 (KUAT)'), # NM, http://www.azpm.org/
('panhandlepbs.org', 'KACV (KACV)'), # NM, http://www.panhandlepbs.org/home/
('kcostv.org', 'KCOS/Channel 13 (KCOS)'), # NM, www.kcostv.org
('kued.org', 'KUED Channel 7 (KUED)'), # NM, http://www.kued.org
('oeta.tv', 'OETA (KETA)'), # NM, http://www.oeta.tv
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # NM, http://www.rmpbs.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # NM, http://www.cpt12.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # NM, http://www.kbyutv.org/
('mountainlake.org', 'Mountain Lake PBS (WCFE)'), # NY, http://www.mountainlake.org/
('thirteen.org', 'Thirteen/WNET New York (WNET)'), # NY, http://www.thirteen.org
('wcny.org', 'WCNY/Channel 24 (WCNY)'), # NY, http://www.wcny.org
('wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # NY, http://www.wmht.org/home/
('wned.org', 'WNED (WNED)'), # NY, http://www.wned.org/
('wpbstv.org', 'WPBS (WPBS)'), # NY, http://www.wpbstv.org
('wskg.org', 'WSKG Public TV (WSKG)'), # NY, http://wskg.org
('wxxi.org', 'WXXI (WXXI)'), # NY, http://wxxi.org
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # NY, http://cptv.org
('nhptv.org', 'NHPTV (WENH)'), # NY, http://nhptv.org/
('vpt.org', 'Vermont PBS (WETK)'), # NY, http://www.vpt.org
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # NY, http://wgbh.org
('wpsu.org', 'WPSU (WPSU)'), # NY, http://www.wpsu.org
('wqln.org', 'WQLN/Channel 54 (WQLN)'), # NY, http://www.wqln.org
('wvia.org', 'WVIA Public Media Studios (WVIA)'), # NY, http://www.wvia.org/
('wliw.org', 'WLIW21 (WLIW)'), # NY, http://www.wliw.org/
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # NY, http://www.njtvonline.org/
('unctv.org', 'UNC-TV (WUNC)'), # NC, http://www.unctv.org/
('wtvi.org', 'WTVI (WTVI)'), # NC, http://www.wtvi.org/
('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # NC, http://www.blueridgepbs.org/
('scetv.org', 'ETV (WRLK)'), # NC, http://www.scetv.org
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # NC, http://www.gpb.org/
('whro.org', 'WHRO (WHRO)'), # NC, http://whro.org
('wtcitv.org', 'WTCI (WTCI)'), # NC, http://www.wtcitv.org
('pba.org', 'WPBA/Channel 30 (WPBA)'), # NC, http://pba.org/
('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # ND, http://www.prairiepublic.org/
('sdpb.org', 'SDPB Television (KUSD)'), # ND, http://www.sdpb.org
('cetconnect.org', 'CET (WCET)'), # OH, http://www.cetconnect.org
('thinktv.org', 'ThinkTVNetwork (WPTD)'), # OH, http://www.thinktv.org
('wbgu.org', 'WBGU-TV (WBGU)'), # OH, http://wbgu.org
('wgte.org', 'WGTE TV (WGTE)'), # OH, http://www.wgte.org
('WesternReservePublicMedia.org', 'Western Reserve PBS (WNEO)'), # OH, http://www.WesternReservePublicMedia.org/
('wosu.org', 'WOSU TV (WOSU)'), # OH, http://wosu.org/
('woub.org', 'WOUB/WOUC (WOUB)'), # OH, http://woub.org/tv/index.php?section=5
('wviz.org', 'WVIZ/PBS ideastream (WVIZ)'), # OH, http://www.wviz.org/
('dptv.org', 'WTVS Detroit Public TV (WTVS)'), # OH, http://www.dptv.org/
('ket.org', 'KET (WKLE)'), # OH, http://www.ket.org/
('wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # OH, http://wfwa.org/
('wipb.org', 'WIPB-TV (WIPB)'), # OH, http://wipb.org
('wqed.org', 'WQED Multimedia (WQED)'), # OH, http://www.wqed.org/
('wqln.org', 'WQLN/Channel 54 (WQLN)'), # OH, http://www.wqln.org
('wvpublic.org', 'WVPB (WVPB)'), # OH, http://wvpublic.org/
('oeta.tv', 'OETA (KETA)'), # OK, http://www.oeta.tv
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # OK, http://www.aetn.org/
('panhandlepbs.org', 'KACV (KACV)'), # OK, http://www.panhandlepbs.org/home/
('kera.org', 'KERA 13 (KERA)'), # OK, http://www.kera.org/
('lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # OK, http://www.lpb.org/
('optv.org', 'Ozarks Public Television (KOZK)'), # OK, http://www.optv.org/
('opb.org', 'OPB (KOPB)'), # OR, http://www.opb.org
('soptv.org', 'SOPTV (KSYS)'), # OR, http://www.soptv.org
('idahoptv.org', 'Idaho Public Television (KAID)'), # OR, http://idahoptv.org
('kcts9.org', 'KCTS 9 (KCTS)'), # OR, http://kcts9.org/
('keet.org', 'KEET TV (KEET)'), # OR, http://www.keet.org
('ksps.org', 'KSPS (KSPS)'), # OR, http://www.ksps.org/home/
('kwsu.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # OR, http://www.kwsu.org
('whyy.org', 'WHYY (WHYY)'), # PA, http://www.whyy.org
('witf.org', 'witf (WITF)'), # PA, http://www.witf.org
('wpsu.org', 'WPSU (WPSU)'), # PA, http://www.wpsu.org
('wqed.org', 'WQED Multimedia (WQED)'), # PA, http://www.wqed.org/
('wqln.org', 'WQLN/Channel 54 (WQLN)'), # PA, http://www.wqln.org
('wvia.org', 'WVIA Public Media Studios (WVIA)'), # PA, http://www.wvia.org/
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # PA, http://cptv.org
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # PA, http://www.mpt.org
('thirteen.org', 'Thirteen/WNET New York (WNET)'), # PA, http://www.thirteen.org
('weta.org', 'WETA Television and Radio (WETA)'), # PA, http://www.weta.org
('wned.org', 'WNED (WNED)'), # PA, http://www.wned.org/
('WesternReservePublicMedia.org', 'Western Reserve PBS (WNEO)'), # PA, http://www.WesternReservePublicMedia.org/
('wskg.org', 'WSKG Public TV (WSKG)'), # PA, http://wskg.org
('wviz.org', 'WVIZ/PBS ideastream (WVIZ)'), # PA, http://www.wviz.org/
('wvpublic.org', 'WVPB (WVPB)'), # PA, http://wvpublic.org/
('wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # PA, http://www.wvpt.net
('wlvt.org', 'PBS 39 (WLVT)'), # PA, http://www.wlvt.org/
('njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # PA, http://www.njtvonline.org/
('whut.org', 'Howard University Television (WHUT)'), # PA, http://www.whut.org
('wliw.org', 'WLIW21 (WLIW)'), # PA, http://www.wliw.org/
('cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # RI, http://cptv.org
('nhptv.org', 'NHPTV (WENH)'), # RI, http://nhptv.org/
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # RI, http://wgbh.org
('ripbs.org', 'Rhode Island PBS (WSBE)'), # RI, http://www.ripbs.org/home/
('scetv.org', 'ETV (WRLK)'), # SC, http://www.scetv.org
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # SC, http://www.gpb.org/
('unctv.org', 'UNC-TV (WUNC)'), # SC, http://www.unctv.org/
('wtvi.org', 'WTVI (WTVI)'), # SC, http://www.wtvi.org/
('pba.org', 'WPBA/Channel 30 (WPBA)'), # SC, http://pba.org/
('sdpb.org', 'SDPB Television (KUSD)'), # SD, http://www.sdpb.org
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # SD, http://www.iptv.org/
('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # SD, http://www.prairiepublic.org/
('netnebraska.org', 'NET1 (KUON)'), # SD, http://netnebraska.org
('pioneer.org', 'Pioneer Public Television (KWCM)'), # SD, http://www.pioneer.org
('easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # TN, http://easttennesseepbs.org
('wkno.org', 'WKNO/Channel 10 (WKNO)'), # TN, http://www.wkno.org/
('wnpt.org', 'Nashville Public Television (WNPT)'), # TN, http://www.wnpt.org
('wcte.org', 'WCTE-TV (WCTE)'), # TN, http://www.wcte.org
('wljt.org', 'WLJT, Channel 11 (WLJT)'), # TN, http://wljt.org/
('wtcitv.org', 'WTCI (WTCI)'), # TN, http://www.wtcitv.org
('aptv.org', 'APT - Alabama Public Television (WBIQ)'), # TN, http://aptv.org/
('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # TN, http://www.blueridgepbs.org/
('gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # TN, http://www.gpb.org/
('lakelandptv.org', 'Lakeland Public Television (KAWE)'), # TN, http://www.lakelandptv.org
('ket.org', 'KET (WKLE)'), # TN, http://www.ket.org/
('mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # TN, http://www.mpbonline.org
('unctv.org', 'UNC-TV (WUNC)'), # TN, http://www.unctv.org/
('WesternReservePublicMedia.org', 'Western Reserve PBS (WNEO)'), # TN, http://www.WesternReservePublicMedia.org/
('wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # TN, http://www.wsiu.org/
('wviz.org', 'WVIZ/PBS ideastream (WVIZ)'), # TN, http://www.wviz.org/
('wkyupbs.org', 'WKYU-PBS (WKYU)'), # TN, http://www.wkyupbs.org
('pba.org', 'WPBA/Channel 30 (WPBA)'), # TN, http://pba.org/
('basinpbs.org', 'Basin PBS (KPBT)'), # TX, http://www.basinpbs.org
('houstonpublicmedia.org', 'KUHT / Channel 8 (KUHT)'), # TX, http://www.houstonpublicmedia.org/
('panhandlepbs.org', 'KACV (KACV)'), # TX, http://www.panhandlepbs.org/home/
('tamu.edu', 'KAMU - TV (KAMU)'), # TX, http://KAMU.tamu.edu
('kcostv.org', 'KCOS/Channel 13 (KCOS)'), # TX, www.kcostv.org
('kedt.org', 'KEDT/Channel 16 (KEDT)'), # TX, http://www.kedt.org
('kera.org', 'KERA 13 (KERA)'), # TX, http://www.kera.org/
('klrn.org', 'KLRN (KLRN)'), # TX, http://www.klrn.org
('klru.org', 'KLRU (KLRU)'), # TX, http://www.klru.org
('kmbh.org', 'KMBH-TV (KMBH)'), # TX, http://www.kmbh.org
('knct.org', 'KNCT (KNCT)'), # TX, http://www.knct.org
('ktxt.org', 'KTTZ-TV (KTXT)'), # TX, http://www.ktxt.org
('aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # TX, http://www.aetn.org/
('kenw.org', 'KENW-TV3 (KENW)'), # TX, http://www.kenw.org
('krwg.org', 'KRWG/Channel 22 (KRWG)'), # TX, http://www.krwg.org
('lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # TX, http://www.lpb.org/
('netnebraska.org', 'NET1 (KUON)'), # TX, http://netnebraska.org
('newmexicopbs.org', 'KNME-TV/Channel 5 (KNME)'), # TX, http://www.newmexicopbs.org/
('wnpt.org', 'Nashville Public Television (WNPT)'), # TX, http://www.wnpt.org
('oeta.tv', 'OETA (KETA)'), # TX, http://www.oeta.tv
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # TX, http://www.rmpbs.org
('kued.org', 'KUED Channel 7 (KUED)'), # UT, http://www.kued.org
('idahoptv.org', 'Idaho Public Television (KAID)'), # UT, http://idahoptv.org
('vegaspbs.org', 'Vegas PBS (KLVX)'), # UT, http://vegaspbs.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # UT, http://www.kbyutv.org/
('vpt.org', 'Vermont PBS (WETK)'), # VT, http://www.vpt.org
('mountainlake.org', 'Mountain Lake PBS (WCFE)'), # VT, http://www.mountainlake.org/
('nhptv.org', 'NHPTV (WENH)'), # VT, http://nhptv.org/
('wgbh.org', 'WGBH/Channel 2 (WGBH)'), # VT, http://wgbh.org
('wgby.org', 'WGBY (WGBY)'), # VT, http://www.wgby.org
('wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # VT, http://www.wmht.org/home/
('wtjx.org', 'WTJX Channel 12 (WTJX)'), # VI, http://www.wtjx.org/
('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # VA, http://www.blueridgepbs.org/
('ideastations.org', 'WCVE PBS (WCVE)'), # VA, http://ideastations.org/
('whro.org', 'WHRO (WHRO)'), # VA, http://whro.org
('wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # VA, http://www.wvpt.net
('easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # VA, http://easttennesseepbs.org
('ket.org', 'KET (WKLE)'), # VA, http://www.ket.org/
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # VA, http://www.mpt.org
('unctv.org', 'UNC-TV (WUNC)'), # VA, http://www.unctv.org/
('weta.org', 'WETA Television and Radio (WETA)'), # VA, http://www.weta.org
('whut.org', 'Howard University Television (WHUT)'), # VA, http://www.whut.org
('kcts9.org', 'KCTS 9 (KCTS)'), # WA, http://kcts9.org/
('ksps.org', 'KSPS (KSPS)'), # WA, http://www.ksps.org/home/
('idahoptv.org', 'Idaho Public Television (KAID)'), # WA, http://idahoptv.org
('opb.org', 'OPB (KOPB)'), # WA, http://www.opb.org
('kbtc.org', 'KBTC Public Television (KBTC)'), # WA, http://kbtc.org
('kwsu.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # WA, http://www.kwsu.org
('wvpublic.org', 'WVPB (WVPB)'), # WV, http://wvpublic.org/
('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # WV, http://www.blueridgepbs.org/
('ket.org', 'KET (WKLE)'), # WV, http://www.ket.org/
('mpt.org', 'mpt/Maryland Public Television (WMPB)'), # WV, http://www.mpt.org
('weta.org', 'WETA Television and Radio (WETA)'), # WV, http://www.weta.org
('WesternReservePublicMedia.org', 'Western Reserve PBS (WNEO)'), # WV, http://www.WesternReservePublicMedia.org/
('wosu.org', 'WOSU TV (WOSU)'), # WV, http://wosu.org/
('woub.org', 'WOUB/WOUC (WOUB)'), # WV, http://woub.org/tv/index.php?section=5
('wqed.org', 'WQED Multimedia (WQED)'), # WV, http://www.wqed.org/
('wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # WV, http://www.wvpt.net
('whut.org', 'Howard University Television (WHUT)'), # WV, http://www.whut.org
('mptv.org', 'Milwaukee Public Television (WMVS)'), # WI, http://www.mptv.org
('wpt.org', 'WPT (WPNE)'), # WI, http://www.wpt.org/
('iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # WI, http://www.iptv.org/
('lakelandptv.org', 'Lakeland Public Television (KAWE)'), # WI, http://www.lakelandptv.org
('wdse.org', 'WDSE - WRPT (WDSE)'), # WI, http://www.wdse.org/
('tpt.org', 'TPT (KTCA)'), # WI, http://www.tpt.org
('nmu.edu', 'WNMU-TV Public TV 13 (WNMU)'), # WI, http://wnmutv.nmu.edu
('wttw.com', 'WTTW11 (WTTW)'), # WI, http://www.wttw.com/
('ksmq.org', 'KSMQ (KSMQ)'), # WI, http://www.ksmq.org/
('wycc.org', 'WYCC PBS Chicago (WYCC)'), # WI, http://www.wycc.org
('wyomingpbs.org', 'Wyoming PBS (KCWC)'), # WY, http://www.wyomingpbs.org
('idahoptv.org', 'Idaho Public Television (KAID)'), # WY, http://idahoptv.org
('kued.org', 'KUED Channel 7 (KUED)'), # WY, http://www.kued.org
('montanapbs.org', 'MontanaPBS (KUSM)'), # WY, http://montanapbs.org
('netnebraska.org', 'NET1 (KUON)'), # WY, http://netnebraska.org
('rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # WY, http://www.rmpbs.org
('sdpb.org', 'SDPB Television (KUSD)'), # WY, http://www.sdpb.org
('cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # WY, http://www.cpt12.org/
('kbyutv.org', 'KBYU-TV (KBYU)'), # WY, http://www.kbyutv.org/
)
IE_NAME = 'pbs'
IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1])
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
(?:videos?|watch)\.(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
''' % '|'.join(list(zip(*_STATIONS))[0])
_TESTS = [
{
@@ -170,6 +643,14 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
},
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
},
{
'url': 'http://watch.knpb.org/video/2365616055/',
'only_matching': True,
}
]
_ERRORS = {
@@ -200,6 +681,7 @@ class PBSIE(InfoExtractor):
MEDIA_ID_REGEXES = [
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
r'class="coveplayerid">([^<]+)<', # coveplayer
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
]
@@ -259,7 +741,7 @@ class PBSIE(InfoExtractor):
return self.playlist_result(entries, display_id)
info = self._download_json(
'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id)
formats = []
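Both IE_DESC and _VALID_URL above are derived from the same _STATIONS tuple: zip(*_STATIONS) transposes the (hostname, description) pairs, column 0 feeds the URL regex alternation and column 1 the human-readable description. A tiny demonstration on a three-station subset of the list above, checked against the watch.knpb.org test URL from the diff (the pattern below keeps only the member-station branch, without the (?x) verbose layout):

import re

# Three (hostname, description) pairs copied from the _STATIONS tuple above.
STATIONS = (
    ('pbs.org', 'PBS: Public Broadcasting Service'),
    ('kcts9.org', 'KCTS 9 (KCTS)'),
    ('knpb.org', 'KNPB Channel 5 (KNPB)'),
)

hosts, names = zip(*STATIONS)  # transpose the pairs into two columns
ie_desc = 'PBS and member stations: %s' % ', '.join(names)
# Hostnames are joined unescaped, just as the extractor does.
valid_url = (r'https?://(?:videos?|watch)\.(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/?'
             % '|'.join(hosts))

print(ie_desc)
print(re.match(valid_url, 'http://watch.knpb.org/video/2365616055/').group('id'))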

View File

@@ -2,16 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import parse_iso8601
class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -29,6 +25,9 @@ class PeriscopeIE(InfoExtractor):
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}, {
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
'only_matching': True,
}]
def _call_api(self, method, value):
@@ -81,24 +80,3 @@ class PeriscopeIE(InfoExtractor):
'thumbnails': thumbnails,
'formats': formats,
}
class QuickscopeIE(InfoExtractor):
IE_DESC = 'Quick Scope'
_VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
_TEST = {
'url': 'https://watchonperiscope.com/broadcast/56180087',
'only_matching': True,
}
def _real_extract(self, url):
broadcast_id = self._match_id(url)
request = compat_urllib_request.Request(
'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({
'broadcast_id': broadcast_id,
'entry_ticket': '',
'from_push': 'false',
'uses_sessions': 'true',
}).encode('utf-8'))
return self.url_result(
self._download_json(request, broadcast_id)['share_url'], 'Periscope')
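The Periscope pattern is widened from the fixed /w/<token> form to any /<segment>/<token> path, which is what the extra only_matching test with a username URL above exercises. A quick check of the new pattern against both test URLs from the diff:

import re

VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'

for url in (
    'https://www.periscope.tv/w/1ZkKzPbMVggJv',
    'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
):
    # Both the old /w/ URLs and the new username URLs yield a broadcast id.
    print(re.match(VALID_URL, url).group('id'))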

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -44,6 +46,13 @@ class PladformIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
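The new static _extract_url gives other extractors a simple way to pull a Pladform embed out of an arbitrary page: it just searches the HTML for an out.pladform.ru/player iframe. A standalone run of the same regex on a hypothetical page snippet (the iframe URL below is made up for illustration):

import re


def extract_pladform_url(webpage):
    # Same pattern as PladformIE._extract_url in the hunk above.
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"',
        webpage)
    if mobj:
        return mobj.group('url')


sample = ('<div class="player">'
          '<iframe width="640" height="360" '
          'src="//out.pladform.ru/player?pl=12345&videoid=100183293" '
          'frameborder="0"></iframe></div>')
print(extract_pladform_url(sample))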

View File

@@ -5,12 +5,10 @@ import re
import os.path
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
)
@@ -46,7 +44,7 @@ class PlayedIE(InfoExtractor):
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
req = sanitized_Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note='Downloading video page ...')

Some files were not shown because too many files have changed in this diff.