Compare commits

...

179 Commits

Author SHA1 Message Date
Philipp Hagemeister
f10c27b8cb release 2015.12.23 2015-12-23 14:05:06 +01:00
remitamine
60427f63d1 [appletrailers] Add support for AppleTrailers Section 2015-12-23 10:40:45 +01:00
Sergey M․
178b47e6af [daum] Add test for #7949 2015-12-23 02:59:49 +06:00
Sergey M․
3a70ed9ebe [daum] Fix extraction (Closes #7949) 2015-12-23 02:54:32 +06:00
Sergey M․
89abf7bf4d [periscope] Fix token based extraction (Closes #7943) 2015-12-23 02:09:50 +06:00
Sergey M․
cfe9e5aa6c [comcarcoff] Extract duration 2015-12-23 01:18:14 +06:00
Sergey M․
4c24ed9464 [comcarcoff] Improve json data regex and modernize 2015-12-23 01:16:14 +06:00
Sergey M
11208ebbf1 Merge pull request #7942 from ausbin/comcarcoff-json-fix
[comcarcoff] adjust for json updates
2015-12-23 01:05:54 +06:00
Sergey M․
774ce35571 [imgur] Improve (Closes #7928) 2015-12-22 21:48:48 +06:00
Abhishek Kedia
dbee18b552 Improve extraction (Closes #7918)
remove outer parentheses in if

Conflicts:
	youtube_dl/extractor/imgur.py

checked code with flake8

not returning list in case of single images.

using the fact that id with length 5 are albums and more are single videos.
Also for single videos ie ImgurIE both urls - http://imgur.com/gallery/oWeAMW2 and http://imgur.com/oWeAMW2 are equally fine. Change regex to allow thuis.
For albums urls - http://imgur.com/gallery/Q95ko and http://imgur.com/Q95ko are ok. Change regex to allow this also.

update description in ImgurIE Tests.
Also move single video test 'https://imgur.com/gallery/YcAQlkx' from ImgurAlbumIE to ImgurIE.
2015-12-22 21:43:49 +06:00
remitamine
31d9ea4a3e Merge pull request #7322 from remitamine/vgtv
[vgtv] extract videos from FTV, Aftenposten, Aftonbladet using VGTVIE
2015-12-22 16:10:04 +01:00
remitamine
3b68efdc6a [vgtv] update tests and correct format sorting 2015-12-22 15:54:51 +01:00
remitamine
2db5806991 [franceinter] use _match_id 2015-12-22 11:30:35 +01:00
remitamine
220bc3f0e3 [franceinter] fix title extraction 2015-12-22 11:27:18 +01:00
remitamine
48a6c984b8 [bleacherreport] update test 2015-12-22 10:14:57 +01:00
remitamine
dc016bf521 [viki] detect errors and fix formats extraction 2015-12-22 09:55:25 +01:00
remitamine
ff43d2365f [soompi] remove extractor
http://tv.soompi.com now redirect to viki.com because Viki has acquired
Soompi
http://www.soompi.com/2015/08/19/we-got-married-soompi-joins-viki/
2015-12-22 07:58:33 +01:00
Austin Adams
ed63cbd6ee [comcarcoff] adjust for json updates 2015-12-21 20:28:38 -05:00
remitamine
c7224074d6 [audimedia] correct test case id 2015-12-21 23:02:55 +01:00
remitamine
eed30fea75 [flickr] fix format sorting 2015-12-21 22:10:16 +01:00
remitamine
5625bd0617 [br] add support for br-klassik.de and improve extraction
- extend _VALID_URL to match both br.de and br-klassik.de
- extract all formats(hls,hds and rtmp)
- use xpath_element and xpath_text for xml info extraction
2015-12-21 21:06:10 +01:00
Sergey M․
5ef5d25b15 [audiomack] Fix typo (Closes #7936) 2015-12-21 22:51:58 +06:00
remitamine
0f15ad7b9b [adultswim] update test 2015-12-21 17:07:19 +01:00
remitamine
f11d00fa41 [test_subtitles] remove BlipTV test 2015-12-21 16:52:47 +01:00
remitamine
61ebb401b7 [atresplayer] improve extraction
- select hashlib.md5 constructor as digestmod(in python 3.4+ MD5 as
implicit default digest for digestmod is deprecated.)
- extract hls formats
- update tests
- extract errors
2015-12-21 16:26:40 +01:00
remitamine
5c5a3ecf1b [abc] detect expired state and update tests 2015-12-21 13:07:52 +01:00
Philipp Hagemeister
0197004f78 release 2015.12.21 2015-12-21 11:42:25 +01:00
remitamine
2c28da8e05 Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport 2015-12-21 11:18:32 +01:00
remitamine
c7fa5fa42c [bleacherreport] fix style issues and simplify 2015-12-21 11:12:58 +01:00
remitamine
7ba71e30fb Merge branch 'bliptv' of github.com:remitamine/youtube-dl into remitamine-bliptv 2015-12-21 04:31:17 +01:00
remitamine
7cb0952474 [makertv] improve extraction 2015-12-21 04:24:58 +01:00
remitamine
a8ae232fa9 Merge branch 'googledrive' of github.com:remitamine/youtube-dl into remitamine-googledrive 2015-12-21 03:15:19 +01:00
remitamine
5b251628e9 [googledrive] Modernize 2015-12-21 03:05:34 +01:00
remitamine
b9a324c0da Merge branch 'flickr' of github.com:remitamine/youtube-dl into remitamine-flickr 2015-12-21 00:37:51 +01:00
remitamine
5b95419ca5 [flickr] extract views_count and tags 2015-12-21 00:20:22 +01:00
remitamine
ecbccea703 [faz] extract duration and bitrate and use xpath_element and xpath_text for extraction 2015-12-20 21:38:30 +01:00
remitamine
c240ab6ecf Merge pull request #6790 from remitamine/tele13
[canal13cl] fix info extraction
2015-12-20 16:11:07 +01:00
remitamine
6882c0870e [tele13] improve extraction
- improve jwplayer setup regex
- sort formats
- remove duplicate formats
- update youtube test
2015-12-20 15:48:19 +01:00
remitamine
b0eeaf4f40 Merge pull request #6928 from remitamine/cnet
[cnet] fix extraction and extract more formats and metadata(closes #7003)
2015-12-20 12:59:35 +01:00
remitamine
c6ed6fadc2 [cnet] improve extraction
- relex data json regex
- extract the platform metadata once
- extract hds formats
- extract duration
- extract thumbnail
2015-12-20 12:43:00 +01:00
Sergey M․
e462474e1d [youtube] Generalize playlists extractor 2015-12-20 07:48:16 +06:00
Sergey M․
6b77d52b1f [test_utils] Add tests for encode_compat_str 2015-12-20 07:07:14 +06:00
Sergey M․
9b9c5355e4 Rename error_to_str to error_to_compat_str 2015-12-20 07:00:39 +06:00
Sergey M․
d890b4cc0a [nbc:news] Remove unnecessary compat_str 2015-12-20 06:43:42 +06:00
Sergey M․
2c74e6fa77 [YoutubeDL] Revert error_to_str for ExtractorError 2015-12-20 06:35:58 +06:00
Sergey M․
c0384f221e Use proper encoding on compat_str construction when necessary 2015-12-20 06:29:36 +06:00
Sergey M․
8e60dc7526 [utils] Add encode_compat_str 2015-12-20 06:26:26 +06:00
Sergey M․
8900ab4d9b [YoutubeDL] More error_to_str 2015-12-20 06:22:01 +06:00
Sergey M․
fb043a6e4e [YoutubeDL] Use error_to_str 2015-12-20 06:16:19 +06:00
Sergey M․
7f8b271465 Properly convert errors to strings 2015-12-20 05:27:38 +06:00
Sergey M․
fdae235858 [utils] Add error_to_str 2015-12-20 05:26:47 +06:00
remitamine
1deb710f26 [gputechconf] improve extraction 2015-12-19 23:59:00 +01:00
remitamine
ec6504b39c [gputechconf] Add new extractor(closes #5775) 2015-12-19 23:28:54 +01:00
Sergey M․
dd85e4d707 [extractor/common] Properly decode error string on python 2 (Closes #1354, closes #3957, closes #4037, closes #6449) 2015-12-20 02:43:50 +06:00
remitamine
fa64a84311 [faz] fix info extraction 2015-12-19 19:02:04 +01:00
remitamine
e0f06eae43 [fktv] fix info extraction 2015-12-19 18:26:28 +01:00
Sergey M․
0f206ee814 [toggle] Change IE_NAME 2015-12-19 23:11:23 +06:00
Sergey M․
cc0f378d54 [toggle] Rename to toggle 2015-12-19 19:59:00 +06:00
Sergey M․
e33c9cba7c [toggle] Improve _VALID_URL 2015-12-19 19:58:18 +06:00
Sergey M․
989e9f8ead [toggle] Improve formats extraction robustness 2015-12-19 19:52:37 +06:00
Sergey M․
8f097af4ec [toggle] Extract counters 2015-12-19 19:23:28 +06:00
Sergey M․
c40dbb19ab [toggle] Extract thumbnails 2015-12-19 19:19:26 +06:00
Sergey M․
ffaf6e66e3 [toggle] Improve 2015-12-19 19:16:49 +06:00
Sergey M․
74c730174f [toggle] Style 2015-12-19 19:06:05 +06:00
Sergey M․
c82a8dd14c [toggle] Remove unused imports 2015-12-19 19:04:38 +06:00
Sergey M․
f8253af561 [toggle] Use sanitized_Request 2015-12-19 19:03:55 +06:00
ping
ed370ff0e6 [togglesg] Fixes 2015-12-19 18:48:59 +06:00
ping
ee0f0393cf [togglesg] New extractor for toggle.sg 2015-12-19 18:48:46 +06:00
Yen Chi Hsuan
db2fe38b55 [utils] Support alternative timestamp format in TTML
Fixes #7608
2015-12-19 19:29:51 +08:00
Yen Chi Hsuan
d631d5f9f2 [utils] Fix TTML conversion
Tolerate invalid timestamps (closes #7909)
2015-12-19 18:21:42 +08:00
Sergey M․
4f29fa9906 [brightcove:new] Add test for ref: prefixed video id 2015-12-18 22:31:48 +06:00
Sergey M․
5b72fda140 [brightcove:new] Clarify ref: prefix 2015-12-18 22:22:41 +06:00
Sergey M․
f81ccbb3df [brightcove:new] Fix typo 2015-12-18 22:20:44 +06:00
Sergey M․
9fd0f67678 [brightcove:new] Add support for ref: preffixed video ids (Closes #7794) 2015-12-18 22:18:55 +06:00
Sergey M․
15d50aca64 [nowness] Add support for brightcove:new videos (Closes #7884) 2015-12-18 22:05:56 +06:00
Sergey M․
7234d1d9c7 [brightcove:new] Add _extract_url 2015-12-18 22:05:32 +06:00
Sergey M․
9796a9b20c [ndr] Fix description and upload date extraction (Closes #7893) 2015-12-18 21:34:17 +06:00
Philipp Hagemeister
016dd82050 release 2015.12.18 2015-12-18 14:21:30 +01:00
Sergey M․
b95779be21 [jsinterp] Extend function regex (Closes #7900, closes #7901) 2015-12-18 18:57:49 +06:00
Yen Chi Hsuan
10171468d9 [iqiyi] Update key (closes #7896) 2015-12-18 18:20:41 +08:00
Yen Chi Hsuan
bf597a6dd1 Merge pull request #7895 from Blue9/patch-1
Fix hyperlink to youtube-dl options
2015-12-18 18:10:41 +08:00
Gautam M
45dad8bab9 Fix hyperlink to youtube-dl options 2015-12-18 03:16:36 -05:00
Sergey M․
9dc1d94a0c [noco] Fix bitrates 2015-12-17 22:18:28 +06:00
Sergey M․
7824e1f6a6 [noco] Modernize 2015-12-17 22:16:58 +06:00
Sergey M․
2469a6aecb [noco] Adjust timestamp according to server time (Closes #7864) 2015-12-17 22:16:22 +06:00
Sergey M․
8f0afda028 [pbs] Extend _VALID_URL (Closes #7889) 2015-12-17 20:24:33 +06:00
remitamine
35e22b6b32 [youku] check for the correct variable 2015-12-17 12:51:50 +01:00
remitamine
323f82a7e0 [vimeo] add test for original format 2015-12-16 17:00:17 +01:00
remitamine
8534bf1f00 [vimeo] prefer original format 2015-12-16 16:36:25 +01:00
remitamine
eb4f27405b [vimeo] extract source file(closes #1072) 2015-12-16 09:43:53 +01:00
Sergey M․
2d3b70271c [rutube] Extend _VALID_URL 2015-12-16 04:44:17 +06:00
Sergey M․
ad1b6017cd [tf1] Fix tests 2015-12-15 21:36:59 +06:00
Sergey M․
05467d5a52 [tf1] Relax _VALID_URL 2015-12-15 21:31:58 +06:00
Sergey M․
ae5e94808e [tf1] Fix extraction (2) 2015-12-15 21:11:52 +06:00
Sergey M․
d7ffcfcf97 [tf1] Fix extraction (Closes #7873) 2015-12-15 21:09:14 +06:00
Sergey M․
0cb58b0259 [youtube] Extract alt_title and creator for music videos (Closes #7862) 2015-12-14 21:31:53 +06:00
Sergey M․
31b2051e21 [utils] Add remove_quotes 2015-12-14 21:30:58 +06:00
Yen Chi Hsuan
eb0bdc2c3e [novamov] Fix again 2015-12-14 02:50:59 +08:00
Yen Chi Hsuan
6583c741cd [novamov] Fix filekey extraction and reupload test video 2015-12-14 02:34:20 +08:00
Sergey M․
2d9295643e [footyroom] Skip test 2015-12-13 23:55:10 +06:00
Sergey M․
ee86e2c6d7 [novamov] Add support for mobile URLs 2015-12-13 19:16:01 +06:00
Yen Chi Hsuan
02a63fadc3 [infoq] Refactor and support the Chinese version
Closes #7576
2015-12-13 19:16:58 +08:00
Philipp Hagemeister
f3711edcf1 release 2015.12.13 2015-12-13 10:52:59 +01:00
Yen Chi Hsuan
22d07ba4e4 [infoq] Fix extraction for HTTP URLs (closes #7739) 2015-12-13 17:29:27 +08:00
Yen Chi Hsuan
f6abca506e [nowvideo] Skip deleted test case 2015-12-13 15:43:20 +08:00
Yen Chi Hsuan
b5424acdb9 [novamov] Improve existence checking 2015-12-13 15:43:20 +08:00
Yen Chi Hsuan
47c7f3d995 [novamov] Fix filekey extraction (closes #7764) 2015-12-13 15:43:20 +08:00
Sergey M․
0014ffa829 [funimation] Improve login 2015-12-13 07:17:42 +06:00
Sergey M․
c03943f394 Credit @Slyneth for funimation (#7775) 2015-12-12 15:19:23 +06:00
Yen Chi Hsuan
deb1e8d20e [youku] Put the missing item to get_hd 2015-12-12 15:49:19 +08:00
Yen Chi Hsuan
174964a7bc Credit @Celthi for fixing Youku extractor 2015-12-12 15:34:40 +08:00
Yen Chi Hsuan
9c568178fb Merge branch 'Celthi-youku_bugfix' 2015-12-12 15:26:43 +08:00
Yen Chi Hsuan
dbb7d7e26c [youku] Reorder format items 2015-12-12 15:24:58 +08:00
Yen Chi Hsuan
ade2340971 [youku] Simplify 2015-12-12 15:19:14 +08:00
Yen Chi Hsuan
4d77550cf0 [youku] Fix tests 2015-12-12 14:57:14 +08:00
Yen Chi Hsuan
c683454e7e [youku] MD5 is unstable 2015-12-12 14:48:46 +08:00
Yen Chi Hsuan
f133fd326b [youku] Cleanup and PEP8 2015-12-12 14:41:53 +08:00
Yen Chi Hsuan
1faa66f005 Merge branch 'youku_bugfix' of https://github.com/Celthi/youtube-dl into Celthi-youku_bugfix 2015-12-12 14:36:29 +08:00
Yen Chi Hsuan
8773f3158f [safari] Use postdata_urlencode (#7465) 2015-12-12 14:28:05 +08:00
Celthi
7e37c39485 merge data1 and data2 2015-12-12 11:26:15 +08:00
Celthi
14c17cafa1 add support to video protected by password 2015-12-12 11:21:44 +08:00
Celthi
8696a7fd13 fix the keyerror(mp4hd), todo support download the video protected by password 2015-12-12 10:44:21 +08:00
Sergey M․
d63cfc3f0f [beeg] API v5 (Closes #7846) 2015-12-12 02:52:20 +06:00
Sergey M․
f377f44dae [funimation] Improve extraction 2015-12-12 01:02:54 +06:00
Sergey M․
0b1bb1ac3a [funimation] Add test for promotional video 2015-12-12 00:52:00 +06:00
Sergey M․
f208e52a76 [funimation] Fix promotional videos extraction 2015-12-12 00:48:09 +06:00
Sergey M․
b091529a3c [funimation] Extend _VALID_URL to match promotional videos 2015-12-12 00:43:03 +06:00
Sergey M․
b323a3cbff [funimation] Remove unused import 2015-12-12 00:39:44 +06:00
Sergey M․
b59623ef43 [funimation] Use mobile webpage for workaround hulu error 2015-12-12 00:38:58 +06:00
Sergey M․
9c163950da [funimation] Improve _VALID_URL 2015-12-11 23:20:10 +06:00
Sergey M․
d357bbd375 [funimation] Update test 2015-12-11 23:06:44 +06:00
Sergey M?
f542a3d26b [funimation] Improve extraction (Closes #7775) 2015-12-11 23:00:38 +06:00
Sergey M?
59a4ff482a [funimation] Real UA is required for login 2015-12-11 23:00:37 +06:00
Sergey M?
40ca5b04f4 [funimation] Remove unnecessary login form field 2015-12-11 23:00:37 +06:00
Sergey M?
411e5b88c9 [funimation] Fix login message 2015-12-11 23:00:37 +06:00
Sergey M?
b4c299bad0 [funimation] PEP 8 2015-12-11 23:00:36 +06:00
Muratcan Simsek
ab4bdc913f [funimation] Add new extractor
Update funimation.py

Update funimation.py

Removed unnecessary lines.

Update funimation.py

Added thumbnail and description.

Filename improvement.

fixed TEST.
2015-12-11 23:00:35 +06:00
remitamine
1fe248a51b Merge pull request #7833 from remitamine/ooyala
[ooyala] improve extraction
2015-12-11 17:55:32 +01:00
remitamine
2559b9d017 [wdr] extract all formats(closes #7788) 2015-12-11 17:31:33 +01:00
Sergey M․
4db43567e8 [downloader/f4m] Decode manifest before fixing 2015-12-11 20:28:44 +06:00
Celthi
5333842a1d According the blog and you-get fixed the issues #7627. 2015-12-11 20:08:14 +08:00
Celthi
98c3806b15 fix some not important codesnips 2015-12-11 19:18:14 +08:00
Yen Chi Hsuan
b6afc225c8 [vevo] Use _download_smil to provide informative error messages 2015-12-11 19:16:51 +08:00
Yen Chi Hsuan
ad30dc1e20 [vevo] Allow calling API without https
Not all proxies allow CONNECT
2015-12-11 19:07:13 +08:00
Yen Chi Hsuan
ff51983e15 [vevo] Handle videos without video_info (#7802) 2015-12-11 18:52:03 +08:00
Celthi
fdf01663d1 able to download first part of the video, but fail in the left part 2015-12-11 17:48:40 +08:00
Yen Chi Hsuan
4b94288301 [vevo] Use _match_id 2015-12-11 17:32:29 +08:00
Yen Chi Hsuan
4bf99ade15 [vevo] Catch the georestriction message (#7802) 2015-12-11 14:25:01 +08:00
remitamine
d50116b8ac [vgtv] extract 5 digit length video ids using both xstream and vgtv 2015-12-10 22:18:42 +01:00
remitamine
75ed53320e [ooyala] improve extraction 2015-12-10 19:08:16 +01:00
Sergey M․
17b786ae73 [downloader/f4m] Fix malformed manifests (Closes #7823) 2015-12-10 22:59:50 +06:00
Sergey M
dfd42a43c3 Merge pull request #7821 from joksnet/patch-1
[FFmpegPostProcessor] Default of prefer ffmpeg
2015-12-10 22:10:20 +06:00
Celthi
51094b1b08 add cookie and referer in headers, change the video url 2015-12-10 21:42:12 +08:00
Juan M Martínez
374c761e77 [FFmpegPostProcessor] Default of prefer ffmpeg
When no `downloader` is passed to `FFmpegPostProcessor`
an exception was raised trying to get the prefer ffmpeg param.

    AttributeError: 'NoneType' object has no attribute 'params'

This fixes and defaults to `False`.
2015-12-09 20:56:00 -03:00
remitamine
41c3b34b1f [vgtv] add sortcut expressions to use the extractor 2015-12-09 10:50:11 +01:00
remitamine
9f6b517671 [vgtv] extract all formats and improve extraction 2015-12-06 07:50:27 +01:00
remitamine
78653a33aa Merge remote-tracking branch 'upstream/master' into bliptv 2015-12-03 20:33:22 +01:00
remitamine
63b728f06f [bleacherreport] Add new Extractor 2015-11-07 16:56:21 +01:00
remitamine
3793090b1b [amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 2015-11-07 16:54:35 +01:00
remitamine
a641b24592 [cnet] skip hls_phone if hls_tablet is present 2015-11-06 07:23:03 +01:00
remitamine
967c9076a3 raise ExtractorError if the page doesn't contain a video 2015-11-05 18:01:13 +01:00
remitamine
f3003531a5 [flickr] handle error message 2015-11-01 13:38:11 +01:00
remitamine
146672254e [flickr] extract fresh api key and remove duplication in test 2015-11-01 13:23:23 +01:00
remitamine
02fb980451 [flickr] extract more info and formats 2015-11-01 02:08:19 +01:00
remitamine
804afc5871 [vgtv] improve _VALID_URL regex 2015-10-30 10:20:38 +01:00
remitamine
00d24327ef [vgtv] extract videos from FTV, Aftenposten, Aftonbladet using VGTVIE 2015-10-30 09:48:56 +01:00
remitamine
77302fe5c9 [bliptv] remove extractor and add support for site replacement(makertv) 2015-10-15 23:27:46 +01:00
remitamine
b306c439d7 [cnet] fix extraction and extract more formats 2015-09-23 13:28:05 +01:00
remitamine
436416afe2 [tele13] skip test 2015-09-07 21:13:49 +01:00
remitamine
8b55cadc83 [canal13cl] fix info extraction 2015-09-07 16:39:01 +01:00
remitamine
8e92d21ebf [googledrive] raise ExtractorError instead of warning 2015-07-23 11:59:13 +01:00
remitamine
36dbca8784 fix recursive error 2015-07-23 11:59:13 +01:00
remitamine
d1cc05e17e remove unnecessary regex group names 2015-07-23 11:59:12 +01:00
remitamine
3b3d531965 fix embed regex 2015-07-23 11:59:12 +01:00
remitamine
653789afc7 add google drive embeds 2015-07-23 11:59:12 +01:00
remitamine
2d651a2d02 import google drive embed class 2015-07-23 11:57:09 +01:00
remitamine
3e5f3df172 move the embed to a separate class 2015-07-23 11:57:09 +01:00
remitamine
f120a7ab5e change the _TEST info 2015-07-23 11:57:09 +01:00
remitamine
984e4d4875 [googledrive] Add new extractor 2015-07-23 11:57:08 +01:00
73 changed files with 2269 additions and 1346 deletions

View File

@@ -147,3 +147,5 @@ Qijiang Fan
Rémy Léone
Marco Ferragina
reiv
Muratcan Simsek
Evan Lu

View File

@@ -757,7 +757,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:

View File

@@ -23,7 +23,6 @@
- **AdobeTVShow**
- **AdobeTVVideo**
- **AdultSwim**
- **Aftenposten**
- **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
@@ -34,7 +33,8 @@
- **Aparat**
- **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
- **AppleTrailers**
- **appletrailers**
- **appletrailers:section**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
@@ -65,9 +65,9 @@
- **Bet**
- **Bild**: Bild.de
- **BiliBili**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**
- **blip.tv:user**
- **BlipTV**
- **Bloomberg**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
@@ -80,7 +80,6 @@
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
@@ -188,6 +187,7 @@
- **Freesound**
- **freespeech.org**
- **FreeVideo**
- **Funimation**
- **FunnyOrDie**
- **GameInformer**
- **Gamekings**
@@ -209,7 +209,9 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
- **GoogleDrive**
- **Goshgay**
- **GPUTechConf**
- **Groupon**
- **Hark**
- **HearThisAt**
@@ -251,6 +253,7 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **JWPlatform**
- **Kaltura**
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
@@ -291,6 +294,7 @@
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **MakerTV**
- **Malemotion**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@@ -498,8 +502,6 @@
- **SnagFilmsEmbed**
- **Snotr**
- **Sohu**
- **soompi**
- **soompi:show**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
@@ -550,6 +552,7 @@
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- **Tele13**
- **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**
@@ -572,6 +575,7 @@
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **toggle**
- **tou.tv**
- **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile
@@ -621,7 +625,7 @@
- **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VGTV**: VGTV and BTTV
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **Vice**
- **Viddler**
@@ -710,6 +714,7 @@
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
@@ -717,7 +722,6 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**

View File

@@ -11,7 +11,6 @@ from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
BlipTVIE,
YoutubeIE,
DailymotionIE,
TEDIE,
@@ -145,18 +144,6 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE

View File

@@ -22,6 +22,7 @@ from youtube_dl.utils import (
DateRange,
detect_exe_version,
determine_ext,
encode_compat_str,
encodeFilename,
escape_rfc3986,
escape_url,
@@ -43,6 +44,7 @@ from youtube_dl.utils import (
sanitize_path,
prepend_extension,
replace_extension,
remove_quotes,
shell_quote,
smuggle_url,
str_to_int,
@@ -200,6 +202,15 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None)
self.assertEqual(remove_quotes('"'), '"')
self.assertEqual(remove_quotes("'"), "'")
self.assertEqual(remove_quotes(';'), ';')
self.assertEqual(remove_quotes('";'), '";')
self.assertEqual(remove_quotes('""'), '')
self.assertEqual(remove_quotes('";"'), ';')
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
@@ -439,6 +450,10 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes))
def test_encode_compat_str(self):
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
@@ -651,12 +666,13 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
{'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), 0.0)
self.assertEqual(parse_dfxp_time_expr(''), 0.0)
self.assertEqual(parse_dfxp_time_expr(None), None)
self.assertEqual(parse_dfxp_time_expr(''), None)
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1)
def test_dfxp2srt(self):
dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
@@ -666,6 +682,9 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/>♪♪</p>
<p begin="2" dur="1"><span>Third<br/>Line</span></p>
<p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
<p begin="-1" end="-1">Ignore, two</p>
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''

View File

@@ -47,7 +47,9 @@ from .utils import (
DEFAULT_OUTTMPL,
determine_ext,
DownloadError,
encode_compat_str,
encodeFilename,
error_to_compat_str,
ExtractorError,
format_bytes,
formatSeconds,
@@ -495,7 +497,7 @@ class YoutubeDL(object):
tb = ''
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
tb += compat_str(traceback.format_exc())
tb += encode_compat_str(traceback.format_exc())
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
@@ -674,14 +676,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
@@ -1459,7 +1461,7 @@ class YoutubeDL(object):
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err))
self.report_error('unable to create directory ' + error_to_compat_str(err))
return
if self.params.get('writedescription', False):
@@ -1510,7 +1512,7 @@ class YoutubeDL(object):
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err.cause)))
(sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
@@ -2039,4 +2041,4 @@ class YoutubeDL(object):
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], compat_str(err)))
(t['url'], error_to_compat_str(err)))

View File

@@ -5,9 +5,9 @@ import re
import sys
import time
from ..compat import compat_str
from ..utils import (
encodeFilename,
error_to_compat_str,
decodeArgument,
format_bytes,
timeconvert,
@@ -186,7 +186,7 @@ class FileDownloader(object):
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
self.report_error('unable to rename file: %s' % compat_str(err))
self.report_error('unable to rename file: %s' % error_to_compat_str(err))
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""

View File

@@ -15,6 +15,7 @@ from ..compat import (
)
from ..utils import (
encodeFilename,
fix_xml_ampersands,
sanitize_open,
struct_pack,
struct_unpack,
@@ -288,7 +289,10 @@ class F4mFD(FragmentFD):
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
man_url = urlh.geturl()
manifest = urlh.read()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/rg3/youtube-dl/issues/7823)
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)

View File

@@ -15,7 +15,6 @@ from .adobetv import (
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
@@ -26,7 +25,10 @@ from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import AppleTrailersIE
from .appletrailers import (
AppleTrailersIE,
AppleTrailersSectionIE,
)
from .archiveorg import ArchiveOrgIE
from .ard import (
ARDIE,
@@ -61,8 +63,11 @@ from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
@@ -78,7 +83,6 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
@@ -206,6 +210,7 @@ from .francetv import (
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
@@ -231,9 +236,11 @@ from .globo import (
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
from .hearthisat import HearThisAtIE
@@ -280,6 +287,7 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
@@ -334,6 +342,7 @@ from .lynda import (
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
from .makertv import MakerTVIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
@@ -584,10 +593,6 @@ from .snagfilms import (
)
from .snotr import SnotrIE
from .sohu import SohuIE
from .soompi import (
SoompiIE,
SoompiShowIE,
)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -646,6 +651,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
@@ -674,6 +680,7 @@ from .tnaflix import (
EMPFlixIE,
MovieFapIE,
)
from .toggle import ToggleIE
from .thvideo import (
THVideoIE,
THVideoPlaylistIE
@@ -849,7 +856,7 @@ from .youtube import (
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeUserPlaylistsIE,
YoutubePlaylistsIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE

View File

@@ -23,6 +23,7 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
'skip': 'this video has expired',
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086',
@@ -36,6 +37,7 @@ class ABCIE(InfoExtractor):
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
},
'add_ie': ['Youtube'],
'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
@@ -58,6 +60,9 @@ class ABCIE(InfoExtractor):
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage)
if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(

View File

@@ -68,7 +68,7 @@ class AdultSwimIE(InfoExtractor):
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'flv',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
@@ -79,6 +79,10 @@ class AdultSwimIE(InfoExtractor):
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
'params': {
# m3u8 download
'skip_download': True,
}
}]
@staticmethod

View File

@@ -1,23 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class AftenpostenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'title': 'TRAILER: "Sweatshop" - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'timestamp': 1416927969,
'upload_date': '20141125',
}
}
def _real_extract(self, url):
return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')

View File

@@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
thumbnails = []
media_thumbnail = get_media_node('thumbnail')
if media_thumbnail:
if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
subtitles = {}
media_subtitle = get_media_node('subTitle')
if media_subtitle:
if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
formats = []
media_content = get_media_node('content')
if isinstance(media_content, dict):
media_content = [media_content]
for media_data in media_content:
media = media_data['@attributes']
media_type = media['type']
if media_type == 'video/f4m':
f4m_formats = self._extract_f4m_formats(
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif media_type == 'application/x-mpegURL':
m3u8_formats = self._extract_m3u8_formats(
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'format_id': media_data['media-category']['@attributes']['label'],
'url': media['url'],
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': get_media_node('title'),
'description': get_media_node('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
'formats': formats,
}

View File

@@ -11,6 +11,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
IE_NAME = 'appletrailers'
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
@@ -144,3 +145,76 @@ class AppleTrailersIE(InfoExtractor):
'id': movie,
'entries': playlist,
}
class AppleTrailersSectionIE(InfoExtractor):
IE_NAME = 'appletrailers:section'
_SECTIONS = {
'justadded': {
'feed_path': 'just_added',
'title': 'Just Added',
},
'exclusive': {
'feed_path': 'exclusive',
'title': 'Exclusive',
},
'justhd': {
'feed_path': 'just_hd',
'title': 'Just HD',
},
'mostpopular': {
'feed_path': 'most_pop',
'title': 'Most Popular',
},
'moviestudios': {
'feed_path': 'studios',
'title': 'Movie Studios',
},
}
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
_TESTS = [{
'url': 'http://trailers.apple.com/#section=justadded',
'info_dict': {
'title': 'Just Added',
'id': 'justadded',
},
'playlist_mincount': 80,
}, {
'url': 'http://trailers.apple.com/#section=exclusive',
'info_dict': {
'title': 'Exclusive',
'id': 'exclusive',
},
'playlist_mincount': 80,
}, {
'url': 'http://trailers.apple.com/#section=justhd',
'info_dict': {
'title': 'Just HD',
'id': 'justhd',
},
'playlist_mincount': 80,
}, {
'url': 'http://trailers.apple.com/#section=mostpopular',
'info_dict': {
'title': 'Most Popular',
'id': 'mostpopular',
},
'playlist_mincount': 80,
}, {
'url': 'http://trailers.apple.com/#section=moviestudios',
'info_dict': {
'title': 'Movie Studios',
'id': 'moviestudios',
},
'playlist_mincount': 80,
}]
def _real_extract(self, url):
section = self._match_id(url)
section_data = self._download_json(
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
section)
entries = [
self.url_result('http://trailers.apple.com' + e['location'])
for e in section_data]
return self.playlist_result(entries, section, self._SECTIONS[section]['title'])

View File

@@ -2,6 +2,8 @@ from __future__ import unicode_literals
import time
import hmac
import hashlib
import re
from .common import InfoExtractor
from ..compat import (
@@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$',
},
'skip': 'This video is only available for registered users'
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
def _real_initialize(self):
self._login()
@@ -83,58 +105,81 @@ class AtresPlayerIE(InfoExtractor):
episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id')
request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')
episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
formats = []
video_url = player.get('urlVideo')
if video_url:
format_info = {
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)
m3u8_url = player.get('urlVideoHls')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8')
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()
formats = []
for fmt in ['windows', 'android_tablet']:
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
request.add_header('User-Agent', self._USER_AGENT)
request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
headers={'User-Agent': self._USER_AGENT})
fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt)
fmt_json = self._download_json(
request, video_id, 'Downloading windows video JSON')
result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
result = fmt_json.get('resultDes')
if result.lower() != 'ok':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue
if video_url.endswith('/Manifest'):
if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# this videos are protected by DRM, the f4m downloader doesn't support them
continue
else:
f4m_url = video_url[:-9] + '/manifest.f4m'
formats.extend(self._extract_f4m_formats(f4m_url, video_id))
else:
formats.append({
'url': video_url,
'format_id': 'android-%s' % format_id,
'preference': 1,
})
for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'):
continue
if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# this videos are protected by DRM, the f4m downloader doesn't support them
continue
else:
f4m_url = video_url[:-9] + '/manifest.f4m'
f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
self._sort_formats(formats)
player = self._download_json(
self._PLAYER_URL_TEMPLATE % episode_id,
episode_id)
path_data = player.get('pathData')
episode = self._download_xml(
self._EPISODE_URL_TEMPLATE % path_data,
video_id, 'Downloading episode XML')
self._EPISODE_URL_TEMPLATE % path_data, video_id,
'Downloading episode XML')
duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))

View File

@@ -15,7 +15,7 @@ class AudiMediaIE(InfoExtractor):
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
'id': '1564',
'id': '1565',
'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941',

View File

@@ -56,7 +56,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s', url)
raise ExtractorError('Invalid url %s' % url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor

View File

@@ -34,15 +34,29 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
'http://beeg.com/api/v4/video/%s' % video_id, video_id)
'http://beeg.com/api/v5/video/%s' % video_id, video_id)
def split(o, e):
def cut(s, x):
n.append(s[:x])
return s[x:]
n = []
r = len(o) % e
if r > 0:
o = cut(o, r)
while len(o) > e:
o = cut(o, e)
n.append(o)
return n
def decrypt_key(key):
# Reverse engineered from http://static.beeg.com/cpl/1067.js
a = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo'
# Reverse engineered from http://static.beeg.com/cpl/1105.js
a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
e = compat_urllib_parse_unquote(key)
return ''.join([
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 25)
o = ''.join([
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
for n in range(len(e))])
return ''.join(split(o, 3)[::-1])
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(

View File

@@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class BleacherReportIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
'info_dict': {
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'uploader_id': 6466954,
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
if primary_photo:
thumbnails = [{
'url': primary_photo['url'],
'width': primary_photo.get('width'),
'height': primary_photo.get('height'),
}]
info = {
'_type': 'url_transparent',
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
'view_count': int_or_none(article_data.get('hitCount')),
}
video = article_data.get('video')
if video:
video_type = video['type']
if video_type == 'cms.bleacherreport.com':
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
else:
info['url'] = video_type + video['id']
return info
else:
raise ExtractorError('no video in the article', expected=True)
class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': '8c2c12e3af7805152675446c905d159b',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
info['id'] = video_id
return info

View File

@@ -1,290 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
sanitized_Request,
unescapeHTML,
xpath_text,
xpath_with_ns,
)
class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
'info_dict': {
'id': '5779306',
'ext': 'm4v',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'timestamp': 1323138843,
'upload_date': '20111206',
'uploader': 'cbr',
'uploader_id': '679425',
'duration': 81,
}
},
{
# https://github.com/rg3/youtube-dl/pull/2274
'note': 'Video with subtitles',
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
'md5': '309f9d25b820b086ca163ffac8031806',
'info_dict': {
'id': '6586561',
'ext': 'mp4',
'title': 'Red vs. Blue Season 11 Episode 1',
'description': 'One-Zero-One',
'timestamp': 1371261608,
'upload_date': '20130615',
'uploader': 'redvsblue',
'uploader_id': '792887',
'duration': 279,
}
},
{
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
'info_dict': {
'id': '6573122',
'ext': 'mov',
'upload_date': '20130520',
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
'title': 'Red vs. Blue Season 11 Trailer',
'timestamp': 1369029609,
'uploader': 'redvsblue',
'uploader_id': '792887',
}
},
{
'url': 'http://blip.tv/play/gbk766dkj4Yn',
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
'info_dict': {
'id': '1749452',
'ext': 'mp4',
'upload_date': '20090208',
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
'title': 'Nostalgia Critic: Transformers',
'timestamp': 1234068723,
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
},
{
# https://github.com/rg3/youtube-dl/pull/4404
'note': 'Audio only',
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
'md5': '76c0a56f24e769ceaab21fbb6416a351',
'info_dict': {
'id': '7103299',
'ext': 'flv',
'title': 'Weekly Manga Recap: Kingdom',
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
'timestamp': 1417660321,
'upload_date': '20141204',
'uploader': 'The Rollo T',
'uploader_id': '407429',
'duration': 7251,
'vcodec': 'none',
}
},
{
# missing duration
'url': 'http://blip.tv/rss/flash/6700880',
'info_dict': {
'id': '6684191',
'ext': 'm4v',
'title': 'Cowboy Bebop: Gateway Shuffle Review',
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
'timestamp': 1386639757,
'upload_date': '20131210',
'uploader': 'sfdebris',
'uploader_id': '706520',
}
}
]
@staticmethod
def _extract_url(webpage):
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return 'http://blip.tv/a/a-' + mobj.group(1)
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return mobj.group(1)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lookup_id = mobj.group('lookup_id')
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
urlh = self._request_webpage(
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
url = compat_urlparse.urlparse(urlh.geturl())
qs = compat_urlparse.parse_qs(url.query)
mobj = re.match(self._VALID_URL, qs['file'][0])
video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
def _x(p):
return xpath_with_ns(p, {
'blip': 'http://blip.tv/dtd/blip/1.0',
'media': 'http://search.yahoo.com/mrss/',
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
})
item = rss.find('channel/item')
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
title = xpath_text(item, 'title', 'title', fatal=True)
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
uploader = xpath_text(item, _x('blip:user'), 'uploader')
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
media_thumbnail = item.find(_x('media:thumbnail'))
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
else xpath_text(item, 'image', 'thumbnail'))
categories = [category.text for category in item.findall('category') if category is not None]
formats = []
subtitles_urls = {}
media_group = item.find(_x('media:group'))
for media_content in media_group.findall(_x('media:content')):
url = media_content.get('url')
role = media_content.get(_x('blip:role'))
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):
LANGS = {
'english': 'en',
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
'format_id': role,
'format_note': media_type,
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
'acodec': media_content.get(_x('blip:acodec')),
'filesize': media_content.get('filesize'),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
self._check_formats(formats, video_id)
self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'duration': duration,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'subtitles': subtitles,
}
def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = sanitized_Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
_TEST = {
'url': 'http://blip.tv/actone',
'info_dict': {
'id': 'actone',
'title': 'Act One: The Series',
},
'playlist_count': 5,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
title = self._og_search_title(page)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
# page by page until there are no video ids - it means we got
# all of them.
video_ids = []
pagenum = 1
while True:
url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(
url, username, 'Downloading video ids from page %d' % pagenum)
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(r'href="/([^"]+)"', page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(unescapeHTML(mobj.group(1)))
video_ids.extend(ids_in_page)
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# we can assume that this page is the last one - there
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._PAGE_SIZE:
break
pagenum += 1
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return self.playlist_result(
url_entries, playlist_title=title, playlist_id=username)

View File

@@ -1,18 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_BASE_URL = 'http://www.br.de'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
{
@@ -22,7 +25,7 @@ class BRIE(InfoExtractor):
'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4',
'title': 'Die böse Überraschung',
'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
'duration': 180,
'uploader': 'Reinhard Weber',
'upload_date': '20150422',
@@ -30,23 +33,23 @@ class BRIE(InfoExtractor):
},
{
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
'md5': 'a44396d73ab6a68a69a568fae10705bb',
'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
'info_dict': {
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
'ext': 'mp4',
'ext': 'flv',
'title': 'Manfred Schreiber ist tot',
'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
'duration': 26,
}
},
{
'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
'info_dict': {
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac',
'title': 'Kurzweilig und sehr bewegend',
'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
'duration': 296,
}
},
@@ -57,7 +60,7 @@ class BRIE(InfoExtractor):
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
'duration': 116,
}
},
@@ -68,7 +71,7 @@ class BRIE(InfoExtractor):
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893,
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
@@ -77,28 +80,31 @@ class BRIE(InfoExtractor):
]
def _real_extract(self, url):
display_id = self._match_id(url)
base_url, display_id = re.search(self._VALID_URL, url).groups()
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None)
xml = self._download_xml(base_url + xml_url, display_id)
medias = []
for xml_media in xml.findall('video') + xml.findall('audio'):
media_id = xml_media.get('externalId')
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
'duration': parse_duration(xml_media.find('duration').text),
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
'webpage_url': xml_media.find('permalink').text
'id': media_id,
'title': xpath_text(xml_media, 'title', 'title', True),
'duration': parse_duration(xpath_text(xml_media, 'duration')),
'formats': self._extract_formats(xpath_element(
xml_media, 'assets'), media_id),
'thumbnails': self._extract_thumbnails(xpath_element(
xml_media, 'teaserImage/variants'), base_url),
'description': xpath_text(xml_media, 'desc'),
'webpage_url': xpath_text(xml_media, 'permalink'),
'uploader': xpath_text(xml_media, 'author'),
}
if xml_media.find('author').text:
media['uploader'] = xml_media.find('author').text
if xml_media.find('broadcastDate').text:
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
broadcast_date = xpath_text(xml_media, 'broadcastDate')
if broadcast_date:
media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
medias.append(media)
if len(medias) > 1:
@@ -109,35 +115,58 @@ class BRIE(InfoExtractor):
raise ExtractorError('No media entries found')
return medias[0]
def _extract_formats(self, assets):
def text_or_none(asset, tag):
elem = asset.find(tag)
return None if elem is None else elem.text
formats = [{
'url': text_or_none(asset, 'downloadUrl'),
'ext': text_or_none(asset, 'mediaType'),
'format_id': asset.get('type'),
'width': int_or_none(text_or_none(asset, 'frameWidth')),
'height': int_or_none(text_or_none(asset, 'frameHeight')),
'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
'vcodec': text_or_none(asset, 'codecVideo'),
'acodec': text_or_none(asset, 'codecAudio'),
'container': text_or_none(asset, 'mediaType'),
'filesize': int_or_none(text_or_none(asset, 'size')),
} for asset in assets.findall('asset')
if asset.find('downloadUrl') is not None]
def _extract_formats(self, assets, media_id):
formats = []
for asset in assets.findall('asset'):
format_url = xpath_text(asset, ['downloadUrl', 'url'])
asset_type = asset.get('type')
if asset_type == 'HDS':
f4m_formats = self._extract_f4m_formats(
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif asset_type == 'HLS':
m3u8_formats = self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
format_info = {
'ext': xpath_text(asset, 'mediaType'),
'width': int_or_none(xpath_text(asset, 'frameWidth')),
'height': int_or_none(xpath_text(asset, 'frameHeight')),
'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
'vcodec': xpath_text(asset, 'codecVideo'),
'acodec': xpath_text(asset, 'codecAudio'),
'container': xpath_text(asset, 'mediaType'),
'filesize': int_or_none(xpath_text(asset, 'size')),
}
format_url = self._proto_relative_url(format_url)
if format_url:
http_format_info = format_info.copy()
http_format_info.update({
'url': format_url,
'format_id': 'http-%s' % asset_type,
})
formats.append(http_format_info)
server_prefix = xpath_text(asset, 'serverPrefix')
if server_prefix:
rtmp_format_info = format_info.copy()
rtmp_format_info.update({
'url': server_prefix,
'play_path': xpath_text(asset, 'fileName'),
'format_id': 'rtmp-%s' % asset_type,
})
formats.append(rtmp_format_info)
self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants):
def _extract_thumbnails(self, variants, base_url):
thumbnails = [{
'url': self._BASE_URL + variant.find('url').text,
'width': int_or_none(variant.find('width').text),
'height': int_or_none(variant.find('height').text),
} for variant in variants.findall('variant')]
'url': base_url + xpath_text(variant, 'url'),
'width': int_or_none(xpath_text(variant, 'width')),
'height': int_or_none(xpath_text(variant, 'height')),
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails

View File

@@ -355,7 +355,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
@@ -387,14 +387,24 @@ class BrightcoveNewIE(InfoExtractor):
'params': {
'skip_download': True,
}
}, {
# ref: prefixed video id
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
urls = BrightcoveNewIE._extract_urls(webpage)
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
entries = []
@@ -407,9 +417,10 @@ class BrightcoveNewIE(InfoExtractor):
for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is
# According to [4] data-video-id may be prefixed with ref:
r'''(?sx)
<video[^>]+
data-video-id=["\'](\d+)["\'][^>]*>.*?
data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/

View File

@@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class Canal13clIE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': '1403022125',
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True)
description = self._html_search_meta(
'twitter:description', webpage, 'description')
url = self._html_search_regex(
r'articuloVideo = \"(.*?)\"', webpage, 'url')
real_id = self._search_regex(
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
thumbnail = self._html_search_regex(
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
return {
'id': real_id,
'display_id': display_id,
'url': url,
'title': title,
'description': description,
'ext': 'mp4',
'thumbnail': thumbnail,
}

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE
@@ -34,18 +33,17 @@ class CinemassacreIE(InfoExtractor):
},
},
{
# blip.tv embedded video
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'info_dict': {
'id': '4065369',
'ext': 'flv',
'id': 'OEVzPCY2T-g',
'ext': 'mp4',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
'uploader': 'cinemassacre',
'uploader_id': '250778',
'timestamp': 1283233867,
'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
'uploader': 'Cinemassacre',
'uploader_id': 'JamesNintendoNerd',
'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
}
},
{
@@ -88,8 +86,6 @@ class CinemassacreIE(InfoExtractor):
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
],
webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
playerdata_url = BlipTVIE._extract_url(webpage)
if not playerdata_url:
raise ExtractorError('Unable to find player data')

View File

@@ -1,15 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
from .theplatform import ThePlatformIE
from ..utils import int_or_none
class CNETIE(InfoExtractor):
class CNETIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
_TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
@@ -18,25 +14,20 @@ class CNETIE(InfoExtractor):
'ext': 'flv',
'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'uploader': 'Sarah Mitroff',
'duration': 70,
},
'params': {
'skip_download': 'requires rtmpdump',
}
}, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
'info_dict': {
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
'ext': 'flv',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
},
'params': {
'skip_download': True, # requires rtmpdump
'duration': 1482,
},
}]
@@ -45,26 +36,13 @@ class CNETIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
r"data-cnet-video(?:-uvp)?-options='([^']+)'",
webpage, 'data json')
data = json.loads(data_json)
vdata = data['video']
if not vdata:
vdata = data['videos'][0]
if not vdata:
raise ExtractorError('Cannot find video data')
mpx_account = data['config']['players']['default']['mpx_account']
vid = vdata['files'].get('rtmp', vdata['files']['hds'])
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
video_id = vdata['id']
title = vdata.get('headline')
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
thumbnail = vdata.get('image', {}).get('path')
title = vdata['title']
author = vdata.get('author')
if author:
uploader = '%s %s' % (author['firstName'], author['lastName'])
@@ -73,13 +51,34 @@ class CNETIE(InfoExtractor):
uploader = None
uploader_id = None
mpx_account = data['config']['uvpConfig']['default']['mpx_account']
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
formats = []
subtitles = {}
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'url': tp_link,
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': metadata.get('thumbnail'),
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -1,10 +1,12 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import parse_iso8601
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class ComCarCoffIE(InfoExtractor):
@@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20141127',
'timestamp': 1417107600,
'duration': 1232,
'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
@@ -31,9 +34,10 @@ class ComCarCoffIE(InfoExtractor):
display_id = 'comediansincarsgettingcoffee.com'
webpage = self._download_webpage(url, display_id)
full_data = json.loads(self._search_regex(
r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
webpage, 'full data json'))
full_data = self._parse_json(
self._search_regex(
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
display_id)['videoData']
video_id = full_data['activeVideo']['video']
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
@@ -45,12 +49,18 @@ class ComCarCoffIE(InfoExtractor):
formats = self._extract_m3u8_formats(
video_data['mediaUrl'], video_id, ext='mp4')
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
video_data.get('pubDate'))
duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
video_data.get('duration'))
return {
'id': video_id,
'display_id': display_id,
'title': video_data['title'],
'description': video_data.get('description'),
'timestamp': parse_iso8601(video_data.get('pubDate')),
'timestamp': timestamp,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),

View File

@@ -30,6 +30,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
determine_ext,
error_to_compat_str,
ExtractorError,
fix_xml_ampersands,
float_or_none,
@@ -332,7 +333,8 @@ class InfoExtractor(object):
return False
if errnote is None:
errnote = 'Unable to download webpage'
errmsg = '%s: %s' % (errnote, compat_str(err))
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
@@ -622,7 +624,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
return (username, password)

View File

@@ -7,10 +7,10 @@ import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
error_to_compat_str,
ExtractorError,
int_or_none,
parse_iso8601,
sanitized_Request,
@@ -278,7 +278,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):

View File

@@ -24,6 +24,18 @@ class DaumIE(InfoExtractor):
'upload_date': '20130831',
'duration': 3868,
},
}, {
# Test for https://github.com/rg3/youtube-dl/issues/7949
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
'md5': 'c92d78bcee4424451f1667f275c1dc97',
'info_dict': {
'id': '73147290',
'ext': 'mp4',
'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
'description': '싸이 - 나팔바지',
'upload_date': '20151219',
'duration': 232,
},
}, {
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
'only_matching': True,
@@ -37,9 +49,11 @@ class DaumIE(InfoExtractor):
video_id = mobj.group('id')
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
og_url = self._og_search_url(webpage, default=None) or self._search_regex(
r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
webpage, 'canonical url', group='url')
full_id = self._search_regex(
r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
webpage, 'full id')
r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@@ -12,14 +12,11 @@ from ..compat import (
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class DramaFeverBaseIE(InfoExtractor):
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
}
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
feed = self._download_json(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
video_id, 'Downloading episode JSON')['channel']['item']
info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
media_group = feed.get('media-group', {})
formats = []
for media_content in media_group['media-content']:
src = media_content.get('@attributes', {}).get('url')
if not src:
continue
ext = determine_ext(src)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls'))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = media_group.get('media-title')
description = media_group.get('media-description')
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
thumbnail = self._proto_relative_url(
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
subtitles = {}
for media_subtitle in media_group.get('media-subTitle', []):
lang = media_subtitle.get('@attributes', {}).get('lang')
href = media_subtitle.get('@attributes', {}).get('href')
if not lang or not href:
continue
subtitles[lang] = [{
'ext': 'ttml',
'url': href,
}]
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
subtitles.setdefault('English', []).append({
info['subtitiles'].setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
return info
class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@@ -7,11 +7,11 @@ import socket
from .common import InfoExtractor
from ..compat import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse_unquote,
)
from ..utils import (
error_to_compat_str,
ExtractorError,
limit_length,
sanitized_Request,
@@ -116,7 +116,7 @@ class FacebookIE(InfoExtractor):
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning('unable to log in: %s' % compat_str(err))
self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
return
def _real_initialize(self):

View File

@@ -2,6 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
)
class FazIE(InfoExtractor):
@@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
r'writeFLV\(\'(.+?)\',', webpage, 'config xml url')
r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
encodings = config.find('ENCODINGS')
encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
encoding = encodings.find(code)
if encoding is None:
continue
encoding_url = encoding.find('FILENAME').text
formats.append({
'url': encoding_url,
'format_id': code.lower(),
'quality': pref,
})
encoding = xpath_element(encodings, code)
if encoding:
encoding_url = xpath_text(encoding, 'FILENAME')
if encoding_url:
formats.append({
'url': encoding_url,
'format_id': code.lower(),
'quality': pref,
'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
})
self._sort_formats(formats)
descr = self._html_search_regex(
r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
'description': description.strip() if description else None,
'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
'duration': int_or_none(xpath_text(config, 'DURATION')),
}

View File

@@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
ExtractorError,
js_to_json,
)
@@ -32,24 +30,22 @@ class FKTVIE(InfoExtractor):
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
matches = re.search(
r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
webpage)
if matches is None:
raise ExtractorError('Unable to extract the video')
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
poster, sources = matches.groups()
if poster is None:
self.report_warning('unable to extract thumbnail')
formats = []
for source in sources:
furl = source.get('src')
if furl:
formats.append({
'url': furl,
'format_id': determine_ext(furl),
})
self._sort_formats(formats)
urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
formats = [{
'url': furl,
'format_id': determine_ext(furl),
} for furl in urls]
return {
'id': episode,
'title': title,
'formats': formats,
'thumbnail': poster,
'thumbnail': thumbnail,
}

View File

@@ -1,67 +1,93 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
find_xpath_attr,
sanitized_Request,
int_or_none,
qualities,
)
class FlickrIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': {
'id': '5645318632',
'ext': 'mp4',
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
"uploader_id": "forestwander-nature-pictures",
"title": "Dark Hollow Waterfalls"
'ext': 'mpg',
'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
'title': 'Dark Hollow Waterfalls',
'duration': 19,
'timestamp': 1303528740,
'upload_date': '20110423',
'uploader_id': '10922353@N03',
'uploader': 'Forest Wander',
'comment_count': int,
'view_count': int,
'tags': list,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
_API_BASE_URL = 'https://api.flickr.com/services/rest?'
video_id = mobj.group('id')
video_uploader_id = mobj.group('uploader_id')
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
req = sanitized_Request(webpage_url)
req.add_header(
'User-Agent',
# it needs a more recent version
'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
webpage = self._download_webpage(req, video_id)
secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
node_id = find_xpath_attr(
first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
'id').text
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
self.report_extraction(video_id)
stream = second_xml.find('.//STREAM')
if stream is None:
raise ExtractorError('Unable to extract video url')
video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id': video_uploader_id,
def _call_api(self, method, video_id, api_key, note, secret=None):
query = {
'photo_id': video_id,
'method': 'flickr.%s' % method,
'api_key': api_key,
'format': 'json',
'nojsoncallback': 1,
}
if secret:
query['secret'] = secret
data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
if data['stat'] != 'ok':
raise ExtractorError(data['message'])
return data
def _real_extract(self, url):
video_id = self._match_id(url)
api_key = self._download_json(
'https://www.flickr.com/hermes_error_beacon.gne', video_id,
'Downloading api key')['site_key']
video_info = self._call_api(
'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
if video_info['media'] == 'video':
streams = self._call_api(
'video.getStreamInfo', video_id, api_key,
'Downloading streams info', video_info['secret'])['streams']
preference = qualities(
['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
formats = []
for stream in streams['stream']:
stream_type = str(stream.get('type'))
formats.append({
'format_id': stream_type,
'url': stream['_content'],
'preference': preference(stream_type),
})
self._sort_formats(formats)
owner = video_info.get('owner', {})
return {
'id': video_id,
'title': video_info['title']['_content'],
'description': video_info.get('description', {}).get('_content'),
'formats': formats,
'timestamp': int_or_none(video_info.get('dateuploaded')),
'duration': int_or_none(video_info.get('video', {}).get('duration')),
'uploader_id': owner.get('nsid'),
'uploader': owner.get('realname'),
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
'view_count': int_or_none(video_info.get('views')),
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
}
else:
raise ExtractorError('not a video', expected=True)

View File

@@ -13,6 +13,7 @@ class FootyRoomIE(InfoExtractor):
'title': 'Schalke 04 0 2 Real Madrid',
},
'playlist_count': 3,
'skip': 'Video for this match is not available',
}, {
'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
'info_dict': {

View File

@@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
from .amp import AMPIE
class FoxNewsIE(InfoExtractor):
class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
'description': '16-year-old girl is size of toddler',
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
# 'timestamp': 1304411491,
# 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action",
'description': "Congressman discusses president's plan",
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
# 'timestamp': 1417662047,
# 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
host, video_id = re.match(self._VALID_URL, url).groups()
video = self._download_json(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
item = video['channel']['item']
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
return info

View File

@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
@@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -33,7 +30,7 @@ class FranceInterIE(InfoExtractor):
video_url = 'http://www.franceinter.fr/' + path
title = self._html_search_regex(
r'<span class="title">(.+?)</span>', webpage, 'title')
r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
description = self._html_search_regex(
r'<span class="description">(.*?)</span>',
webpage, 'description', fatal=False)

View File

@@ -0,0 +1,193 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
encode_dict,
int_or_none,
sanitized_Request,
ExtractorError,
urlencode_postdata
)
class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation'
_TESTS = [{
'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
'info_dict': {
'id': '658',
'display_id': 'breeze',
'ext': 'mp4',
'title': 'Air - 1 - Breeze',
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
'thumbnail': 're:https?://.*\.jpg',
},
}, {
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
'info_dict': {
'id': '31128',
'display_id': 'role-play',
'ext': 'mp4',
'title': '.hack//SIGN - 1 - Role Play',
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
'thumbnail': 're:https?://.*\.jpg',
},
}, {
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
'info_dict': {
'id': '9635',
'display_id': 'broadcast-dub-preview',
'ext': 'mp4',
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
'thumbnail': 're:https?://.*\.(?:jpg|png)',
},
}]
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
data = urlencode_postdata(encode_dict({
'email_field': username,
'password_field': password,
}))
login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
'Content-Type': 'application/x-www-form-urlencoded'
})
login_page = self._download_webpage(
login_request, None, 'Logging in as %s' % username)
if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
return
error = self._html_search_regex(
r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
login_page, 'error messages', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
def _real_initialize(self):
self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
errors = []
formats = []
ERRORS_MAP = {
'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
'ERROR_VIDEO_EXPIRED': 'videoExpired',
'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
}
USER_AGENTS = (
# PC UA is served with m3u8 that provides some bonus lower quality formats
('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
# Mobile UA allows to extract direct links and also does not fail when
# PC UA fails with hulu error (e.g.
# http://www.funimation.com/shows/hacksign/videos/official/role-play)
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
)
for kind, user_agent in USER_AGENTS:
request = sanitized_Request(url)
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(
request, display_id, 'Downloading %s webpage' % kind)
playlist = self._parse_json(
self._search_regex(
r'var\s+playersData\s*=\s*(\[.+?\]);\n',
webpage, 'players data'),
display_id)[0]['playlist']
items = next(item['items'] for item in playlist if item.get('items'))
item = next(item for item in items if item.get('itemAK') == display_id)
error_messages = {}
video_error_messages = self._search_regex(
r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
webpage, 'error messages', default=None)
if video_error_messages:
error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
if error_messages_json:
for _, error in error_messages_json.items():
type_ = error.get('type')
description = error.get('description')
content = error.get('content')
if type_ == 'text' and description and content:
error_message = ERRORS_MAP.get(description)
if error_message:
error_messages[error_message] = content
for video in item.get('videoSet', []):
auth_token = video.get('authToken')
if not auth_token:
continue
funimation_id = video.get('FUNImationID') or video.get('videoId')
preference = 1 if video.get('languageMode') == 'dub' else 0
if not auth_token.startswith('?'):
auth_token = '?%s' % auth_token
for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
format_url = video.get('%sUrl' % quality)
if not format_url:
continue
if not format_url.startswith(('http', '//')):
errors.append(format_url)
continue
if determine_ext(format_url) == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
tbr = int_or_none(self._search_regex(
r'-(\d+)[Kk]', format_url, 'tbr', default=None))
formats.append({
'url': format_url + auth_token,
'format_id': '%s-http-%dp' % (funimation_id, height),
'height': height,
'tbr': tbr,
'preference': preference,
})
if not formats and errors:
raise ExtractorError(
'%s returned error: %s'
% (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
expected=True)
self._sort_formats(formats)
title = item['title']
artist = item.get('artist')
if artist:
title = '%s - %s' % (artist, title)
description = self._og_search_description(webpage) or item.get('description')
thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
video_id = item.get('itemId') or display_id
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -44,7 +44,6 @@ from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
from .bliptv import BlipTVIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
@@ -55,6 +54,8 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor):
@@ -1440,11 +1441,6 @@ class GenericIE(InfoExtractor):
'id': match.group('id')
}
# Look for embedded blip.tv player
bliptv_url = BlipTVIE._extract_url(webpage)
if bliptv_url:
return self.url_result(bliptv_url, 'BlipTV')
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:
@@ -1769,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive')
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@@ -1796,6 +1797,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:

View File

@@ -0,0 +1,88 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 46,
}
}
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
webpage)
if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason)
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
duration = int_or_none(self._search_regex(
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
formats = []
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
fmt_id, fmt_url = fmt_stream.split('|')
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
'url': fmt_url,
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),
'height': int_or_none(height),
'ext': self._FORMATS_EXT[fmt_id],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'formats': formats,
}

View File

@@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_element,
xpath_text,
int_or_none,
parse_duration,
)
class GPUTechConfIE(InfoExtractor):
_VALID_URL = r'https?://on-demand\.gputechconf\.com/gtc/2015/video/S(?P<id>\d+)\.html'
_TEST = {
'url': 'http://on-demand.gputechconf.com/gtc/2015/video/S5156.html',
'md5': 'a8862a00a0fd65b8b43acc5b8e33f798',
'info_dict': {
'id': '5156',
'ext': 'mp4',
'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis',
'duration': 1219,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
metadata = xpath_element(doc, 'metadata')
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
mbr_videos = xpath_element(metadata, 'MBRVideos')
formats = []
for mbr_video in mbr_videos.findall('MBRVideo'):
stream_name = xpath_text(mbr_video, 'streamName')
if stream_name:
formats.append({
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': xpath_text(metadata, 'title'),
'duration': parse_duration(xpath_text(metadata, 'endTime')),
'creator': xpath_text(metadata, 'speaker'),
'formats': formats,
}

View File

@@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
'description': 'Imgur: The most awesome images on the Internet.',
},
}, {
'url': 'https://imgur.com/A61SaA1',
@@ -29,8 +29,20 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
'description': 'Imgur: The most awesome images on the Internet.',
},
}, {
'url': 'https://imgur.com/gallery/YcAQlkx',
'info_dict': {
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'description': 'Imgur: The most awesome images on the Internet.'
}
}, {
'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -100,25 +112,38 @@ class ImgurIE(InfoExtractor):
class ImgurAlbumIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
_TEST = {
_TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
},
'playlist_count': 25,
}
}, {
'url': 'http://imgur.com/a/j6Orj',
'only_matching': True,
}, {
'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True,
}]
def _real_extract(self, url):
album_id = self._match_id(url)
album_images = self._download_json(
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
album_id)['data']['images']
album_id, fatal=False)
entries = [
self.url_result('http://imgur.com/%s' % image['hash'])
for image in album_images if image.get('hash')]
if album_images:
data = album_images.get('data')
if data and isinstance(data, dict):
images = data.get('images')
if images and isinstance(images, list):
entries = [
self.url_result('http://imgur.com/%s' % image['hash'])
for image in images if image.get('hash')]
return self.playlist_result(entries, album_id)
return self.playlist_result(entries, album_id)
# Fallback to single video
return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())

View File

@@ -1,3 +1,5 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
@@ -5,8 +7,9 @@ import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
compat_parse_qs,
)
from ..utils import determine_ext
class InfoQIE(InfoExtractor):
@@ -16,7 +19,7 @@ class InfoQIE(InfoExtractor):
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
'info_dict': {
'id': '12-jan-pythonthings',
'id': 'A-Few-of-My-Favorite-Python-Things',
'ext': 'mp4',
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
'title': 'A Few of My Favorite [Python] Things',
@@ -24,8 +27,71 @@ class InfoQIE(InfoExtractor):
}, {
'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
'only_matching': True,
}, {
'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
'md5': '4918d0cca1497f2244572caf626687ef',
'info_dict': {
'id': 'openstack-continued-delivery',
'title': 'OpenStack持续交付之路',
'ext': 'flv',
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
},
}]
def _extract_bokecc_videos(self, webpage, video_id):
# TODO: bokecc.com is a Chinese video cloud platform
# It should have an independent extractor but I don't have other
# examples using bokecc
player_params_str = self._html_search_regex(
r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
webpage, 'player params', default=None)
player_params = compat_parse_qs(player_params_str)
info_xml = self._download_xml(
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
player_params['siteid'][0], player_params['vid'][0]), video_id)
return [{
'format_id': 'bokecc',
'url': quality.find('./copy').attrib['playurl'],
'preference': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
def _extract_rtmp_videos(self, webpage):
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
# Extract video URL
encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
playpath = 'mp4:' + real_id
return [{
'format_id': 'rtmp',
'url': video_url,
'ext': determine_ext(playpath),
'play_path': playpath,
}]
def _extract_http_videos(self, webpage):
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
return [{
'format_id': 'http',
'url': http_video_url,
'http_headers': {
'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
policy, signature, key_pair_id),
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -33,31 +99,12 @@ class InfoQIE(InfoExtractor):
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
video_description = self._html_search_meta('description', webpage, 'description')
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
if '/cn/' in url:
# for China videos, HTTP video URL exists but always fails with 403
formats = self._extract_bokecc_videos(webpage, video_id)
else:
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
# Extract video URL
encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
playpath = 'mp4:' + real_id
video_filename = playpath.split('/')[-1]
video_id, extension = video_filename.split('.')
http_base = self._search_regex(
r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
'HTTP base URL')
formats = [{
'format_id': 'rtmp',
'url': video_url,
'ext': extension,
'play_path': playpath,
}, {
'format_id': 'http',
'url': compat_urlparse.urljoin(url, http_base) + real_id,
}]
self._sort_formats(formats)
return {

View File

@@ -205,8 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction
# last update at 2015-12-06 for Zombie::bite
enc_key = '3719f6a1da83ee0aee3488d8802d7696'[::-1]
# last update at 2015-12-18 for Zombie::bite
enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]
return enc_key
def _real_extract(self, url):

View File

@@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TEST = {
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
'info_dict': {
'id': 'nPripu9l',
'ext': 'mov',
'title': 'Big Buck Bunny Trailer',
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
'upload_date': '20081127',
'timestamp': 1227796140,
}
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
video_data = json_data['playlist'][0]
subtitles = {}
for track in video_data['tracks']:
if track['kind'] == 'captions':
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'):
formats.append({
'url': source_url,
'vcodec': 'none',
})
else:
formats.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'],
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
'info_dict': {
'id': 'Fh3QgymL9gsc',
'ext': 'mp4',
'title': 'Maze Runner: The Scorch Trials Official Movie Review',
'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
'upload_date': '20150918',
'timestamp': 1442549540,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': 'JWPlatform',
}

View File

@@ -3,10 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
find_xpath_attr,
@@ -189,7 +186,7 @@ class NBCNewsIE(InfoExtractor):
'title': info.find('headline').text,
'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
'description': compat_str(info.find('caption').text),
'description': info.find('caption').text,
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:

View File

@@ -88,10 +88,10 @@ class NDRIE(NDRBaseIE):
'embedURL', webpage, 'embed URL', fatal=True)
description = self._search_regex(
r'<p[^>]+itemprop="description">([^<]+)</p>',
webpage, 'description', fatal=False)
webpage, 'description', default=None) or self._og_search_description(webpage)
timestamp = parse_iso8601(
self._search_regex(
r'<span itemprop="datePublished" content="([^"]+)">',
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
webpage, 'upload date', fatal=False))
return {
'_type': 'url_transparent',

View File

@@ -9,6 +9,7 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
clean_html,
@@ -82,14 +83,21 @@ class NocoIE(InfoExtractor):
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
@staticmethod
def _ts():
return int(time.time() * 1000)
def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000))
ts = compat_str(self._ts() + self._ts_offset)
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note)
request = sanitized_Request(url)
request.add_header('Referer', self._referer)
resp = self._download_json(request, video_id, note)
if isinstance(resp, dict) and resp.get('error'):
self._raise_error(resp['error'], resp['description'])
@@ -102,8 +110,22 @@ class NocoIE(InfoExtractor):
expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
# Timestamp adjustment offset between server time and local time
# must be calculated in order to use timestamps closest to server's
# in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
webpage, 'noco player', group='player',
default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
ts = int_or_none(qs.get('ts', [None])[0])
self._ts_offset = ts - self._ts() if ts else 0
self._referer = player_url
medias = self._call_api(
'shows/%s/medias' % video_id,
@@ -155,8 +177,8 @@ class NocoIE(InfoExtractor):
'format_id': format_id_extended,
'width': int_or_none(fmt.get('res_width')),
'height': int_or_none(fmt.get('res_lines')),
'abr': int_or_none(fmt.get('audiobitrate')),
'vbr': int_or_none(fmt.get('videobitrate')),
'abr': int_or_none(fmt.get('audiobitrate'), 1000),
'vbr': int_or_none(fmt.get('videobitrate'), 1000),
'filesize': int_or_none(fmt.get('filesize')),
'format_note': qualities[format_id].get('quality_name'),
'quality': qualities[format_id].get('priority'),

View File

@@ -17,15 +17,16 @@ class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
_HOST = 'www.novamov.com'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
_FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
_FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
_URL_TEMPLATE = 'http://%s/video/%s'
_TEST = {
'url': 'http://www.novamov.com/video/4rurhn9x446jj',
@@ -39,20 +40,28 @@ class NovaMovIE(InfoExtractor):
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
}
def _check_existence(self, webpage, video_id):
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
url = 'http://%s/video/%s' % (self._HOST, video_id)
url = self._URL_TEMPLATE % (self._HOST, video_id)
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
self._check_existence(webpage, video_id)
def extract_filekey(default=NO_DEFAULT):
return self._search_regex(
filekey = self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
return self._search_regex(
r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
else:
return filekey
filekey = extract_filekey(default=None)
@@ -69,6 +78,7 @@ class NovaMovIE(InfoExtractor):
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
self._check_existence(webpage, video_id)
filekey = extract_filekey()
@@ -127,19 +137,18 @@ class NowVideoIE(NovaMovIE):
_HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.nowvideo.to/video/0mw0yow7b6dxa',
'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
'md5': '12c82cad4f2084881d8bc60ee29df092',
'info_dict': {
'id': '0mw0yow7b6dxa',
'id': 'f1d6fce9a968b',
'ext': 'flv',
'title': 'youtubedl test video _BaW_jenozKc.mp4',
'title': 'youtubedl test video BaWjenozKc',
'description': 'Description',
}
},
}
@@ -153,6 +162,7 @@ class VideoWeedIE(NovaMovIE):
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
_URL_TEMPLATE = 'http://%s/file/%s'
_TEST = {
'url': 'http://www.videoweed.es/file/b42178afbea14',

View File

@@ -1,7 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals
from .brightcove import BrightcoveLegacyIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -23,9 +26,12 @@ class NownessBaseIE(InfoExtractor):
note='Downloading player JavaScript',
errnote='Unable to download player JavaScript')
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'BrightcoveLegacy')
if bc_url:
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
bc_url = BrightcoveNewIE._extract_url(player_code)
if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
raise ExtractorError('Could not find player definition')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':

View File

@@ -27,6 +27,7 @@ class OoyalaBaseIE(InfoExtractor):
'duration': float_or_none(metadata.get('duration'), 1000),
}
urls = []
formats = []
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
auth_data = self._download_json(
@@ -38,20 +39,28 @@ class OoyalaBaseIE(InfoExtractor):
if cur_auth_data['authorized']:
for stream in cur_auth_data['streams']:
url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
if url in urls:
continue
urls.append(url)
delivery_type = stream['delivery_type']
if delivery_type == 'remote_asset':
video_info['url'] = url
return video_info
if delivery_type == 'hls':
formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif delivery_type == 'hds':
formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False))
if delivery_type == 'hls' or '.m3u8' in url:
m3u8_formats = self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif delivery_type == 'hds' or '.f4m' in url:
f4m_formats = self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif '.smil' in url:
smil_formats = self._extract_smil_formats(url, embed_code, fatal=False)
if smil_formats:
formats.extend(smil_formats)
else:
formats.append({
'url': url,
'ext': stream.get('delivery_type'),
'vcodec': stream.get('video_codec'),
'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')),
'format_id': delivery_type,
'width': int_or_none(stream.get('width')),
'height': int_or_none(stream.get('height')),
'abr': int_or_none(stream.get('audio_bitrate')),

View File

@@ -16,165 +16,165 @@ from ..utils import (
class PBSIE(InfoExtractor):
_STATIONS = (
('video.pbs.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
('video.aptv.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
('video.gpb.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
('video.mpbonline.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
('video.wnpt.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
('video.wfsu.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
('video.wsre.org', 'WSRE (WSRE)'), # http://www.wsre.org
('video.wtcitv.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
('video.pba.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
('video.alaskapublic.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
# ('kuac.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
# ('ktoo.org', '360 North (KTOO)'), # http://www.ktoo.org/
# ('azpm.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
('video.azpbs.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
('portal.knme.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
('video.vegaspbs.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
('watch.aetn.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
('video.ket.org', 'KET (WKLE)'), # http://www.ket.org/
('video.wkno.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
('video.lpb.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
('videos.oeta.tv', 'OETA (KETA)'), # http://www.oeta.tv
('video.optv.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
('watch.wsiu.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
('video.keet.org', 'KEET TV (KEET)'), # http://www.keet.org
('pbs.kixe.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
('video.kpbs.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
('video.kqed.org', 'KQED (KQED)'), # http://www.kqed.org
('vids.kvie.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
('video.pbssocal.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
('video.valleypbs.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
('video.cptv.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
('watch.knpb.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
('video.soptv.org', 'SOPTV (KSYS)'), # http://www.soptv.org
# ('klcs.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
# ('krcb.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
# ('kvcr.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
('video.rmpbs.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
('video.kenw.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
('video.kued.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
('video.wyomingpbs.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
('video.cpt12.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
('video.kbyueleven.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
('video.thirteen.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
('video.wgbh.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
('video.wgby.org', 'WGBY (WGBY)'), # http://www.wgby.org
('watch.njtvonline.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
# ('ripbs.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
('watch.wliw.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
('video.mpt.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
('watch.weta.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
('video.whyy.org', 'WHYY (WHYY)'), # http://www.whyy.org
('video.wlvt.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
('video.wvpt.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
('video.whut.org', 'Howard University Television (WHUT)'), # http://www.whut.org
('video.wedu.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
('video.wgcu.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
# ('wjct.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
('video.wpbt2.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
('video.wucftv.org', 'WUCF TV (WUCF)'), # http://wucftv.org
('video.wuft.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
('watch.wxel.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
('video.wlrn.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
('video.wusf.usf.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
('video.scetv.org', 'ETV (WRLK)'), # http://www.scetv.org
('video.unctv.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
# ('pbsguam.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
('video.pbshawaii.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
('video.idahoptv.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
('video.ksps.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
('watch.opb.org', 'OPB (KOPB)'), # http://www.opb.org
('watch.nwptv.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
('video.will.illinois.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
('video.networkknowledge.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
('video.wttw.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
# ('wtvp.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
('video.iptv.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
('video.ninenet.org', 'Nine Network (KETC)'), # http://www.ninenet.org
('video.wfwa.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
('video.wfyi.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
('video.mptv.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
('video.wnin.org', 'WNIN (WNIN)'), # http://www.wnin.org/
('video.wnit.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
('video.wpt.org', 'WPT (WPNE)'), # http://www.wpt.org/
('video.wvut.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
('video.weiu.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
('video.wqpt.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
('video.wycc.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
# ('lakeshorepublicmedia.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
('video.wipb.org', 'WIPB-TV (WIPB)'), # http://wipb.org
('video.indianapublicmedia.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
('watch.cetconnect.org', 'CET (WCET)'), # http://www.cetconnect.org
('video.thinktv.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
('video.wbgu.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
('video.wgvu.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
('video.netnebraska.org', 'NET1 (KUON)'), # http://netnebraska.org
('video.pioneer.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
('watch.sdpb.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
('video.tpt.org', 'TPT (KTCA)'), # http://www.tpt.org
('watch.ksmq.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
('watch.kpts.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
('watch.ktwu.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
# ('shptv.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
# ('kcpt.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
# ('blueridgepbs.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
('watch.easttennesseepbs.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
('video.wcte.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
('video.wljt.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
('video.wosu.org', 'WOSU TV (WOSU)'), # http://wosu.org/
('video.woub.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
('video.wvpublic.org', 'WVPB (WVPB)'), # http://wvpublic.org/
('video.wkyupbs.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
# ('wyes.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
('video.kera.org', 'KERA 13 (KERA)'), # http://www.kera.org/
('video.mpbn.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
('video.mountainlake.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
('video.nhptv.org', 'NHPTV (WENH)'), # http://nhptv.org/
('video.vpt.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
('video.witf.org', 'witf (WITF)'), # http://www.witf.org
('watch.wqed.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
('video.wmht.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
('video.deltabroadcasting.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
('video.dptv.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
('video.wcmu.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
('video.wkar.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
('wnmuvideo.nmu.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
('video.wdse.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
('video.wgte.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
('video.lptv.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
# ('prairiepublic.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
('video.kmos.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
('watch.montanapbs.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
('video.krwg.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
('video.kacvtv.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
('video.kcostv.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
('video.wcny.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
('video.wned.org', 'WNED (WNED)'), # http://www.wned.org/
('watch.wpbstv.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
('video.wskg.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
('video.wxxi.org', 'WXXI (WXXI)'), # http://wxxi.org
('video.wpsu.org', 'WPSU (WPSU)'), # http://www.wpsu.org
# ('wqln.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
('on-demand.wvia.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
('video.wtvi.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
# ('whro.org', 'WHRO (WHRO)'), # http://whro.org
('video.westernreservepublicmedia.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
('video.ideastream.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
('video.kcts9.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
('video.basinpbs.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
('video.houstonpbs.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
# ('tamu.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
# ('kedt.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
('video.klrn.org', 'KLRN (KLRN)'), # http://www.klrn.org
('video.klru.tv', 'KLRU (KLRU)'), # http://www.klru.org
# ('kmbh.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
# ('knct.org', 'KNCT (KNCT)'), # http://www.knct.org
# ('ktxt.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
('video.wtjx.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
('video.ideastations.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
('video.kbtc.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
(r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
(r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
(r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
(r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
(r'video\.wnpt\.org', 'Nashville Public Television (WNPT)'), # http://www.wnpt.org
(r'video\.wfsu\.org', 'WFSU-TV (WFSU)'), # http://wfsu.org/
(r'video\.wsre\.org', 'WSRE (WSRE)'), # http://www.wsre.org
(r'video\.wtcitv\.org', 'WTCI (WTCI)'), # http://www.wtcitv.org
(r'video\.pba\.org', 'WPBA/Channel 30 (WPBA)'), # http://pba.org/
(r'video\.alaskapublic\.org', 'Alaska Public Media (KAKM)'), # http://alaskapublic.org/kakm
# (r'kuac\.org', 'KUAC (KUAC)'), # http://kuac.org/kuac-tv/
# (r'ktoo\.org', '360 North (KTOO)'), # http://www.ktoo.org/
# (r'azpm\.org', 'KUAT 6 (KUAT)'), # http://www.azpm.org/
(r'video\.azpbs\.org', 'Arizona PBS (KAET)'), # http://www.azpbs.org
(r'portal\.knme\.org', 'KNME-TV/Channel 5 (KNME)'), # http://www.newmexicopbs.org/
(r'video\.vegaspbs\.org', 'Vegas PBS (KLVX)'), # http://vegaspbs.org/
(r'watch\.aetn\.org', 'AETN/ARKANSAS ETV NETWORK (KETS)'), # http://www.aetn.org/
(r'video\.ket\.org', 'KET (WKLE)'), # http://www.ket.org/
(r'video\.wkno\.org', 'WKNO/Channel 10 (WKNO)'), # http://www.wkno.org/
(r'video\.lpb\.org', 'LPB/LOUISIANA PUBLIC BROADCASTING (WLPB)'), # http://www.lpb.org/
(r'videos\.oeta\.tv', 'OETA (KETA)'), # http://www.oeta.tv
(r'video\.optv\.org', 'Ozarks Public Television (KOZK)'), # http://www.optv.org/
(r'watch\.wsiu\.org', 'WSIU Public Broadcasting (WSIU)'), # http://www.wsiu.org/
(r'video\.keet\.org', 'KEET TV (KEET)'), # http://www.keet.org
(r'pbs\.kixe\.org', 'KIXE/Channel 9 (KIXE)'), # http://kixe.org/
(r'video\.kpbs\.org', 'KPBS San Diego (KPBS)'), # http://www.kpbs.org/
(r'video\.kqed\.org', 'KQED (KQED)'), # http://www.kqed.org
(r'vids\.kvie\.org', 'KVIE Public Television (KVIE)'), # http://www.kvie.org
(r'video\.pbssocal\.org', 'PBS SoCal/KOCE (KOCE)'), # http://www.pbssocal.org/
(r'video\.valleypbs\.org', 'ValleyPBS (KVPT)'), # http://www.valleypbs.org/
(r'video\.cptv\.org', 'CONNECTICUT PUBLIC TELEVISION (WEDH)'), # http://cptv.org
(r'watch\.knpb\.org', 'KNPB Channel 5 (KNPB)'), # http://www.knpb.org/
(r'video\.soptv\.org', 'SOPTV (KSYS)'), # http://www.soptv.org
# (r'klcs\.org', 'KLCS/Channel 58 (KLCS)'), # http://www.klcs.org
# (r'krcb\.org', 'KRCB Television & Radio (KRCB)'), # http://www.krcb.org
# (r'kvcr\.org', 'KVCR TV/DT/FM :: Vision for the Future (KVCR)'), # http://kvcr.org
(r'video\.rmpbs\.org', 'Rocky Mountain PBS (KRMA)'), # http://www.rmpbs.org
(r'video\.kenw\.org', 'KENW-TV3 (KENW)'), # http://www.kenw.org
(r'video\.kued\.org', 'KUED Channel 7 (KUED)'), # http://www.kued.org
(r'video\.wyomingpbs\.org', 'Wyoming PBS (KCWC)'), # http://www.wyomingpbs.org
(r'video\.cpt12\.org', 'Colorado Public Television / KBDI 12 (KBDI)'), # http://www.cpt12.org/
(r'video\.kbyueleven\.org', 'KBYU-TV (KBYU)'), # http://www.kbyutv.org/
(r'video\.thirteen\.org', 'Thirteen/WNET New York (WNET)'), # http://www.thirteen.org
(r'video\.wgbh\.org', 'WGBH/Channel 2 (WGBH)'), # http://wgbh.org
(r'video\.wgby\.org', 'WGBY (WGBY)'), # http://www.wgby.org
(r'watch\.njtvonline\.org', 'NJTV Public Media NJ (WNJT)'), # http://www.njtvonline.org/
# (r'ripbs\.org', 'Rhode Island PBS (WSBE)'), # http://www.ripbs.org/home/
(r'watch\.wliw\.org', 'WLIW21 (WLIW)'), # http://www.wliw.org/
(r'video\.mpt\.tv', 'mpt/Maryland Public Television (WMPB)'), # http://www.mpt.org
(r'watch\.weta\.org', 'WETA Television and Radio (WETA)'), # http://www.weta.org
(r'video\.whyy\.org', 'WHYY (WHYY)'), # http://www.whyy.org
(r'video\.wlvt\.org', 'PBS 39 (WLVT)'), # http://www.wlvt.org/
(r'video\.wvpt\.net', 'WVPT - Your Source for PBS and More! (WVPT)'), # http://www.wvpt.net
(r'video\.whut\.org', 'Howard University Television (WHUT)'), # http://www.whut.org
(r'video\.wedu\.org', 'WEDU PBS (WEDU)'), # http://www.wedu.org
(r'video\.wgcu\.org', 'WGCU Public Media (WGCU)'), # http://www.wgcu.org/
# (r'wjct\.org', 'WJCT Public Broadcasting (WJCT)'), # http://www.wjct.org
(r'video\.wpbt2\.org', 'WPBT2 (WPBT)'), # http://www.wpbt2.org
(r'video\.wucftv\.org', 'WUCF TV (WUCF)'), # http://wucftv.org
(r'video\.wuft\.org', 'WUFT/Channel 5 (WUFT)'), # http://www.wuft.org
(r'watch\.wxel\.org', 'WXEL/Channel 42 (WXEL)'), # http://www.wxel.org/home/
(r'video\.wlrn\.org', 'WLRN/Channel 17 (WLRN)'), # http://www.wlrn.org/
(r'video\.wusf\.usf\.edu', 'WUSF Public Broadcasting (WUSF)'), # http://wusf.org/
(r'video\.scetv\.org', 'ETV (WRLK)'), # http://www.scetv.org
(r'video\.unctv\.org', 'UNC-TV (WUNC)'), # http://www.unctv.org/
# (r'pbsguam\.org', 'PBS Guam (KGTF)'), # http://www.pbsguam.org/
(r'video\.pbshawaii\.org', 'PBS Hawaii - Oceanic Cable Channel 10 (KHET)'), # http://www.pbshawaii.org/
(r'video\.idahoptv\.org', 'Idaho Public Television (KAID)'), # http://idahoptv.org
(r'video\.ksps\.org', 'KSPS (KSPS)'), # http://www.ksps.org/home/
(r'watch\.opb\.org', 'OPB (KOPB)'), # http://www.opb.org
(r'watch\.nwptv\.org', 'KWSU/Channel 10 & KTNW/Channel 31 (KWSU)'), # http://www.kwsu.org
(r'video\.will\.illinois\.edu', 'WILL-TV (WILL)'), # http://will.illinois.edu/
(r'video\.networkknowledge\.tv', 'Network Knowledge - WSEC/Springfield (WSEC)'), # http://www.wsec.tv
(r'video\.wttw\.com', 'WTTW11 (WTTW)'), # http://www.wttw.com/
# (r'wtvp\.org', 'WTVP & WTVP.org, Public Media for Central Illinois (WTVP)'), # http://www.wtvp.org/
(r'video\.iptv\.org', 'Iowa Public Television/IPTV (KDIN)'), # http://www.iptv.org/
(r'video\.ninenet\.org', 'Nine Network (KETC)'), # http://www.ninenet.org
(r'video\.wfwa\.org', 'PBS39 Fort Wayne (WFWA)'), # http://wfwa.org/
(r'video\.wfyi\.org', 'WFYI Indianapolis (WFYI)'), # http://www.wfyi.org
(r'video\.mptv\.org', 'Milwaukee Public Television (WMVS)'), # http://www.mptv.org
(r'video\.wnin\.org', 'WNIN (WNIN)'), # http://www.wnin.org/
(r'video\.wnit\.org', 'WNIT Public Television (WNIT)'), # http://www.wnit.org/
(r'video\.wpt\.org', 'WPT (WPNE)'), # http://www.wpt.org/
(r'video\.wvut\.org', 'WVUT/Channel 22 (WVUT)'), # http://wvut.org/
(r'video\.weiu\.net', 'WEIU/Channel 51 (WEIU)'), # http://www.weiu.net
(r'video\.wqpt\.org', 'WQPT-TV (WQPT)'), # http://www.wqpt.org
(r'video\.wycc\.org', 'WYCC PBS Chicago (WYCC)'), # http://www.wycc.org
# (r'lakeshorepublicmedia\.org', 'Lakeshore Public Television (WYIN)'), # http://lakeshorepublicmedia.org/
(r'video\.wipb\.org', 'WIPB-TV (WIPB)'), # http://wipb.org
(r'video\.indianapublicmedia\.org', 'WTIU (WTIU)'), # http://indianapublicmedia.org/tv/
(r'watch\.cetconnect\.org', 'CET (WCET)'), # http://www.cetconnect.org
(r'video\.thinktv\.org', 'ThinkTVNetwork (WPTD)'), # http://www.thinktv.org
(r'video\.wbgu\.org', 'WBGU-TV (WBGU)'), # http://wbgu.org
(r'video\.wgvu\.org', 'WGVU TV (WGVU)'), # http://www.wgvu.org/
(r'video\.netnebraska\.org', 'NET1 (KUON)'), # http://netnebraska.org
(r'video\.pioneer\.org', 'Pioneer Public Television (KWCM)'), # http://www.pioneer.org
(r'watch\.sdpb\.org', 'SDPB Television (KUSD)'), # http://www.sdpb.org
(r'video\.tpt\.org', 'TPT (KTCA)'), # http://www.tpt.org
(r'watch\.ksmq\.org', 'KSMQ (KSMQ)'), # http://www.ksmq.org/
(r'watch\.kpts\.org', 'KPTS/Channel 8 (KPTS)'), # http://www.kpts.org/
(r'watch\.ktwu\.org', 'KTWU/Channel 11 (KTWU)'), # http://ktwu.org
# (r'shptv\.org', 'Smoky Hills Public Television (KOOD)'), # http://www.shptv.org
# (r'kcpt\.org', 'KCPT Kansas City Public Television (KCPT)'), # http://kcpt.org/
# (r'blueridgepbs\.org', 'Blue Ridge PBS (WBRA)'), # http://www.blueridgepbs.org/
(r'watch\.easttennesseepbs\.org', 'East Tennessee PBS (WSJK)'), # http://easttennesseepbs.org
(r'video\.wcte\.tv', 'WCTE-TV (WCTE)'), # http://www.wcte.org
(r'video\.wljt\.org', 'WLJT, Channel 11 (WLJT)'), # http://wljt.org/
(r'video\.wosu\.org', 'WOSU TV (WOSU)'), # http://wosu.org/
(r'video\.woub\.org', 'WOUB/WOUC (WOUB)'), # http://woub.org/tv/index.php?section=5
(r'video\.wvpublic\.org', 'WVPB (WVPB)'), # http://wvpublic.org/
(r'video\.wkyupbs\.org', 'WKYU-PBS (WKYU)'), # http://www.wkyupbs.org
# (r'wyes\.org', 'WYES-TV/New Orleans (WYES)'), # http://www.wyes.org
(r'video\.kera\.org', 'KERA 13 (KERA)'), # http://www.kera.org/
(r'video\.mpbn\.net', 'MPBN (WCBB)'), # http://www.mpbn.net/
(r'video\.mountainlake\.org', 'Mountain Lake PBS (WCFE)'), # http://www.mountainlake.org/
(r'video\.nhptv\.org', 'NHPTV (WENH)'), # http://nhptv.org/
(r'video\.vpt\.org', 'Vermont PBS (WETK)'), # http://www.vpt.org
(r'video\.witf\.org', 'witf (WITF)'), # http://www.witf.org
(r'watch\.wqed\.org', 'WQED Multimedia (WQED)'), # http://www.wqed.org/
(r'video\.wmht\.org', 'WMHT Educational Telecommunications (WMHT)'), # http://www.wmht.org/home/
(r'video\.deltabroadcasting\.org', 'Q-TV (WDCQ)'), # http://www.deltabroadcasting.org
(r'video\.dptv\.org', 'WTVS Detroit Public TV (WTVS)'), # http://www.dptv.org/
(r'video\.wcmu\.org', 'CMU Public Television (WCMU)'), # http://www.wcmu.org
(r'video\.wkar\.org', 'WKAR-TV (WKAR)'), # http://wkar.org/
(r'wnmuvideo\.nmu\.edu', 'WNMU-TV Public TV 13 (WNMU)'), # http://wnmutv.nmu.edu
(r'video\.wdse\.org', 'WDSE - WRPT (WDSE)'), # http://www.wdse.org/
(r'video\.wgte\.org', 'WGTE TV (WGTE)'), # http://www.wgte.org
(r'video\.lptv\.org', 'Lakeland Public Television (KAWE)'), # http://www.lakelandptv.org
# (r'prairiepublic\.org', 'PRAIRIE PUBLIC (KFME)'), # http://www.prairiepublic.org/
(r'video\.kmos\.org', 'KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS)'), # http://www.kmos.org/
(r'watch\.montanapbs\.org', 'MontanaPBS (KUSM)'), # http://montanapbs.org
(r'video\.krwg\.org', 'KRWG/Channel 22 (KRWG)'), # http://www.krwg.org
(r'video\.kacvtv\.org', 'KACV (KACV)'), # http://www.panhandlepbs.org/home/
(r'video\.kcostv\.org', 'KCOS/Channel 13 (KCOS)'), # www.kcostv.org
(r'video\.wcny\.org', 'WCNY/Channel 24 (WCNY)'), # http://www.wcny.org
(r'video\.wned\.org', 'WNED (WNED)'), # http://www.wned.org/
(r'watch\.wpbstv\.org', 'WPBS (WPBS)'), # http://www.wpbstv.org
(r'video\.wskg\.org', 'WSKG Public TV (WSKG)'), # http://wskg.org
(r'video\.wxxi\.org', 'WXXI (WXXI)'), # http://wxxi.org
(r'video\.wpsu\.org', 'WPSU (WPSU)'), # http://www.wpsu.org
# (r'wqln\.org', 'WQLN/Channel 54 (WQLN)'), # http://www.wqln.org
(r'on-demand\.wvia\.org', 'WVIA Public Media Studios (WVIA)'), # http://www.wvia.org/
(r'video\.wtvi\.org', 'WTVI (WTVI)'), # http://www.wtvi.org/
# (r'whro\.org', 'WHRO (WHRO)'), # http://whro.org
(r'video\.westernreservepublicmedia\.org', 'Western Reserve PBS (WNEO)'), # http://www.WesternReservePublicMedia.org/
(r'video\.ideastream\.org', 'WVIZ/PBS ideastream (WVIZ)'), # http://www.wviz.org/
(r'video\.kcts9\.org', 'KCTS 9 (KCTS)'), # http://kcts9.org/
(r'video\.basinpbs\.org', 'Basin PBS (KPBT)'), # http://www.basinpbs.org
(r'video\.houstonpbs\.org', 'KUHT / Channel 8 (KUHT)'), # http://www.houstonpublicmedia.org/
# (r'tamu\.edu', 'KAMU - TV (KAMU)'), # http://KAMU.tamu.edu
# (r'kedt\.org', 'KEDT/Channel 16 (KEDT)'), # http://www.kedt.org
(r'video\.klrn\.org', 'KLRN (KLRN)'), # http://www.klrn.org
(r'video\.klru\.tv', 'KLRU (KLRU)'), # http://www.klru.org
# (r'kmbh\.org', 'KMBH-TV (KMBH)'), # http://www.kmbh.org
# (r'knct\.org', 'KNCT (KNCT)'), # http://www.knct.org
# (r'ktxt\.org', 'KTTZ-TV (KTXT)'), # http://www.ktxt.org
(r'video\.wtjx\.org', 'WTJX Channel 12 (WTJX)'), # http://www.wtjx.org/
(r'video\.ideastations\.org', 'WCVE PBS (WCVE)'), # http://ideastations.org/
(r'video\.kbtc\.org', 'KBTC Public Television (KBTC)'), # http://kbtc.org
)
IE_NAME = 'pbs'
@@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
# Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
''' % '|'.join(re.escape(p) for p in list(zip(*_STATIONS))[0])
''' % '|'.join(list(zip(*_STATIONS))[0])
_TESTS = [
{

View File

@@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor):
}]
def _call_api(self, method, value):
attribute = 'token' if len(value) > 13 else 'broadcast_id'
return self._download_json(
'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
def _real_extract(self, url):
token = self._match_id(url)

View File

@@ -17,9 +17,9 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
_VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
_TEST = {
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
@@ -36,7 +36,10 @@ class RutubeIE(InfoExtractor):
# It requires ffmpeg (m3u8 download)
'skip_download': True,
},
}
}, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -6,12 +6,12 @@ import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
sanitized_Request,
smuggle_url,
std_headers,
urlencode_postdata,
)
@@ -57,7 +57,7 @@ class SafariBaseIE(InfoExtractor):
}
request = sanitized_Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)

View File

@@ -1,146 +0,0 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .crunchyroll import CrunchyrollIE
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
remove_start,
xpath_text,
)
class SoompiBaseIE(InfoExtractor):
def _get_episodes(self, webpage, episode_filter=None):
episodes = self._parse_json(
self._search_regex(
r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
None)
return list(filter(episode_filter, episodes))
class SoompiIE(SoompiBaseIE, CrunchyrollIE):
IE_NAME = 'soompi'
_VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://tv.soompi.com/en/watch/29235',
'info_dict': {
'id': '29235',
'ext': 'mp4',
'title': 'Episode 1096',
'description': '2015-05-20'
},
'params': {
'skip_download': True,
},
}]
def _get_episode(self, webpage, video_id):
return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
def _get_subtitles(self, config, video_id):
sub_langs = {}
for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
subtitles = {}
for s in config.findall('./{default}preload/subtitle'):
lang_code = sub_langs.get(s.attrib['id'])
if not lang_code:
continue
sub_id = s.get('id')
data = xpath_text(s, './data', 'data')
iv = xpath_text(s, './iv', 'iv')
if not id or not iv or not data:
continue
subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
subtitles[lang_code] = self._extract_subtitles(subtitle)
return subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
try:
webpage = self._download_webpage(
url, video_id, 'Downloading episode page')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
webpage = ee.cause.read()
block_message = self._html_search_regex(
r'(?s)<div class="block-message">(.+?)</div>', webpage,
'block message', default=None)
if block_message:
raise ExtractorError(block_message, expected=True)
raise
formats = []
config = None
for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
config = self._download_xml(
'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
video_id, 'Downloading %s XML' % format_id)
m3u8_url = xpath_text(
config, './{default}preload/stream_info/file',
'%s m3u8 URL' % format_id)
if not m3u8_url:
continue
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id=format_id))
self._sort_formats(formats)
episode = self._get_episode(webpage, video_id)
title = episode['name']
description = episode.get('description')
duration = int_or_none(episode.get('duration'))
thumbnails = [{
'id': thumbnail_id,
'url': thumbnail_url,
} for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
subtitles = self.extract_subtitles(config, video_id)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnails': thumbnails,
'duration': duration,
'formats': formats,
'subtitles': subtitles
}
class SoompiShowIE(SoompiBaseIE):
IE_NAME = 'soompi:show'
_VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
_TESTS = [{
'url': 'http://tv.soompi.com/en/shows/liar-game',
'info_dict': {
'id': 'liar-game',
'title': 'Liar Game',
'description': 'md5:52c02bce0c1a622a95823591d0589b66',
},
'playlist_count': 14,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(
url, show_id, 'Downloading show page')
title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
description = self._og_search_description(webpage)
entries = [
self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
for episode in self._get_episodes(webpage)]
return self.playlist_result(entries, show_id, title, description)

View File

@@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
js_to_json,
qualities,
determine_ext,
)
class Tele13IE(InfoExtractor):
_VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [
{
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'md5': '4cb1fa38adcad8fea88487a078831755',
'info_dict': {
'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
'ext': 'mp4',
'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
},
'params': {
# HTTP Error 404: Not Found
'skip_download': True,
},
},
{
'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
'md5': '867adf6a3b3fef932c68a71d70b70946',
'info_dict': {
'id': 'rOoKv2OMpOw',
'ext': 'mp4',
'title': 'Shooting star seen on 7-Sep-2015',
'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
'uploader': 'Porjai Jaturongkhakun',
'upload_date': '20150906',
'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
},
'add_ie': ['Youtube'],
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code')
sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
preference = qualities(['Móvil', 'SD', 'HD'])
formats = []
urls = []
for f in sources:
format_url = f['file']
if format_url and format_url not in urls:
ext = determine_ext(format_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif YoutubeIE.suitable(format_url):
return self.url_result(format_url, 'Youtube')
else:
formats.append({
'url': format_url,
'format_id': f.get('label'),
'preference': preference(f.get('label')),
'ext': ext,
})
urls.append(format_url)
self._sort_formats(formats)
return {
'id': display_id,
'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
'description': self._html_search_meta('description', webpage, 'description'),
'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
'formats': formats,
}

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
_VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
_VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
@@ -22,7 +22,7 @@ class TF1IE(InfoExtractor):
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
'id': '12043945',
'id': 'le-grand-mysterioso-chuggington-7085291-739',
'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
@@ -32,22 +32,24 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
'skip': 'HTTP Error 410: Gone',
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
}, {
'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
'only_matching': True,
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_regex(
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
webpage, 'wat id', group='id')
wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
return self.url_result(wat_info['media']['url'], 'Wat')

View File

@@ -0,0 +1,194 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
)
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
'id': '343115',
'ext': 'mp4',
'title': 'Lion Moms Premiere',
'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
'upload_date': '20150910',
'timestamp': 1441858274,
},
'params': {
'skip_download': 'm3u8 download',
}
}, {
'note': 'DRM-protected video',
'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
'title': 'Dug\'s Special Mission',
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
'upload_date': '20150827',
'timestamp': 1440644006,
},
'params': {
'skip_download': 'DRM-protected wvm download',
}
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
'title': '28th SEA Games (5 Show) - Episode 11',
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
'upload_date': '20150605',
'timestamp': 1433480166,
},
'params': {
'skip_download': 'DRM-protected wvm download',
},
'skip': 'm3u8 links are geo-restricted'
}, {
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
'only_matching': True,
}, {
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
'only_matching': True,
}]
_FORMAT_PREFERENCES = {
'wvm-STBMain': -10,
'wvm-iPadMain': -20,
'wvm-iPhoneMain': -30,
'wvm-Android': -40,
}
_API_USER = 'tvpapi_147'
_API_PASS = '11111'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, note='Downloading video page')
api_user = self._search_regex(
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
default=self._API_USER, group='user')
api_pass = self._search_regex(
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
default=self._API_PASS, group='pass')
params = {
'initObj': {
'Locale': {
'LocaleLanguage': '',
'LocaleCountry': '',
'LocaleDevice': '',
'LocaleUserState': 0
},
'Platform': 0,
'SiteGuid': 0,
'DomainID': '0',
'UDID': '',
'ApiUser': api_user,
'ApiPass': api_pass
},
'MediaID': video_id,
'mediaType': 0,
}
req = sanitized_Request(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
json.dumps(params).encode('utf-8'))
info = self._download_json(req, video_id, 'Downloading video info json')
title = info['MediaName']
formats = []
for video_file in info.get('Files', []):
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
if not video_url or not vid_format:
continue
ext = determine_ext(video_url)
vid_format = vid_format.replace(' ', '')
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext in ('mp4', 'wvm'):
# wvm are drm-protected files
formats.append({
'ext': ext,
'url': video_url,
'format_id': vid_format,
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
'format_note': 'DRM-protected video' if ext == 'wvm' else None
})
if not formats:
# Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats)
duration = int_or_none(info.get('Duration'))
description = info.get('Description')
created_at = parse_iso8601(info.get('CreationDate') or None)
average_rating = float_or_none(info.get('Rating'))
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
thumbnails = []
for picture in info.get('Pictures', []):
if not isinstance(picture, dict):
continue
pic_url = picture.get('URL')
if not pic_url:
continue
thumbnail = {
'url': pic_url,
}
pic_size = picture.get('PicSize', '')
m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
if m:
thumbnail.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
thumbnails.append(thumbnail)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': created_at,
'average_rating': average_rating,
'view_count': view_count,
'like_count': like_count,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..compat import (
compat_etree_fromstring,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
@@ -67,6 +70,17 @@ class VevoIE(InfoExtractor):
'params': {
'skip_download': 'true',
}
}, {
'note': 'No video_info',
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
'info_dict': {
'id': 'USUV71503000',
'ext': 'mp4',
'title': 'Till I Die - K Camp ft. T.I.',
'duration': 193,
},
'expected_warnings': ['Unable to download SMIL file'],
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -81,11 +95,17 @@ class VevoIE(InfoExtractor):
if webpage is False:
self._oauth_token = None
else:
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
self._oauth_token = self._search_regex(
r'access_token":\s*"([^"]+)"',
webpage, 'access token', fatal=False)
def _formats_from_json(self, video_info):
if not video_info:
return []
last_version = {'version': -1}
for version in video_info['videoVersions']:
# These are the HTTP downloads, other types are for different manifests
@@ -110,9 +130,8 @@ class VevoIE(InfoExtractor):
})
return formats
def _formats_from_smil(self, smil_xml):
def _formats_from_smil(self, smil_doc):
formats = []
smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
for el in els:
src = el.attrib['src']
@@ -145,14 +164,14 @@ class VevoIE(InfoExtractor):
})
return formats
def _download_api_formats(self, video_id):
def _download_api_formats(self, video_id, video_url):
if not self._oauth_token:
self._downloader.report_warning(
'No oauth token available, skipping API HLS download')
return []
api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
video_id, self._oauth_token)
api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
video_id, self._oauth_token))
api_data = self._download_json(
api_url, video_id,
note='Downloading HLS formats',
@@ -166,18 +185,26 @@ class VevoIE(InfoExtractor):
preference=0)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = None
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
response = self._download_json(json_url, video_id)
video_info = response['video']
video_info = response['video'] or {}
if not video_info:
if not video_info and response.get('statusCode') != 909:
if 'statusMessage' in response:
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
raise ExtractorError('Unable to extract videos')
if not video_info:
if url.startswith('vevo:'):
raise ExtractorError('Please specify full Vevo URL for downloading', expected=True)
webpage = self._download_webpage(url, video_id)
title = video_info.get('title') or self._og_search_title(webpage)
formats = self._formats_from_json(video_info)
is_explicit = video_info.get('isExplicit')
@@ -189,11 +216,11 @@ class VevoIE(InfoExtractor):
age_limit = None
# Download via HLS API
formats.extend(self._download_api_formats(video_id))
formats.extend(self._download_api_formats(video_id, url))
# Download SMIL
smil_blocks = sorted((
f for f in video_info['videoVersions']
f for f in video_info.get('videoVersions', [])
if f['sourceType'] == 13),
key=lambda f: f['version'])
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
@@ -205,23 +232,26 @@ class VevoIE(InfoExtractor):
if smil_url_m is not None:
smil_url = smil_url_m
if smil_url:
smil_xml = self._download_webpage(
smil_url, video_id, 'Downloading SMIL info', fatal=False)
if smil_xml:
formats.extend(self._formats_from_smil(smil_xml))
smil_doc = self._download_smil(smil_url, video_id, fatal=False)
if smil_doc:
formats.extend(self._formats_from_smil(smil_doc))
self._sort_formats(formats)
timestamp_ms = int_or_none(self._search_regex(
timestamp = int_or_none(self._search_regex(
r'/Date\((\d+)\)/',
video_info['launchDate'], 'launch date', fatal=False))
video_info['launchDate'], 'launch date', fatal=False),
scale=1000) if video_info else None
duration = video_info.get('duration') or int_or_none(
self._html_search_meta('video:duration', webpage))
return {
'id': video_id,
'title': video_info['title'],
'title': title,
'formats': formats,
'thumbnail': video_info['imageUrl'],
'timestamp': timestamp_ms // 1000,
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
'thumbnail': video_info.get('imageUrl'),
'timestamp': timestamp,
'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None,
'duration': duration,
'age_limit': age_limit,
}

View File

@@ -4,26 +4,48 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .xstream import XstreamIE
from ..utils import (
ExtractorError,
float_or_none,
)
class VGTVIE(InfoExtractor):
IE_DESC = 'VGTV and BTTV'
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
'bt.no/tv': 'bttv',
'aftenbladet.no/tv': 'satv',
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {
'vgtv': 'vgtv',
'bttv': 'bt',
'satv': 'sa',
'fvntv': 'fvn',
'aptv': 'ap',
}
_VALID_URL = r'''(?x)
(?:
vgtv:|
http://(?:www\.)?
(?:https?://(?:www\.)?
(?P<host>
%s
)
(?P<host>vgtv|bt)
/
(?:
:|
\.no/(?:tv/)?\#!/(?:video|live)/
)
(?P<id>[0-9]+)
'''
\#!/(?:video|live)/|
embed?.*id=
)|
(?P<appname>
%s
):)
(?P<id>\d+)
''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
_TESTS = [
{
# streamType: vod
@@ -59,17 +81,18 @@ class VGTVIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'skip': 'Video is no longer available',
},
{
# streamType: live
# streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': {
'id': '113063',
'ext': 'flv',
'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'ext': 'mp4',
'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 0,
'duration': 25966,
'timestamp': 1432975582,
'upload_date': '20150530',
'view_count': int,
@@ -79,6 +102,20 @@ class VGTVIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'duration': 66,
'timestamp': 1417002452,
'upload_date': '20141126',
'view_count': int,
}
},
{
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
@@ -89,21 +126,27 @@ class VGTVIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
HOST_WEBSITES = {
'vgtv': 'vgtv',
'bt': 'bttv',
}
appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
vendor = self._APP_NAME_TO_VENDOR[appname]
data = self._download_json(
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
% (host, video_id, HOST_WEBSITES[host]),
% (vendor, video_id, appname),
video_id, 'Downloading media JSON')
if data.get('status') == 'inactive':
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
info = {
'formats': [],
}
if len(video_id) == 5:
if appname == 'bttv':
info = self._extract_video_info('btno', video_id)
elif appname == 'aptv':
info = self._extract_video_info('ap', video_id)
streams = data['streamUrls']
stream_type = data.get('streamType')
@@ -111,48 +154,53 @@ class VGTVIE(InfoExtractor):
hls_url = streams.get('hls')
if hls_url:
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, 'mp4', m3u8_id='hls'))
m3u8_formats = self._extract_m3u8_formats(
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
hds_url = streams.get('hds')
# wasLive hds are always 404
if hds_url and stream_type != 'wasLive':
formats.extend(self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds'))
f4m_formats = self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4')
if mp4_url:
_url = hls_url or hds_url
MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
for mp4_format in _url.split(','):
m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
if not m:
continue
width = int(m.group('width'))
height = int(m.group('height'))
vbr = int(m.group('vbr'))
formats.append({
'url': MP4_URL_TEMPLATE % mp4_format,
'format_id': 'mp4-%s' % vbr,
'width': width,
'height': height,
'vbr': vbr,
'preference': 1,
mp4_urls.append(mp4_url)
for mp4_url in mp4_urls:
format_info = {
'url': mp4_url,
}
mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
if mobj:
tbr = int(mobj.group(3))
format_info.update({
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
'tbr': tbr,
'format_id': 'mp4-%s' % tbr,
})
self._sort_formats(formats)
formats.append(format_info)
return {
info['formats'].extend(formats)
self._sort_formats(info['formats'])
info.update({
'id': video_id,
'title': self._live_title(data['title']),
'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
'formats': formats,
'is_live': True if stream_type == 'live' else False,
}
})
return info
class BTArticleIE(InfoExtractor):
@@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
_TEST = {
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
'md5': '2acbe8ad129b3469d5ae51b1158878df',
'info_dict': {
'id': '23199',
'ext': 'mp4',
@@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, self._match_id(url))
video_id = self._search_regex(
r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
return self.url_result('bttv:%s' % video_id, 'VGTV')
class BTVestlendingenIE(InfoExtractor):
IE_NAME = 'bt:vestlendingen'
IE_DESC = 'Bergens Tidende - Vestlendingen'
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
'info_dict': {
@@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
'timestamp': 1430473209,
'upload_date': '20150501',
},
}
'skip': '404 Error',
}, {
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
'info_dict': {
'id': '86255',
'ext': 'mov',
'title': 'Du må tåle å fryse og være sulten',
'description': 'md5:b8046f4d022d5830ddab04865791d063',
'upload_date': '20150321',
'timestamp': 1426942023,
},
}]
def _real_extract(self, url):
return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')

View File

@@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor):
_token = None
_ERRORS = {
'geo': 'Sorry, this content is not available in your region.',
'upcoming': 'Sorry, this content is not yet available.',
# 'paywall': 'paywall',
}
def _prepare_call(self, path, timestamp=None, post_data=None):
path += '?' if '?' not in path else '&'
if not timestamp:
@@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error),
expected=True)
def _check_errors(self, data):
for reason, status in data.get('blocking', {}).items():
if status and reason in self._ERRORS:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, self._ERRORS[reason]), expected=True)
def _real_initialize(self):
self._login()
@@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE):
'timestamp': 1321985454,
'description': 'md5:44b1e46619df3a072294645c770cef36',
'title': 'Love In Magic',
'age_limit': 13,
},
}]
@@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE):
video = self._call_api(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
self._check_errors(video)
title = self.dict_selection(video.get('titles', {}), 'en')
if not title:
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE):
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
if format_id == 'm3u8':
formats = self._extract_m3u8_formats(
format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
m3u8_formats = self._extract_m3u8_formats(
format_dict['url'], video_id, 'mp4', 'm3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=None)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'url': format_dict['url'],
@@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE):
'containers/%s.json' % channel_id, channel_id,
'Downloading channel JSON')
self._check_errors(channel)
title = self.dict_selection(channel['titles'], 'en')
description = self.dict_selection(channel['descriptions'], 'en')

View File

@@ -23,6 +23,7 @@ from ..utils import (
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
parse_filesize,
)
@@ -184,6 +185,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'user28849593',
},
},
{
# contains original format
'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
'uploader': 'The DMCI',
'uploader_id': 'dmci',
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
},
{
'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status',
@@ -392,6 +407,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
comment_count = None
formats = []
download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
'X-Requested-With': 'XMLHttpRequest'})
download_data = self._download_json(download_request, video_id, fatal=False)
if download_data:
source_file = download_data.get('source_file')
if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
formats.append({
'url': source_file['download_url'],
'ext': source_file['extension'].lower(),
'width': int_or_none(source_file.get('width')),
'height': int_or_none(source_file.get('height')),
'filesize': parse_filesize(source_file.get('size')),
'format_id': source_file.get('public_name', 'Original'),
'preference': 1,
})
config_files = config['video'].get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
video_url = f.get('url')
@@ -408,12 +438,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified. This lead to wrong sorting.
self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
subtitles = {}
text_tracks = config['request'].get('text_tracks')

View File

@@ -10,8 +10,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
unified_strdate,
qualities,
)
@@ -33,6 +33,7 @@ class WDRIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
@@ -47,6 +48,7 @@ class WDRIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
@@ -71,6 +73,7 @@ class WDRIE(InfoExtractor):
'upload_date': '20140717',
'is_live': False
},
'skip': 'Page Not Found',
},
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
@@ -83,10 +86,10 @@ class WDRIE(InfoExtractor):
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
'info_dict': {
'id': 'mdb-103364',
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
'ext': 'flv',
'upload_date': '20150212',
'upload_date': '20150101',
'is_live': True
},
'params': {
@@ -150,25 +153,52 @@ class WDRIE(InfoExtractor):
if upload_date:
upload_date = unified_strdate(upload_date)
formats = []
preference = qualities(['S', 'M', 'L', 'XL'])
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif video_url.endswith('.smil'):
fmt = self._extract_smil_formats(video_url, page_id)[0]
video_url = fmt['url']
sep = '&' if '?' in video_url else '?'
video_url += sep
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
ext = fmt['ext']
smil_formats = self._extract_smil_formats(video_url, page_id, False, {
'hdcore': '3.3.0',
'plugin': 'aasp-3.3.0.99.43',
})
if smil_formats:
formats.extend(smil_formats)
else:
ext = determine_ext(video_url)
formats.append({
'url': video_url,
'http_headers': {
'User-Agent': 'mobile',
},
})
m3u8_url = self._search_regex(r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url', default=None)
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage)
if direct_urls:
for quality, video_url in direct_urls:
formats.append({
'url': video_url,
'preference': preference(quality),
'http_headers': {
'User-Agent': 'mobile',
},
})
self._sort_formats(formats)
description = self._html_search_meta('Description', webpage, 'description')
return {
'id': page_id,
'url': video_url,
'ext': ext,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,

View File

@@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
def _extract_video_info(self, partner_id, video_id):
data = self._download_xml(
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
% (partner_id, video_id),
@@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
formats.append({
'url': link.get('href'),
'format_id': link.get('rel'),
'preference': 1,
})
thumbnails = [{
@@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
'formats': formats,
'thumbnails': thumbnails,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
return self._extract_video_info(partner_id, video_id)

View File

@@ -25,8 +25,8 @@ class YoukuIE(InfoExtractor):
'''
_TESTS = [{
# MD5 is unstable
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
'md5': '5f3af4192eabacc4501508d54a8cabd7',
'info_dict': {
'id': 'XMTc1ODE5Njcy_part1',
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
@@ -42,6 +42,7 @@ class YoukuIE(InfoExtractor):
'title': '武媚娘传奇 85',
},
'playlist_count': 11,
'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
'info_dict': {
@@ -49,7 +50,6 @@ class YoukuIE(InfoExtractor):
'title': '花千骨 04',
},
'playlist_count': 13,
'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
'note': 'Video protected with password',
@@ -63,7 +63,7 @@ class YoukuIE(InfoExtractor):
},
}]
def construct_video_urls(self, data1, data2):
def construct_video_urls(self, data):
# get sid, token
def yk_t(s1, s2):
ls = list(range(256))
@@ -81,34 +81,24 @@ class YoukuIE(InfoExtractor):
return bytes(s)
sid, token = yk_t(
b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
).decode('ascii').split('_')
# get oip
oip = data2['ip']
# get fileid
string_ls = list(
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
shuffled_string_ls = []
seed = data1['seed']
N = len(string_ls)
for ii in range(N):
seed = (seed * 0xd3 + 0x754f) % 0x10000
idx = seed * len(string_ls) // 0x10000
shuffled_string_ls.append(string_ls[idx])
del string_ls[idx]
oip = data['security']['ip']
fileid_dict = {}
for format in data1['streamtypes']:
streamfileid = [
int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
fileid = ''.join(
[shuffled_string_ls[i] for i in streamfileid])
fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
for stream in data['stream']:
format = stream.get('stream_type')
fileid = stream['stream_fileid']
fileid_dict[format] = fileid
def get_fileid(format, n):
fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
number = hex(int(str(n), 10))[2:].upper()
if len(number) == 1:
number = '0' + number
streamfileids = fileid_dict[format]
fileid = streamfileids[0:8] + number + streamfileids[10:]
return fileid
# get ep
@@ -123,15 +113,15 @@ class YoukuIE(InfoExtractor):
# generate video_urls
video_urls_dict = {}
for format in data1['streamtypes']:
for stream in data['stream']:
format = stream.get('stream_type')
video_urls = []
for dt in data1['segs'][format]:
n = str(int(dt['no']))
for dt in stream['segs']:
n = str(stream['segs'].index(dt))
param = {
'K': dt['k'],
'K': dt['key'],
'hd': self.get_hd(format),
'myp': 0,
'ts': dt['seconds'],
'ypp': 0,
'ctype': 12,
'ev': 1,
@@ -142,7 +132,7 @@ class YoukuIE(InfoExtractor):
video_url = \
'http://k.youku.com/player/getFlvPath/' + \
'sid/' + sid + \
'_' + str(int(n) + 1).zfill(2) + \
'_00' + \
'/st/' + self.parse_ext_l(format) + \
'/fileid/' + get_fileid(format, n) + '?' + \
compat_urllib_parse.urlencode(param)
@@ -153,23 +143,31 @@ class YoukuIE(InfoExtractor):
def get_hd(self, fm):
hd_id_dict = {
'3gp': '0',
'3gphd': '1',
'flv': '0',
'flvhd': '0',
'mp4': '1',
'mp4hd': '1',
'mp4hd2': '1',
'mp4hd3': '1',
'hd2': '2',
'hd3': '3',
'3gp': '0',
'3gphd': '1'
}
return hd_id_dict[fm]
def parse_ext_l(self, fm):
ext_dict = {
'3gp': 'flv',
'3gphd': 'mp4',
'flv': 'flv',
'flvhd': 'flv',
'mp4': 'mp4',
'mp4hd': 'mp4',
'mp4hd2': 'flv',
'mp4hd3': 'flv',
'hd2': 'flv',
'hd3': 'flv',
'3gp': 'flv',
'3gphd': 'mp4'
}
return ext_dict[fm]
@@ -178,9 +176,13 @@ class YoukuIE(InfoExtractor):
'3gp': 'h6',
'3gphd': 'h5',
'flv': 'h4',
'flvhd': 'h4',
'mp4': 'h3',
'mp4hd': 'h3',
'mp4hd2': 'h4',
'mp4hd3': 'h4',
'hd2': 'h2',
'hd3': 'h1'
'hd3': 'h1',
}
return _dict[fm]
@@ -188,45 +190,46 @@ class YoukuIE(InfoExtractor):
video_id = self._match_id(url)
def retrieve_data(req_url, note):
req = sanitized_Request(req_url)
headers = {
'Referer': req_url,
}
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
req = sanitized_Request(req_url, headers=headers)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
raw_data = self._download_json(req, video_id, note=note)
return raw_data['data'][0]
return raw_data['data']
video_password = self._downloader.params.get('videopassword', None)
# request basic data
basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
if video_password:
basic_data_url += '?password=%s' % video_password
basic_data_url += '&pwd=%s' % video_password
data1 = retrieve_data(
basic_data_url,
'Downloading JSON metadata 1')
data2 = retrieve_data(
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
'Downloading JSON metadata 2')
data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
error_code = data1.get('error_code')
if error_code:
error = data1.get('error')
if error is not None and '因版权原因无法观看此视频' in error:
error = data.get('error')
if error:
error_note = error.get('note')
if error_note is not None and '因版权原因无法观看此视频' in error_note:
raise ExtractorError(
'Youku said: Sorry, this video is available in China only', expected=True)
else:
msg = 'Youku server reported error %i' % error_code
if error is not None:
msg += ': ' + error
msg = 'Youku server reported error %i' % error.get('code')
if error_note is not None:
msg += ': ' + error_note
raise ExtractorError(msg)
title = data1['title']
# get video title
title = data['video']['title']
# generate video_urls_dict
video_urls_dict = self.construct_video_urls(data1, data2)
video_urls_dict = self.construct_video_urls(data)
# construct info
entries = [{
@@ -235,10 +238,11 @@ class YoukuIE(InfoExtractor):
'formats': [],
# some formats are not available for all parts, we have to detect
# which one has all
} for i in range(max(len(v) for v in data1['segs'].values()))]
for fm in data1['streamtypes']:
} for i in range(max(len(v.get('segs')) for v in data['stream']))]
for stream in data['stream']:
fm = stream.get('stream_type')
video_urls = video_urls_dict[fm]
for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
entry['formats'].append({
'url': video_url,
'format_id': self.get_format_name(fm),

View File

@@ -26,6 +26,7 @@ from ..compat import (
from ..utils import (
clean_html,
encode_dict,
error_to_compat_str,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -33,6 +34,7 @@ from ..utils import (
int_or_none,
orderedSet,
parse_duration,
remove_quotes,
remove_start,
sanitized_Request,
smuggle_url,
@@ -395,12 +397,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
'creator': 'Icona Pop',
}
},
{
@@ -411,9 +415,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20130703',
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
'alt_title': 'Tunnel Vision',
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
'creator': 'Justin Timberlake',
'age_limit': 18,
}
},
@@ -492,10 +498,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
'alt_title': 'Shake It Off',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
'creator': 'Taylor Swift',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -551,9 +559,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'creator': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
},
'expected_warnings': [
'DASH manifest missing',
@@ -701,10 +711,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf',
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
},
'params': {
'skip_download': True,
@@ -892,7 +904,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
return {}
sub_lang_list = {}
@@ -1308,6 +1320,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
m_music = re.search(
r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
video_creator = clean_html(m_music.group('creator'))
else:
video_alt_title = video_creator = None
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
@@ -1537,7 +1558,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'creator': video_creator,
'title': video_title,
'alt_title': video_alt_title,
'thumbnail': video_thumbnail,
'description': video_description,
'categories': video_categories,
@@ -1752,6 +1775,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
},
}]
@classmethod
def suitable(cls, url):
return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -1825,10 +1852,10 @@ class YoutubeUserIE(YoutubeChannelIE):
return super(YoutubeUserIE, cls).suitable(url)
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:user:playlists'
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user/channel playlists'
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:playlists'
_TESTS = [{
'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
@@ -1845,6 +1872,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'id': 'igorkle1',
'title': 'Игорь Клейнер',
},
}, {
'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
'playlist_mincount': 17,
'info_dict': {
'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
'title': 'Chem Player',
},
}]

View File

@@ -232,7 +232,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
r'''(?x)
(?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
\((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname), re.escape(funcname)),

View File

@@ -52,7 +52,7 @@ class FFmpegPostProcessor(PostProcessor):
def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
prefer_ffmpeg = False
self.basename = None
self.probe_basename = None
@@ -60,6 +60,7 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None
self._versions = None
if self._downloader:
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
location = self._downloader.params.get('ffmpeg_location')
if location is not None:
if not os.path.exists(location):

View File

@@ -9,7 +9,7 @@ import subprocess
import sys
from zipimport import zipimporter
from .compat import compat_str
from .utils import encode_compat_str
from .version import __version__
@@ -61,7 +61,7 @@ def update_self(to_screen, verbose, opener):
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
return
if newversion == __version__:
@@ -74,7 +74,7 @@ def update_self(to_screen, verbose, opener):
versions_info = json.loads(versions_info)
except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if 'signature' not in versions_info:
@@ -123,7 +123,7 @@ def update_self(to_screen, verbose, opener):
urlh.close()
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@@ -137,7 +137,7 @@ def update_self(to_screen, verbose, opener):
outf.write(newcontent)
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to write the new version')
return
@@ -157,7 +157,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
return # Do not show premature success messages
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return
@@ -169,7 +169,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh.close()
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to download latest version')
return
@@ -183,7 +183,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
outf.write(newcontent)
except (IOError, OSError):
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(encode_compat_str(traceback.format_exc()))
to_screen('ERROR: unable to overwrite current version')
return

View File

@@ -1406,6 +1406,15 @@ def remove_end(s, end):
return s
def remove_quotes(s):
if s is None or len(s) < 2:
return s
for quote in ('"', "'", ):
if s[0] == quote and s[-1] == quote:
return s[1:-1]
return s
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip('/').split('/')[-1]
@@ -1703,6 +1712,10 @@ def encode_dict(d, encoding='utf-8'):
return dict((encode(k), encode(v)) for k, v in d.items())
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
US_RATINGS = {
'G': 0,
'PG': 10,
@@ -1797,6 +1810,15 @@ def args_to_str(args):
return ' '.join(shlex_quote(a) for a in args)
def error_to_compat_str(err):
err_str = str(err)
# On python 2 error byte string must be decoded with proper
# encoding rather than ascii
if sys.version_info[0] < 3:
err_str = err_str.decode(preferredencoding())
return err_str
def mimetype2ext(mt):
_, _, res = mt.rpartition('/')
@@ -1967,15 +1989,15 @@ def match_filter_func(filter_str):
def parse_dfxp_time_expr(time_expr):
if not time_expr:
return 0.0
return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))
mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
if mobj:
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
@@ -2011,10 +2033,15 @@ def dfxp2srt(dfxp_data):
raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
dur = parse_dfxp_time_expr(para.attrib.get('dur'))
if begin_time is None:
continue
if not end_time:
end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
if not dur:
continue
end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % (
index,
srt_subtitles_timecode(begin_time),

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.12.10'
__version__ = '2015.12.23'