Compare commits


213 Commits

Author SHA1 Message Date
Philipp Hagemeister
d85187eb74 release 2015.09.09 2015-09-09 21:20:01 +02:00
Sergey M․
cc1ac11017 [vier:videos] Fix extraction with old approach (Closes #6806) 2015-09-09 23:59:17 +06:00
Sergey M․
73f536439e Credit @reddraggone9 for fixing youtube 2-factor authentication (#6553) 2015-09-09 23:49:36 +06:00
Sergey M․
b17e7d9a9b Credit @anssih for missing metadata extraction on youtube (#5993) 2015-09-09 23:44:49 +06:00
Sergey M․
2f29b758e0 Credit @lalinsky for vidme improvement (#6812) 2015-09-09 23:41:44 +06:00
Sergey M․
482aa3fecc [vidme] Simplify, make more robust, extract more metadata and capture errors (Closes #6812) 2015-09-09 22:04:05 +06:00
Lukáš Lalinský
d9c19db340 Extract all vid.me formats 2015-09-09 22:03:53 +06:00
Sergey M.
6c4d243de5 Merge pull request #6804 from remitamine/xuite-tests
[xuite] fix test md5
2015-09-09 20:48:52 +06:00
Sergey M.
d1561ef777 Merge pull request #6805 from remitamine/washingtonpost-tests
[washingtonpost] fix test info
2015-09-09 20:48:01 +06:00
remitamine
1072336249 [washingtonpost] fix test info 2015-09-08 20:40:23 +01:00
remitamine
75bb5c7028 [xuite] fix test md5 2015-09-08 20:12:30 +01:00
Sergey M․
47004d9579 [crunchyroll] Fix uploader and upload date extraction 2015-09-08 14:37:53 +06:00
Sergey M․
12810c9cd3 [crunchyroll] Force Accept-Language to any for all requests (Closes #6797) 2015-09-08 14:11:20 +06:00
Sergey M․
7a459170fa [__init__] Remove musicvault 2015-09-08 05:40:00 +06:00
Sergey M.
3cf0df568a Merge pull request #6791 from remitamine/musicvault
[musicvault] remove extractor
2015-09-08 05:37:24 +06:00
remitamine
b88ebd472e [musicvault] remove extractor 2015-09-07 21:25:32 +01:00
Sergey M.
3ecc527209 Merge pull request #6786 from remitamine/wimp-tests
[wimp] fix tests info
2015-09-07 20:39:24 +06:00
remitamine
b1b7d1ffba [wimp] fix tests info 2015-09-07 12:49:59 +01:00
Jaime Marquínez Ferrándiz
4003bd82b0 [youtube:shows] Use https 2015-09-07 12:56:16 +02:00
remitamine
8801255d7d [youtube] fix show extraction (fixes #4841)
(Closes #6782)
2015-09-07 12:53:52 +02:00
remitamine
c67a055d16 [test/test_write_annotations] fix test filename
Closes #6781
2015-09-07 11:18:55 +02:00
Yen Chi Hsuan
bc973e06d0 Merge pull request #6783 from remitamine/yahoo-tests
[yahoo] change test md5 checksum
2015-09-07 16:24:08 +08:00
Yen Chi Hsuan
aeb3c8a0e8 Merge pull request #6784 from remitamine/aljazeera
[aljazeera] skip test
2015-09-07 16:14:35 +08:00
remitamine
cf33a47df0 [aljazeera] skip test 2015-09-07 08:22:38 +01:00
remitamine
daeb0f04cd [yahoo] change test md5 checksum 2015-09-07 08:09:34 +01:00
Sergey M․
97243fe395 [ceskatelevize] Extract playlists (Closes #6772) 2015-09-07 04:52:26 +06:00
Sergey M․
9dbdb65abe [techtvmit] Update test 2015-09-06 10:32:42 +06:00
Sergey M․
9af461de35 [techtvmit] Fix extraction 2015-09-06 10:28:40 +06:00
Sergey M.
4d71e200c6 Merge pull request #6774 from remitamine/academicearth
[academicearth] fix test info
2015-09-06 10:22:08 +06:00
Sergey M.
8e0bdabed2 Merge pull request #6773 from remitamine/airmozilla
[airmozilla] fix test info
2015-09-06 10:21:47 +06:00
Sergey M․
bca553caac [eagleplatform] Fix extraction 2015-09-06 10:21:15 +06:00
remitamine
a2f42a3baf [academicearth] fix test info 2015-09-06 04:52:27 +01:00
remitamine
7465222a9c [airmozilla] fix test info 2015-09-06 04:45:13 +01:00
Sergey M․
e28034c5ac [utils] Comment cookie processing until result from travis and some more testing 2015-09-06 08:16:39 +06:00
Sergey M․
12bbd32ad0 [kuwo:singer] Skip test 2015-09-06 08:00:16 +06:00
Sergey M․
266e466ee4 [utils] Simplify cookie processor 2015-09-06 07:53:11 +06:00
Sergey M․
cf83f532ae [niconico] Use encode_dict 2015-09-06 07:43:05 +06:00
Sergey M․
cd019668dc [niconico] Use encode_dict 2015-09-06 07:42:45 +06:00
Sergey M․
515fc8776f [youtube] Use encode_dict 2015-09-06 07:42:15 +06:00
Sergey M․
c7c0996d8c [gorillavid] Add test for filehoot.com (Closes #6771) 2015-09-06 07:32:06 +06:00
Sergey M․
b3e64671cc [gorillavid] Improve title 2015-09-06 07:30:16 +06:00
Sergey M․
4abe214499 [gorillavid] Encode fields 2015-09-06 07:30:02 +06:00
Sergey M․
e94cb5ae7e [gorillavid] Add support for filehoot.com 2015-09-06 07:24:47 +06:00
Sergey M․
e213c98df1 [gorillavid] Build correct URL 2015-09-06 07:23:53 +06:00
Sergey M․
1639282434 [utils] Add encode_dict 2015-09-06 07:22:20 +06:00
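Note: encode_dict itself lands in utils.py, which is not among the hunks shown below. Judging from the call sites in the commits above ([niconico], [youtube] and [gorillavid] form data), a minimal sketch would be:

```python
def encode_dict(d, encoding='utf-8'):
    # Encode keys and values so the result can be passed to
    # compat_urllib_parse.urlencode() on Python 2 without UnicodeDecodeError.
    return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
```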
Sergey M․
be0e5dbd83 [extractor/common] Extract submit inputs 2015-09-06 07:20:47 +06:00
Sergey M․
ad72917274 [utils] Add issue URL in comment for #6457 2015-09-06 06:23:44 +06:00
Sergey M․
6a3f4c3f82 [YoutubeDL] Use custom cookie processor 2015-09-06 06:21:33 +06:00
Sergey M․
a6420bf50c [utils] Add cookie processor for cookie correction (Closes #6769) 2015-09-06 06:20:48 +06:00
Sergey M․
eb387896e9 [adultswim] Fix extraction on python 2.6 2015-09-06 02:24:14 +06:00
Sergey M․
8df5ae15d1 [compat] Fix python version check for compat_shlex_split 2015-09-05 22:21:06 +06:00
felix
12439dd5ec [generic] Capture <audio> tags in addition to <video> tags 2015-09-05 21:53:10 +06:00
Sergey M․
3513d41436 [test_compat] Fix typo 2015-09-05 21:45:52 +06:00
Sergey M․
cab792abe5 [options] Use compat_shlex_split 2015-09-05 21:42:41 +06:00
Sergey M․
8870358b1b [__init__] Use compat_shlex_split 2015-09-05 21:42:22 +06:00
Sergey M․
ee087c79ad [test_compat] Add test for compat_shlex_split 2015-09-05 21:41:34 +06:00
Sergey M․
51f579b635 [compat] Add compat shlex.split 2015-09-05 21:40:44 +06:00
Sergey M․
4abf617b9c [YoutubeDL] Ensure bool params always present in downloader 2015-09-05 19:17:30 +06:00
Jaime Marquínez Ferrándiz
3026164b16 [postprocessor/common] Use 'self._downloader.params' instead of 'self.params'
'self.params' is not defined
2015-09-05 12:11:36 +02:00
Yen Chi Hsuan
9dd73ef4a4 [dcn] PEP8 2015-09-05 15:38:05 +08:00
Yen Chi Hsuan
75c72a1e67 Merge branch 'ping-vlive' 2015-09-05 15:35:19 +08:00
Yen Chi Hsuan
08354db47b [vlive] PEP8 2015-09-05 15:25:19 +08:00
Yen Chi Hsuan
027eb5a6b0 Merge branch 'vlive' of https://github.com/ping/youtube-dl into ping-vlive 2015-09-05 15:24:09 +08:00
Sergey M․
f71264490c [test_utils] Add tests for cli option converters 2015-09-05 03:07:19 +06:00
Sergey M․
6270239a6d [postprocessor/common] Use generalized cli option converters 2015-09-05 03:06:53 +06:00
Sergey M․
1195a38f46 [downloader/external] Use generalized cli option converters 2015-09-05 03:06:28 +06:00
Sergey M․
66e289bab4 [utils] Generalize cli option converters 2015-09-05 03:05:11 +06:00
Sergey M.
52c6f26cab Merge pull request #6755 from remitamine/external_downloader_options
[downloader/external] Respect --no-check-certificate for curl and aria2c and --proxy for curl
2015-09-05 02:19:41 +06:00
remitamine
dc534b674f [downloader/external] change _argless_option function to _valueless_option 2015-09-04 21:12:13 +01:00
remitamine
f30c2e8e98 [downloader/external] add _argless_option for option without arguments 2015-09-04 20:57:19 +01:00
Jaime Marquínez Ferrándiz
c482b3c69a [ruutu] Update test url 2015-09-04 21:11:29 +02:00
remitamine
266b0ad676 [downloader/external] add _bool_option to pass value to bool option 2015-09-04 20:07:36 +01:00
Sergey M․
87f70ab39d [test_utils] Add more tests for xpath 2015-09-05 00:36:16 +06:00
Sergey M․
8e636da499 [utils] Improve xpath_text 2015-09-05 00:34:49 +06:00
Sergey M․
22889ab175 [ruutu] Fix extraction (Closes #6759) 2015-09-05 00:01:40 +06:00
Sergey M․
5d2354f177 [utils] Relax attribute key assert 2015-09-04 23:57:27 +06:00
Sergey M․
a41fb80ce1 [utils] Add xpath_element and xpath_attr 2015-09-04 23:56:45 +06:00
Sergey M.
2e2575e213 Merge pull request #6760 from ferama/master
Added ec extension to nowvideo extractor
2015-09-04 21:49:25 +06:00
Marco Ferragina
26c61e0809 Added ec extension to nowvideo extractor 2015-09-04 12:59:13 +02:00
remitamine
e7a8c3032d [downloader/external] Respect --no-check-certificate for curl and aria2c and --proxy for curl 2015-09-03 22:25:33 +01:00
Jaime Marquínez Ferrándiz
725d1c58aa [crunchyroll] Extract width and height (closes #6749) 2015-09-03 13:15:02 +02:00
Philipp Hagemeister
bd6742137f release 2015.09.03 2015-09-03 12:34:20 +02:00
Sergey M․
e8dcfa3d69 [ok] Sort formats 2015-09-02 22:09:33 +06:00
Sergey M․
88720ed09b [ok] Add support for youtube embeds 2015-09-02 22:08:50 +06:00
Sergey M․
1e804244d0 [ok] Improve player regex 2015-09-02 21:38:56 +06:00
Sergey M․
198492bbf0 [ok] Extract correct data-options (Closes #6726) 2015-09-02 21:36:22 +06:00
Sergey M.
8f9d522f62 [README.md] Remove superfluous word 2015-09-02 21:09:31 +06:00
Sergey M.
cbae233aba Merge pull request #6736 from remitamine/readme
[README] link to developer instructions and correct common.py path
2015-09-02 21:08:22 +06:00
Sergey M.
b17ca9c945 Merge pull request #6731 from dstftw/strict-http-response
[utils] Strict HTTP responses (Closes #6727)
2015-09-02 20:22:59 +06:00
remitamine
ebf4ca39ba link to developer instructions and correct common.py path 2015-09-02 01:24:59 +01:00
Sergey M․
e5e78797e6 [utils] Strict HTTP responses (Closes #6727) 2015-09-02 02:16:04 +06:00
Jaime Marquínez Ferrándiz
080997b808 [rtve] Fix extraction (fixes #6723)
Adding the 'Referer' header to the png request is enough.
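The rtve.py hunk is not part of this compare; a sketch of the shape of the fix, where png_url is a placeholder for the URL the extractor builds from the video id:

```python
# png_url is hypothetical here; adding the Referer header is the actual fix.
req = compat_urllib_request.Request(png_url)
req.add_header('Referer', url)
png = self._download_webpage(req, video_id, 'Downloading url information')
```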
2015-09-01 20:30:58 +02:00
Sergey M․
77306e8b97 [francetv] Update tests (2) 2015-09-01 22:42:43 +06:00
Sergey M․
6917d2a2f0 [francetv] Update tests 2015-09-01 22:39:26 +06:00
Sergey M․
36c15522c1 [francetv] Use subtitle when present (Closes #6715) 2015-09-01 22:37:42 +06:00
Sergey M․
804c343a4f [options] Clarify extractors supporting videopassword 2015-09-01 22:30:28 +06:00
Sergey M․
cd5d75427e [youku] Fix videopassword test 2015-09-01 22:28:03 +06:00
Sergey M․
5ddc127da6 [youku] Simplify (Closes #6717) 2015-09-01 22:26:17 +06:00
PSJay
f859695b49 Update README
Add Youku to the video password description.
2015-09-01 22:25:10 +06:00
PSJay
cb3d2eb9e9 Fix coding style in YouKuIE. 2015-09-01 22:25:03 +06:00
PSJay
33eae08f04 Support video-password on Youku. 2015-09-01 22:24:57 +06:00
Jaime Marquínez Ferrándiz
aa3f98677d [dumpert] Support 'embed/' urls (fixes #6712) 2015-09-01 18:13:33 +02:00
Sergey M․
fffccaaf41 [globo] Fix extraction and make more robust (Closes #6728) 2015-09-01 20:41:52 +06:00
Yen Chi Hsuan
cdc8d0c373 [odnoklassniki] Fix extraction and extend _VALID_URL (closes #6726) 2015-09-01 16:34:05 +08:00
ping
d14f0c45fc [vlive] Add info about www. to m. redirects for mobile 2015-09-01 11:11:14 +08:00
Sergey M․
39955b0451 [fc2] Fix refer 2015-08-31 21:12:21 +06:00
Sergey M․
52dfb7ffe2 [fc2] Add test for #3171 2015-08-31 21:05:37 +06:00
Sergey M․
93462856e1 [fc2] Relax _VALID_URL (Closes #3171) 2015-08-31 21:05:23 +06:00
ping
615f155a3a [vlive] Fixes for review 2015-08-31 12:46:28 +08:00
Sergey M․
fcd9e423ec [hls] Properly encode fragment filename 2015-08-31 02:28:36 +06:00
Sergey M․
db8f2bfd99 [f4m] Add missing imports 2015-08-31 02:20:29 +06:00
Sergey M.
55801fc76e Merge pull request #5588 from aajanki/encode_frag_filenames
[f4m] Fix encode error by sanitizing fragment filenames
2015-08-31 02:18:15 +06:00
Sergey M․
d3d89c3256 [YoutubeDL] Encode thumbnail filename (Closes #6714) 2015-08-31 02:01:13 +06:00
Sergey M․
8875b3d572 [imgur:album] Add extractor (Closes #6711) 2015-08-30 19:03:21 +06:00
Sergey M․
aabc2be693 [youtube] Simplify and extract more metadata from url_encoded_fmt_stream_map (Closes #5993) 2015-08-30 08:07:07 +06:00
Anssi Hannula
c9afb51cea [youtube] Fix missing format details for 60fps DASH formats
60fps DASH formats do not appear in the DASH manifest, but the non-DASH
video info page does contain additional parameters for DASH formats that
we can parse.

Use those when they exist.

This fixes "bestvideo" not selecting 60fps formats over similar 30fps
formats just because the file size is unknown.
2015-08-30 08:05:16 +06:00
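The youtube.py hunk is not shown in this compare. A sketch of the idea, assuming (as the message says) that the video info page's adaptive_fmts entries carry per-itag fields such as fps and clen for DASH formats:

```python
from youtube_dl.compat import compat_parse_qs
from youtube_dl.utils import int_or_none

def merge_dash_format_details(formats, video_info):
    # video_info is a parse_qs-style dict of lists; adaptive_fmts is a
    # comma-separated list of URL-encoded per-format attribute sets.
    extra = {}
    for spec in video_info.get('adaptive_fmts', [''])[0].split(','):
        attrs = compat_parse_qs(spec)
        itag = attrs.get('itag', [None])[0]
        if itag:
            extra[itag] = {
                'fps': int_or_none(attrs.get('fps', [None])[0]),
                'filesize': int_or_none(attrs.get('clen', [None])[0]),
            }
    for f in formats:
        f.update(extra.get(f.get('format_id'), {}))
    return formats
```

With fps known, the default "bestvideo" sorting can prefer a 60fps stream over a similar 30fps one even when the filesize is unknown.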
Yen Chi Hsuan
c0a656876c [abc] Use int_or_none 2015-08-30 01:11:56 +08:00
Yen Chi Hsuan
17a647630b [abc] Support YouTube embeds (fixes #6595) 2015-08-30 01:08:55 +08:00
Sergey M․
c88e118b3c [nowtv] Fix display_id (Closes #6702) 2015-08-29 22:41:49 +06:00
Sergey M․
ae6a802106 [kaltura] Fix _VALID_URL formatting 2015-08-29 20:26:51 +06:00
Sergey M․
b184f94413 [kaltura] Clarify html5 player comment 2015-08-29 20:23:06 +06:00
Sergey M․
ee3ec091f4 [kaltura] Strictify _VALID_URL 2015-08-29 20:21:59 +06:00
sourcerect
ef49b59053 [kaltura] add html5 player urls 2015-08-29 19:29:07 +06:00
Sergey M․
1f8125805e [generic] Add test for screenwavemedia embed 2015-08-29 19:12:38 +06:00
Sergey M․
efd712c69b [generic] Use screenwavemedia embed pattern 2015-08-29 19:07:31 +06:00
Sergey M․
109a4156e1 [cinemassacre] Use screenwavemedia embed pattern 2015-08-29 19:06:55 +06:00
Sergey M․
678d33295b [screenwavemedia] Improve _VALID_URL 2015-08-29 19:05:11 +06:00
Sergey M․
5e58956d0a [screenwavemedia] Add EMBED_PATTERN 2015-08-29 19:00:38 +06:00
felix
e276fd2cb3 [screenwavemedia] warn when falling back to a hardcoded list of streams 2015-08-29 18:38:41 +06:00
felix
9b22cb10c4 [screenwavemedia] use the true JS value instead of a comment 2015-08-29 18:38:34 +06:00
felix
8ca31a0e05 [generic] Look for ScreenwaveMedia embeds 2015-08-29 18:38:23 +06:00
Sergey M․
20149a5da1 [mtvservices] Fix title extraction under python 2 2015-08-28 22:10:49 +06:00
Sergey M․
054d43bb11 [mtvservices] Catch more georestriction errors 2015-08-28 21:45:29 +06:00
Sergey M․
65488b820c [mtvde] Add support for news 2015-08-28 21:41:54 +06:00
Sergey M․
c3c9f87954 [mtvde] Add test for mediagen URL without query 2015-08-28 21:31:38 +06:00
Sergey M․
56f447be9f [mtvservices] Fix mediagen URL generation 2015-08-28 21:27:36 +06:00
Sergey M․
79fa9db0da [mtvde] Simplify (Closes #6673) 2015-08-28 21:24:54 +06:00
Paul Hartmann
071c10137b [MTV] move German mtv site to new class 2015-08-28 21:23:00 +06:00
Philipp Hagemeister
a4962b80d6 release 2015.08.28 2015-08-28 05:04:39 +02:00
Sergey M․
5307c33232 [foxnews] Clarify IE_DESC 2015-08-27 21:48:47 +06:00
Sergey M․
1b660cce12 [foxnews] Simplify (Closes #6694) 2015-08-27 21:48:03 +06:00
nmrugg
8df8c278b6 Added matching test for FoxBusiness. 2015-08-27 21:41:16 +06:00
nmrugg
d7e8264517 Make FoxBusiness work. 2015-08-27 21:41:08 +06:00
Sergey M․
f11c316347 [shared] Add filesize to test 2015-08-26 22:06:10 +06:00
Sergey M․
f62e02c24f [shared] Add test for vivo 2015-08-26 22:05:45 +06:00
Sergey M․
70113c38c9 [shared] Clarify IE_DESC 2015-08-26 22:04:39 +06:00
Sergey M․
3d8132f5e2 [shared] Extend _VALID_URL to support vivo.sx (Closes #6681) 2015-08-26 22:03:58 +06:00
Sergey M․
39affb5aa4 [crunchyroll] Fix typo 2015-08-26 21:27:57 +06:00
Sergey M․
a882c5f474 [udemy] Use raise_login_required 2015-08-26 21:27:07 +06:00
Sergey M․
61a7ff1622 [tubitv] Use raise_login_required 2015-08-26 21:26:47 +06:00
Sergey M․
42e7373bd3 [smotri] Use raise_login_required 2015-08-26 21:26:35 +06:00
Sergey M․
e269d3ae7d [safari] Use raise_login_required 2015-08-26 21:26:24 +06:00
Sergey M․
e7ddaef5bd [pluralsight] Use raise_login_required 2015-08-26 21:26:09 +06:00
Sergey M․
62984e4584 [lynda] Use raise_login_required 2015-08-26 21:25:53 +06:00
Sergey M․
3c53455d15 [eroprofile] Use raise_login_required 2015-08-26 21:25:37 +06:00
Sergey M․
bbb43a39fd [crunchyroll] Use raise_login_required 2015-08-26 21:25:04 +06:00
Sergey M․
43e7d3c945 [extractor/common] Add raise_login_required 2015-08-26 21:24:47 +06:00
Sergey M․
2f72e83bbd [crunchyroll] Detect required login (#6677) 2015-08-26 20:47:57 +06:00
Sergey M.
57179b4ca1 Merge pull request #6679 from jaimeMF/youtube-fix-player-version-regex
[youtube] Adapt player version regex to handle urls ending in '/html5player-new.js'
2015-08-26 20:17:52 +06:00
Jaime Marquínez Ferrándiz
4bc8eec4eb [youtube] Adapt player version regex to handle urls ending in '/html5player-new.js'
It was always extracting 'new' as the version, breaking the cache system.
2015-08-26 15:21:55 +02:00
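An illustration of the failure mode (the patterns below are simplified, not the exact youtube.py regex):

```python
import re

url = '//s.ytimg.com/yts/jsbin/player-en_US-vflXXXXXX/html5player-new.js'

# A version pattern keyed on the file name matches the literal 'new',
# so every player hashed to the same cache entry:
old = re.search(r'html5player-(?P<version>[^/]+?)\.js', url)
print(old.group('version'))  # 'new'

# Anchoring on the directory component and making the trailing
# '/html5player(-new)?.js' optional recovers the real version:
new = re.search(r'player-(?P<version>[^/]+?)(?:/html5player(?:-new)?)?\.js', url)
print(new.group('version'))  # 'en_US-vflXXXXXX'
```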
Sergey M․
baf510bf8c [yandexmusic:playlist] Handle playlists with more than 150 tracks (Closes #6666) 2015-08-26 00:11:15 +06:00
Sergey M․
6d53cdd6ce [yandexmusic] Skip removed tracks (#6666) 2015-08-25 23:29:02 +06:00
Sergey M․
ebbf078c7d [krasview] Skip download for test 2015-08-25 21:19:21 +06:00
Sergey M․
95e431e9ec [mailru] Skip tests 2015-08-25 21:08:38 +06:00
ping
eba470f2f2 [vlive] Remove upload_date extraction & cleanup 2015-08-24 16:30:00 +08:00
Philipp Hagemeister
11addc50ff release 2015.08.23 2015-08-23 23:52:47 +02:00
Sergey M․
e4df2f98cc [yandexmusic:track] Eliminate base class 2015-08-24 00:36:54 +06:00
Sergey M․
e7c14660d3 [yandexmusic] Defer link resolve till actual download time to prevent link expiry (Closes #6650) 2015-08-24 00:36:24 +06:00
Sergey M․
90076b6172 [spankwire] Preserve old uploader pattern 2015-08-23 22:33:26 +06:00
Sergey M․
28b83495d8 [spankwire] Simplify 2015-08-23 22:32:44 +06:00
Sergey M․
551c7837ac [spankwire] Simplify and properly format 2015-08-23 22:32:20 +06:00
clauderains
59e6acc757 [spankwire] Support new cdn video url format 2015-08-23 22:21:59 +06:00
clauderains
9990c960f2 [spankwire] Fixed uploader_id field extraction so that test case passes 2015-08-23 22:21:54 +06:00
Sergey M․
2006a06eff [kontrtube] Fix extraction (Closes #6644) 2015-08-23 21:43:28 +06:00
Sergey M․
2b6bda1ed8 [pluralsight] Do not yet rely on userMayViewClip 2015-08-23 11:21:56 +06:00
Sergey M․
468083d2f5 [pluralsight] Remove unused const 2015-08-23 10:44:10 +06:00
Sergey M․
483fc223bb [pluralsight] Add extractor (Closes #6090) 2015-08-23 10:42:34 +06:00
Sergey M․
66ce97024d [soundcloud:user] Update tests 2015-08-22 06:30:00 +06:00
Jaime Marquínez Ferrándiz
8c97f81943 [common] Follow convention of using 'cls' in classmethods 2015-08-21 11:35:51 +02:00
Yen Chi Hsuan
d7c1630570 [rtl2] Remove MD5 checksums 2015-08-21 13:21:21 +08:00
Yen Chi Hsuan
5e1a5ac8de [rtl2] Fix extraction for test_RTL2_1 2015-08-21 13:20:51 +08:00
Yen Chi Hsuan
9eb4ab6ad9 [rtl2] Remove an unused line 2015-08-21 13:04:25 +08:00
Yen Chi Hsuan
4932a817a0 [rtl2] Add skip_download for test 2015-08-21 13:00:08 +08:00
Sergey M․
5d003e29b1 [rtp] Add skip_download for test 2015-08-21 08:56:05 +06:00
Sergey M․
dc95bd503e [folketinget] Add skip_download for test 2015-08-21 08:54:28 +06:00
Yen Chi Hsuan
f738dd7b7c [common] Remove debugging codes 2015-08-21 01:43:22 +08:00
Yen Chi Hsuan
f908b74fa3 [test/subtitles] Add test for ThePlatformFeedIE 2015-08-21 01:38:57 +08:00
Yen Chi Hsuan
c687ac745b [theplatform] Use subtitles from SMIL, too 2015-08-21 01:37:43 +08:00
Yen Chi Hsuan
912e0b7e46 [common] Add _merge_subtitles() 2015-08-21 01:37:07 +08:00
Yen Chi Hsuan
03bc7237ad [common] _parse_smil_subtitles: accept lang as the subtitle language 2015-08-20 23:18:58 +08:00
ping
061f62da54 [vlive] New extractor for vlive.tv 2015-08-20 12:56:11 +08:00
Yen Chi Hsuan
dd565ac1ad [theplatform] Use _download_json 2015-08-20 03:07:04 +08:00
Sergey M․
5cdefc4625 [extractor/common] Add more subtitle mime types for guess when ext is missing 2015-08-20 01:02:50 +06:00
Sergey M․
ce00af8767 [extractor/common] Add default subtitles lang 2015-08-20 00:56:17 +06:00
Yen Chi Hsuan
51047444aa Merge branch 'master' into HEAD 2015-08-20 01:56:08 +08:00
Yen Chi Hsuan
aa6cd05ed8 [theplatform] Fix Python 2: declare coding 2015-08-20 01:47:55 +08:00
Yen Chi Hsuan
dac14bf311 [nbc] Add MSNBCIE 2015-08-20 01:41:18 +08:00
Yen Chi Hsuan
05fe2594e4 [theplatform] Support URLs with 'guid=' 2015-08-20 01:38:39 +08:00
Yen Chi Hsuan
26e1c3514f [theplatform] Add ThePlatformFeedIE 2015-08-20 01:24:32 +08:00
Sergey M․
22c83245c5 [mwave] Improve 2015-08-19 23:07:41 +06:00
ping
7900aede14 [mwave] New extractor for mwave.interest.me 2015-08-19 22:40:40 +06:00
Yen Chi Hsuan
f877c6ae5a [theplatform] Use InfoExtractor._parse_smil_formats() 2015-08-19 23:11:25 +08:00
Sergey M․
ca681f7041 [videobam] Remove extractor
videobam.com redirects to sendvid.com now
2015-08-19 20:52:36 +06:00
Sergey M․
a01da8bbf8 [crunchyroll] Workaround fplive.net rtmp URLs (Closes #5881) 2015-08-18 23:02:57 +06:00
Sergey M․
f3a65d9636 [travis] Move to new infrastructure
We don't use rtmpdump in tests anyway
2015-08-18 21:10:52 +06:00
Sergey M․
559f4c550f [playtvak] PEP 8 2015-08-18 20:27:58 +06:00
Sergey M․
03c635a4b5 [twitch] Fix login (Closes #6599) 2015-08-18 20:26:45 +06:00
Sergey M․
34a4cd0a34 [telecinco] Relax _VALID_URL (Closes #6601) 2015-08-18 20:02:56 +06:00
Philipp Hagemeister
3b9b32f404 [libsyn] Strip options from player URL 2015-08-18 13:02:41 +02:00
Sergey M․
9c724a9802 [twitch:vod] Add test for #6585 2015-08-17 20:23:52 +06:00
Sergey M․
7a6e8a1b17 [twitch] Make more robust 2015-08-17 20:20:04 +06:00
Sergey M․
369c12e038 [twitch] Allow untitled videos (Closes #6585) 2015-08-17 20:16:43 +06:00
Antti Ajanki
233c1c0e76 [downloader/f4m] Fragment filenames must be sanitized
because the fragment was written to a file with a sanitized name by
http_dl.download()
2015-05-03 11:31:42 +03:00
85 changed files with 1788 additions and 710 deletions

.travis.yml

@@ -5,9 +5,7 @@ python:
- "3.2"
- "3.3"
- "3.4"
before_install:
- sudo apt-get update -qq
- sudo apt-get install -yqq rtmpdump
sudo: false
script: nosetests test --verbose
notifications:
email:

AUTHORS

@@ -140,3 +140,6 @@ Behrouz Abbasi
ngld
nyuszika7h
Shaun Walbridge
Lee Jenkins
Anssi Hannula
Lukáš Lalinský

CONTRIBUTING.md

@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:

README.md

@@ -34,7 +34,7 @@ You can also use pip:
sudo pip install youtube-dl
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
# DESCRIPTION
**youtube-dl** is a small command-line program to download videos from
@@ -207,7 +207,7 @@ which means you can modify it, redistribute it or use it however you like.
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR Two-factor auth code
-n, --netrc Use .netrc authentication data
--video-password PASSWORD Video password (vimeo, smotri)
--video-password PASSWORD Video password (vimeo, smotri, youku)
## Post-processing Options:
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
@@ -552,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:

docs/supportedsites.md

@@ -166,7 +166,7 @@
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom**
- **Foxgay**
- **FoxNews**
- **FoxNews**: Fox News and Fox Business Video
- **FoxSports**
- **france2.fr:generation-quoi**
- **FranceCulture**
@@ -195,7 +195,7 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
- **Goshgay**
- **Groupon**
- **Hark**
@@ -220,6 +220,7 @@
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
- **ImgurAlbum**
- **Ina**
- **Indavideo**
- **IndavideoEmbed**
@@ -301,13 +302,15 @@
- **Moviezine**
- **movshare**: MovShare
- **MPORA**
- **MSNBC**
- **MTV**
- **mtv.de**
- **mtviggy.com**
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- **MusicPlayOn**
- **MusicVault**
- **muzu.tv**
- **Mwave**
- **MySpace**
- **MySpace:album**
- **MySpass**
@@ -392,6 +395,8 @@
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
- **Playwire**
- **pluralsight**
- **pluralsight:course**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
@@ -461,7 +466,7 @@
- **Sexu**
- **SexyKarma**: Sexy Karma and Watch Indian Porn
- **Shahid**
- **Shared**
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
- **Sina**
- **Slideshare**
@@ -534,6 +539,7 @@
- **TF1**
- **TheOnion**
- **ThePlatform**
- **ThePlatformFeed**
- **TheSixtyOne**
- **ThisAmericanLife**
- **ThisAV**
@@ -599,7 +605,6 @@
- **Viddler**
- **video.google:search**: Google Video search
- **video.mit.edu**
- **VideoBam**
- **VideoDetective**
- **videofy.me**
- **videolectures.net**
@@ -627,6 +632,7 @@
- **vine:user**
- **vk**: VK
- **vk:uservideos**: VK - User's Videos
- **vlive**
- **Vodlocker**
- **VoiceRepublic**
- **Vporn**

test/test_compat.py

@@ -14,6 +14,7 @@ from youtube_dl.utils import get_filesystem_encoding
from youtube_dl.compat import (
compat_getenv,
compat_expanduser,
compat_shlex_split,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
)
@@ -67,5 +68,8 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_shlex_split(self):
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
if __name__ == '__main__':
unittest.main()

test/test_subtitles.py

@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
RaiIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
RTVEALaCartaIE,
FunnyOrDieIE,
)
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
class TestRtveSubtitles(BaseTestSubtitles):
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
IE = RTVEALaCartaIE

test/test_utils.py

@@ -57,11 +57,16 @@ from youtube_dl.utils import (
urlencode_postdata,
version_tuple,
xpath_with_ns,
xpath_element,
xpath_text,
xpath_attr,
render_table,
match_str,
parse_dfxp_time_expr,
dfxp2srt,
cli_option,
cli_valueless_option,
cli_bool_option,
)
@@ -264,6 +269,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_xpath_element(self):
doc = xml.etree.ElementTree.Element('root')
div = xml.etree.ElementTree.SubElement(doc, 'div')
p = xml.etree.ElementTree.SubElement(div, 'p')
p.text = 'Foo'
self.assertEqual(xpath_element(doc, 'div/p'), p)
self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
self.assertTrue(xpath_element(doc, 'div/bar') is None)
self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
def test_xpath_text(self):
testxml = '''<root>
<div>
@@ -272,9 +287,25 @@ class TestUtil(unittest.TestCase):
</root>'''
doc = xml.etree.ElementTree.fromstring(testxml)
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
self.assertTrue(xpath_text(doc, 'div/bar') is None)
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
def test_xpath_attr(self):
testxml = '''<root>
<div>
<p x="a">Foo</p>
</div>
</root>'''
doc = xml.etree.ElementTree.fromstring(testxml)
self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
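The xpath_element and xpath_attr helpers exercised by the xpath tests above live in utils.py, whose hunk is not included here. A minimal sketch consistent with the assertions:

```python
from youtube_dl.utils import ExtractorError

NO_DEFAULT = object()  # sentinel so callers can pass default=None explicitly

def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = node.find(xpath)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        if fatal:
            raise ExtractorError('Could not find XML element %s' % (name or xpath))
        return None
    return n

def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath)
    if n is None or n.get(key) is None:
        if default is not NO_DEFAULT:
            return default
        if fatal:
            raise ExtractorError('Could not find XML attribute %s' % (name or key))
        return None
    return n.get(key)
```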
@@ -646,6 +677,51 @@ The first line
'''
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
def test_cli_valueless_option(self):
self.assertEqual(cli_valueless_option(
{'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader'])
self.assertEqual(cli_valueless_option(
{'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), [])
self.assertEqual(cli_valueless_option(
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate'])
self.assertEqual(cli_valueless_option(
{'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), [])
self.assertEqual(cli_valueless_option(
{'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), [])
self.assertEqual(cli_valueless_option(
{'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate'])
def test_cli_bool_option(self):
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
['--no-check-certificate', 'true'])
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='),
['--no-check-certificate=true'])
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
['--check-certificate', 'false'])
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
['--check-certificate=false'])
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'),
['--check-certificate', 'true'])
self.assertEqual(
cli_bool_option(
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
['--check-certificate=true'])
if __name__ == '__main__':
unittest.main()
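The generalized converters under test are added to utils.py in a hunk not shown here; a sketch that satisfies every assertion above:

```python
def cli_option(params, command_option, param):
    param = params.get(param)
    return [command_option, param] if param is not None else []

def cli_valueless_option(params, command_option, param, expected_value=True):
    # Emit the bare flag only when the param equals the expected value.
    return [command_option] if params.get(param) == expected_value else []

def cli_bool_option(params, command_option, param,
                    true_value='true', false_value='false', separator=None):
    param = params.get(param)
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
```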

test/test_write_annotations.py

@@ -33,7 +33,7 @@ params = get_params({
TEST_ID = 'gr51aVj-mLg'
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
ANNOTATIONS_FILE = TEST_ID + '.annotations.xml'
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']

youtube_dl/YoutubeDL.py

@@ -69,6 +69,7 @@ from .utils import (
version_tuple,
write_json_file,
write_string,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
prepend_extension,
replace_extension,
@@ -284,7 +285,11 @@ class YoutubeDL(object):
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self._err_file = sys.stderr
self.params = params
self.params = {
# Default parameters
'nocheckcertificate': False,
}
self.params.update(params)
self.cache = Cache(self)
if params.get('bidi_workaround', False):
@@ -1939,8 +1944,7 @@ class YoutubeDL(object):
if os.access(opts_cookiefile, os.R_OK):
self.cookiejar.load()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
self.cookiejar)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
if opts_proxy == '':
proxies = {}
@@ -2009,7 +2013,7 @@ class YoutubeDL(object):
(info_dict['extractor'], info_dict['id'], thumb_display_id))
try:
uf = self.urlopen(t['url'])
with open(thumb_filename, 'wb') as thumbf:
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
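YoutubeDLCookieProcessor itself is defined in utils.py, outside the hunks shown. A sketch of its shape; per the "[utils] Comment cookie processing until result from travis" commit above, the actual #6769 correction is disabled for now, leaving a pass-through that also hooks https:

```python
from youtube_dl.compat import compat_urllib_request

class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Intended fix for #6769: percent-escape non-ASCII Set-Cookie
        # headers on Python 2 before the stdlib processor chokes on them.
        # Currently commented out upstream pending Travis results.
        return compat_urllib_request.HTTPCookieProcessor.http_response(
            self, request, response)

    # urllib dispatches handlers by scheme name, so alias the http hooks
    # for https requests as well.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
```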

youtube_dl/__init__.py

@@ -9,7 +9,6 @@ import codecs
import io
import os
import random
import shlex
import sys
@@ -20,6 +19,7 @@ from .compat import (
compat_expanduser,
compat_getpass,
compat_print,
compat_shlex_split,
workaround_optparse_bug9161,
)
from .utils import (
@@ -262,10 +262,10 @@ def _real_main(argv=None):
parser.error('setting filesize xattr requested but python-xattr is not available')
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = shlex.split(opts.external_downloader_args)
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
postprocessor_args = None
if opts.postprocessor_args:
postprocessor_args = shlex.split(opts.postprocessor_args)
postprocessor_args = compat_shlex_split(opts.postprocessor_args)
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))

youtube_dl/compat.py

@@ -5,6 +5,7 @@ import getpass
import optparse
import os
import re
import shlex
import shutil
import socket
import subprocess
@@ -227,6 +228,17 @@ except ImportError: # Python < 3.3
return "'" + s.replace("'", "'\"'\"'") + "'"
if sys.version_info >= (2, 7, 3):
compat_shlex_split = shlex.split
else:
# Working around shlex issue with unicode strings on some python 2
# versions (see http://bugs.python.org/issue1548891)
def compat_shlex_split(s, comments=False, posix=True):
if isinstance(s, unicode):
s = s.encode('utf-8')
return shlex.split(s, comments, posix)
def compat_ord(c):
if type(c) is int:
return c
@@ -459,6 +471,7 @@ __all__ = [
'compat_ord',
'compat_parse_qs',
'compat_print',
'compat_shlex_split',
'compat_socket_create_connection',
'compat_str',
'compat_subprocess_get_DEVNULL',

youtube_dl/downloader/external.py

@@ -5,6 +5,10 @@ import subprocess
from .common import FileDownloader
from ..utils import (
cli_option,
cli_valueless_option,
cli_bool_option,
cli_configuration_args,
encodeFilename,
encodeArgument,
)
@@ -46,19 +50,16 @@ class ExternalFD(FileDownloader):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
def _option(self, command_option, param):
param = self.params.get(param)
if param is None:
return []
if isinstance(param, bool):
return [command_option]
return [command_option, param]
return cli_option(self.params, command_option, param)
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
def _valueless_option(self, command_option, param, expected_value=True):
return cli_valueless_option(self.params, command_option, param, expected_value)
def _configuration_args(self, default=[]):
ex_args = self.params.get('external_downloader_args')
if ex_args is None:
return default
assert isinstance(ex_args, list)
return ex_args
return cli_configuration_args(self.params, 'external_downloader_args', default)
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
@@ -80,6 +81,8 @@ class CurlFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -102,7 +105,7 @@ class WgetFD(ExternalFD):
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -121,6 +124,7 @@ class Aria2cFD(ExternalFD):
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += ['--', info_dict['url']]
return cmd
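Together with the test_utils.py assertions above, the refactor makes the per-downloader conventions explicit. For example, with nocheckcertificate and a proxy set:

```python
params = {'nocheckcertificate': True, 'proxy': '127.0.0.1:3128'}

cli_option(params, '--proxy', 'proxy')
# -> ['--proxy', '127.0.0.1:3128']    (curl, wget)

cli_valueless_option(params, '--insecure', 'nocheckcertificate')
# -> ['--insecure']                   (curl: bare flag, no value)

cli_bool_option(params, '--check-certificate', 'nocheckcertificate',
                'false', 'true', '=')
# -> ['--check-certificate=false']    (aria2c: inverted bool, '=' separator)
```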

youtube_dl/downloader/f4m.py

@@ -13,6 +13,8 @@ from ..compat import (
compat_urllib_error,
)
from ..utils import (
encodeFilename,
sanitize_open,
struct_pack,
struct_unpack,
xpath_text,
@@ -343,18 +345,19 @@ class F4mFD(FragmentFD):
success = ctx['dl'].download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
down_data = down.read()
down.close()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
if live:
os.remove(frag_filename)
os.remove(encodeFilename(frag_sanitized))
else:
frags_filenames.append(frag_filename)
frags_filenames.append(frag_sanitized)
except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
@@ -375,6 +378,6 @@ class F4mFD(FragmentFD):
self._finish_frag_download(ctx)
for frag_file in frags_filenames:
os.remove(frag_file)
os.remove(encodeFilename(frag_file))
return True
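sanitize_open is an existing utils.py helper, unchanged in this compare. Its relevant contract, sketched: open the requested name if possible, otherwise retry with a sanitized variant, and always return the name actually used:

```python
def sanitize_open(filename, open_mode):
    # Sketch only; sanitize_win32_chars is a hypothetical stand-in for the
    # real helper's forbidden-character replacement.
    try:
        stream = open(encodeFilename(filename), open_mode)
        return stream, filename
    except (IOError, OSError):
        alt_filename = sanitize_win32_chars(filename)
        return open(encodeFilename(alt_filename), open_mode), alt_filename
```

Since ctx['dl'].download() already wrote the fragment through the same sanitizer, reopening via sanitize_open guarantees both sides agree on the on-disk name, and the returned frag_sanitized is the name that must later be passed to os.remove().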

youtube_dl/downloader/hls.py

@@ -12,6 +12,7 @@ from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..utils import (
encodeArgument,
encodeFilename,
sanitize_open,
)
@@ -89,13 +90,13 @@ class NativeHlsFD(FragmentFD):
success = ctx['dl'].download(frag_filename, {'url': frag_url})
if not success:
return False
with open(frag_filename, 'rb') as down:
ctx['dest_stream'].write(down.read())
frags_filenames.append(frag_filename)
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
ctx['dest_stream'].write(down.read())
frags_filenames.append(frag_sanitized)
self._finish_frag_download(ctx)
for frag_file in frags_filenames:
os.remove(frag_file)
os.remove(encodeFilename(frag_file))
return True

youtube_dl/extractor/__init__.py

@@ -241,7 +241,10 @@ from .imdb import (
ImdbIE,
ImdbListIE
)
from .imgur import ImgurIE
from .imgur import (
ImgurIE,
ImgurAlbumIE,
)
from .ina import InaIE
from .indavideo import (
IndavideoIE,
@@ -340,11 +343,12 @@ from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
MTVIggyIE,
MTVDEIE,
)
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
from .mwave import MwaveIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
@@ -358,6 +362,7 @@ from .nbc import (
NBCNewsIE,
NBCSportsIE,
NBCSportsVPlayerIE,
MSNBCIE,
)
from .ndr import (
NDRIE,
@@ -452,6 +457,10 @@ from .playfm import PlayFMIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .pluralsight import (
PluralsightIE,
PluralsightCourseIE,
)
from .podomatic import PodomaticIE
from .porn91 import Porn91IE
from .pornhd import PornHdIE
@@ -612,7 +621,10 @@ from .testurl import TestURLIE
from .testtube import TestTubeIE
from .tf1 import TF1IE
from .theonion import TheOnionIE
from .theplatform import ThePlatformIE
from .theplatform import (
ThePlatformIE,
ThePlatformFeedIE,
)
from .thesixtyone import TheSixtyOneIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -696,7 +708,6 @@ from .vgtv import (
from .vh1 import VH1IE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
@@ -731,6 +742,7 @@ from .vk import (
VKIE,
VKUserVideosIE,
)
from .vlive import VLiveIE
from .vodlocker import VodlockerIE
from .voicerepublic import VoiceRepublicIE
from .vporn import VpornIE

youtube_dl/extractor/abc.py

@@ -1,16 +1,20 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
js_to_json,
int_or_none,
)
class ABCIE(InfoExtractor):
IE_NAME = 'abc.net.au'
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
'info_dict': {
@@ -19,22 +23,47 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
}
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086',
'info_dict': {
'id': 'NvqvPeNZsHU',
'ext': 'mp4',
'upload_date': '20150816',
'uploader': 'ABC News (Australia)',
'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
'uploader_id': 'NewsOnABC',
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urls_info_json = self._search_regex(
r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
flags=re.DOTALL)
urls_info = json.loads(urls_info_json.replace('\'', '"'))
mobj = re.search(
r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage)
if mobj is None:
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(
mobj.group('json_data'), video_id, transform_source=js_to_json)
if not isinstance(urls_info, list):
urls_info = [urls_info]
if mobj.group('type') == 'YouTube':
return self.playlist_result([
self.url_result(url_info['url']) for url_info in urls_info])
formats = [{
'url': url_info['url'],
'width': int(url_info['width']),
'height': int(url_info['height']),
'tbr': int(url_info['bitrate']),
'filesize': int(url_info['filesize']),
'width': int_or_none(url_info.get('width')),
'height': int_or_none(url_info.get('height')),
'tbr': int_or_none(url_info.get('bitrate')),
'filesize': int_or_none(url_info.get('filesize')),
} for url_info in urls_info]
self._sort_formats(formats)

youtube_dl/extractor/academicearth.py

@@ -15,7 +15,7 @@ class AcademicEarthCourseIE(InfoExtractor):
'title': 'Laws of Nature',
'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
},
'playlist_count': 4,
'playlist_count': 3,
}
def _real_extract(self, url):

youtube_dl/extractor/adultswim.py

@@ -156,7 +156,7 @@ class AdultSwimIE(InfoExtractor):
xpath_text(idoc, './/trt', 'segment duration').strip())
formats = []
file_els = idoc.findall('.//files/file')
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
for file_el in file_els:
bitrate = file_el.attrib.get('bitrate')

youtube_dl/extractor/airmozilla.py

@@ -20,14 +20,14 @@ class AirMozillaIE(InfoExtractor):
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
'thumbnail': 're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
'location': 'SFO Commons',
'duration': 3780,
'view_count': int,
'categories': ['Main'],
'categories': ['Main', 'Privacy'],
}
}

youtube_dl/extractor/aljazeera.py

@@ -16,6 +16,7 @@ class AlJazeeraIE(InfoExtractor):
'uploader': 'Al Jazeera English',
},
'add_ie': ['Brightcove'],
'skip': 'Not accessible from Travis CI server',
}
def _real_extract(self, url):

youtube_dl/extractor/ceskatelevize.py

@@ -17,55 +17,81 @@ from ..utils import (
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
'id': '214411058091220',
'ext': 'mp4',
'title': 'Hyde Park Civilizace',
'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 3350,
},
'params': {
# m3u8 download
'skip_download': True,
},
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
'id': '61924494876951776',
'ext': 'mp4',
'title': 'Hyde Park Civilizace',
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 3350,
},
{
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
'info_dict': {
'id': '14716',
'ext': 'mp4',
'title': 'První republika: Zpěvačka z Dupárny Bobina',
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 88.4,
},
'params': {
# m3u8 download
'skip_download': True,
},
'params': {
# m3u8 download
'skip_download': True,
},
]
}, {
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
'info_dict': {
'id': '61924494876844374',
'ext': 'mp4',
'title': 'První republika: Zpěvačka z Dupárny Bobina',
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 88.4,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, playlist_id)
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
typ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
data = {
'playlist[0][type]': typ,
@@ -83,7 +109,7 @@ class CeskaTelevizeIE(InfoExtractor):
req.add_header('X-Requested-With', 'XMLHttpRequest')
req.add_header('Referer', url)
playlistpage = self._download_json(req, video_id)
playlistpage = self._download_json(req, playlist_id)
playlist_url = playlistpage['url']
if playlist_url == 'error_region':
@@ -92,33 +118,43 @@ class CeskaTelevizeIE(InfoExtractor):
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist = self._download_json(req, video_id)
playlist_title = self._og_search_title(webpage)
playlist_description = self._og_search_description(webpage)
item = playlist['playlist'][0]
formats = []
for format_id, stream_url in item['streamUrls'].items():
formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
self._sort_formats(formats)
playlist = self._download_json(req, playlist_id)['playlist']
playlist_len = len(playlist)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
duration = float_or_none(item.get('duration'))
thumbnail = item.get('previewImageUrl')
entries = []
for item in playlist:
formats = []
for format_id, stream_url in item['streamUrls'].items():
formats.extend(self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
self._sort_formats(formats)
subtitles = {}
subs = item.get('subtitles')
if subs:
subtitles = self.extract_subtitles(episode_id, subs)
item_id = item.get('id') or item['assetId']
title = item['title']
return {
'id': episode_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
duration = float_or_none(item.get('duration'))
thumbnail = item.get('previewImageUrl')
subtitles = {}
if item.get('type') == 'VOD':
subs = item.get('subtitles')
if subs:
subtitles = self.extract_subtitles(episode_id, subs)
entries.append({
'id': item_id,
'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
'description': playlist_description if playlist_len == 1 else None,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
})
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs):
original_subtitles = self._download_webpage(

youtube_dl/extractor/cinemassacre.py

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE
class CinemassacreIE(InfoExtractor):
@@ -83,10 +84,10 @@ class CinemassacreIE(InfoExtractor):
playerdata_url = self._search_regex(
[
r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
ScreenwaveMediaIE.EMBED_PATTERN,
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
],
webpage, 'player data URL', default=None)
webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
playerdata_url = BlipTVIE._extract_url(webpage)
if not playerdata_url:

youtube_dl/extractor/common.py

@@ -510,6 +510,12 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
@staticmethod
def raise_login_required(msg='This video is only available for registered users'):
raise ExtractorError(
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
expected=True)
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
@@ -727,7 +733,7 @@ class InfoExtractor(object):
def _hidden_inputs(html):
hidden_inputs = {}
for input in re.findall(r'<input([^>]+)>', html):
if not re.search(r'type=(["\'])hidden\1', input):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
if not name:
@@ -1052,7 +1058,7 @@ class InfoExtractor(object):
return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase')
@@ -1091,6 +1097,12 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
if transform_rtmp_url:
streamer, src = transform_rtmp_url(streamer, src)
formats[-1].update({
'url': streamer,
'play_path': src,
})
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
@@ -1129,7 +1141,7 @@ class InfoExtractor(object):
return formats
def _parse_smil_subtitles(self, smil, namespace=None):
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src')
@@ -1138,9 +1150,14 @@ class InfoExtractor(object):
ext = textstream.get('ext') or determine_ext(src)
if not ext:
type_ = textstream.get('type')
if type_ == 'text/srt':
ext = 'srt'
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
SUBTITLES_TYPES = {
'text/vtt': 'vtt',
'text/srt': 'srt',
'application/smptett+xml': 'tt',
}
if type_ in SUBTITLES_TYPES:
ext = SUBTITLES_TYPES[type_]
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
subtitles.setdefault(lang, []).append({
'url': src,
'ext': ext,
@@ -1268,6 +1285,23 @@ class InfoExtractor(object):
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
@staticmethod
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
""" Merge subtitle items for one language. Items with duplicated URLs
will be dropped. """
list1_urls = set([item['url'] for item in subtitle_list1])
ret = list(subtitle_list1)
ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
return ret
@classmethod
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
""" Merge two subtitle dictionaries, language by language. """
ret = dict(subtitle_dict1)
for lang in subtitle_dict2:
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
return ret
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):
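A usage example for the new merge helpers (URLs are illustrative); duplicates are detected by URL within each language:

```python
smil_subs = {'en': [{'url': 'http://example.com/en.vtt', 'ext': 'vtt'}]}
api_subs = {
    'en': [{'url': 'http://example.com/en.vtt', 'ext': 'vtt'},
           {'url': 'http://example.com/en.srt', 'ext': 'srt'}],
    'de': [{'url': 'http://example.com/de.vtt', 'ext': 'vtt'}],
}

merged = InfoExtractor._merge_subtitles(smil_subs, api_subs)
# {'en': [en.vtt, en.srt], 'de': [de.vtt]} -- the duplicate en.vtt is dropped
```

This is what lets ThePlatform combine SMIL-sourced subtitles with ones from its JSON feed without double entries.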

View File

@@ -14,20 +14,40 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
bytes_to_intlist,
intlist_to_bytes,
int_or_none,
remove_end,
unified_strdate,
urlencode_postdata,
xpath_text,
)
from ..aes import (
aes_cbc_decrypt,
)
class CrunchyrollIE(InfoExtractor):
class CrunchyrollBaseIE(InfoExtractor):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else compat_urllib_request.Request(url_or_request))
# Accept-Language must be set explicitly to accept any language and avoid issues
# similar to https://github.com/rg3/youtube-dl/issues/6797.
# Along with the IP address, Crunchyroll uses Accept-Language to decide whether
# georestriction should be imposed (from what I can see it just takes the first
# language, ignoring the priority, and requires it to correspond to the IP).
# This breaks Crunchyroll in georestricted cases for browsers that don't place
# the locale language first in the header; allowing any language works around the issue.
request.add_header('Accept-Language', '*')
return super(CrunchyrollBaseIE, self)._download_webpage(
request, video_id, note, errnote, fatal, tries, timeout, encoding)
class CrunchyrollIE(CrunchyrollBaseIE):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
@@ -235,7 +255,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
webpage_url = 'http://www.' + mobj.group('url')
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
note_m = self._html_search_regex(
r'<div class="showmedia-trailer-notice">(.+?)</div>',
webpage, 'trailer-notice', default='')
if note_m:
raise ExtractorError(note_m)
@@ -245,15 +267,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if msg.get('type') == 'error':
raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
if 'To view this, please log in to verify you are 18 or older.' in webpage:
self.raise_login_required()
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
video_title = re.sub(r' {2,}', ' ', video_title)
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
video_upload_date = self._html_search_regex(
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
video_uploader = self._html_search_regex(
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
@@ -279,13 +308,33 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
stream_info = streamdata.find('./{default}preload/stream_info')
video_url = stream_info.find('./host').text
video_play_path = stream_info.find('./file').text
formats.append({
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'format_id': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
if '.fplive.net/' in video_url:
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
parsed_video_url = compat_urlparse.urlparse(video_url)
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
netloc='v.lvlt.crcdn.net',
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'url': direct_video_url,
})
formats.append(format_info)
continue
format_info.update({
'url': video_url,
'play_path': video_play_path,
'ext': 'flv',
'format': video_format,
'format_id': video_format,
})
formats.append(format_info)
subtitles = self.extract_subtitles(video_id, webpage)
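For streams hosted on *.fplive.net the extractor now tries a direct HTTP URL on v.lvlt.crcdn.net before falling back to RTMP. Tracing the rewrite above with hypothetical values:

video_url = 'rtmpe://cp12345.fplive.net/vod/'   # assumed example host/path
video_play_path = 'mp4:c5/video_1080.mp4'
# The re.sub plus urlparse/urlunparse steps produce:
# direct_video_url == 'http://v.lvlt.crcdn.net/vod/c5/video_1080.mp4'
# which is used only if _is_valid_url() confirms the server responds to it.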
@@ -301,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
}
class CrunchyrollShowPlaylistIE(InfoExtractor):
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = "crunchyroll:playlist"
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'

View File

@@ -44,8 +44,8 @@ class DCNIE(InfoExtractor):
title = video.get('title_en') or video['title_ar']
webpage = self._download_webpage(
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
+ compat_urllib_parse.urlencode({
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
compat_urllib_parse.urlencode({
'id': video['id'],
'user_id': video['user_id'],
'signature': video['signature'],

View File

@@ -9,8 +9,8 @@ from ..utils import qualities
class DumpertIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
'info_dict': {
@@ -20,11 +20,15 @@ class DumpertIE(InfoExtractor):
'description': 'Niet schrikken hoor',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
}, {
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
url = 'https://www.dumpert.nl/mediabase/' + video_id
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'nsfw=1; cpc=10')
webpage = self._download_webpage(req, video_id)

View File

@@ -79,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
age_limit = 0 if age_restriction == 'allow_all' else 18
m3u8_data = self._download_json(
media['sources']['secure_m3u8']['auto'],
self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:'),
video_id, 'Downloading m3u8 JSON')
formats = self._extract_m3u8_formats(

View File

@@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor):
m = re.search(r'You must be logged in to view this video\.', webpage)
if m:
raise ExtractorError(
'This video requires login. Please specify a username and password and try again.', expected=True)
self.raise_login_required('This video requires login')
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],

View File

@@ -10,12 +10,13 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
encode_dict,
ExtractorError,
)
class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2'
_TESTS = [{
@@ -37,6 +38,9 @@ class FC2IE(InfoExtractor):
'password': '(snip)',
'skip': 'requires actual password'
}
}, {
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
'only_matching': True,
}]
def _login(self):
@@ -52,10 +56,7 @@ class FC2IE(InfoExtractor):
'Submit': ' Login ',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
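encode_dict (from ..utils) replaces the repeated "encode keys and values to UTF-8 before urlencode" boilerplate; GorillaVid and Niconico get the same cleanup below. Judging by its call sites in this diff, the helper is equivalent to this sketch:

def encode_dict(d, encoding='utf-8'):
    # Python 2's urlencode chokes on unicode, so encode everything first
    return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())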
@@ -80,7 +81,7 @@ class FC2IE(InfoExtractor):
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
refer = url.replace('/content/', '/a/content/')
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()

View File

@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
'upload_date': '20141120',
'duration': 3960,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
@@ -8,7 +10,8 @@ from ..utils import (
class FoxNewsIE(InfoExtractor):
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor):
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
'only_matching': True,
},
{
'url': 'http://video.foxbusiness.com/v/4442309889001',
'only_matching': True,
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
video = self._download_json(
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
item = video['channel']['item']
title = item['title']

View File

@@ -78,9 +78,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
})
self._sort_formats(formats)
title = info['titre']
subtitle = info.get('sous_titre')
if subtitle:
title += ' - %s' % subtitle
return {
'id': video_id,
'title': info['titre'],
'title': title,
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
@@ -214,15 +219,15 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
},
# france5
{
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
'md5': '78f0f4064f9074438e660785bbf2c5d9',
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
'md5': 'f6c577df3806e26471b3d21631241fd0',
'info_dict': {
'id': '108961659',
'id': '123327454',
'ext': 'flv',
'title': 'C à dire ?!',
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
'upload_date': '20140915',
'timestamp': 1410795000,
'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
'upload_date': '20150831',
'timestamp': 1441035120,
},
},
# franceo

View File

@@ -48,6 +48,7 @@ from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
class GenericIE(InfoExtractor):
@@ -1001,6 +1002,16 @@ class GenericIE(InfoExtractor):
'description': 'New experience with Acrobat DC',
'duration': 248.667,
},
},
# ScreenwaveMedia embed
{
'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
'md5': '24ace5baba0d35d55c6810b51f34e9e0',
'info_dict': {
'id': 'cinemasnob-55d26273809dd',
'ext': 'mp4',
'title': 'cinemasnob',
},
}
]
@@ -1718,6 +1729,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
# Look for AdobeTVVideo embeds
mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -1781,7 +1797,7 @@ class GenericIE(InfoExtractor):
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(

View File

@@ -13,6 +13,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
)
@@ -359,13 +360,8 @@ class GloboIE(InfoExtractor):
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
title = video['title']
duration = float_or_none(video['duration'], 1000)
like_count = video['likes']
uploader = video['channel']
uploader_id = video['channel_id']
formats = []
for resource in video['resources']:
resource_id = resource.get('_id')
if not resource_id:
@@ -407,6 +403,11 @@ class GloboIE(InfoExtractor):
self._sort_formats(formats)
duration = float_or_none(video.get('duration'), 1000)
like_count = int_or_none(video.get('likes'))
uploader = video.get('channel')
uploader_id = video.get('channel_id')
return {
'id': video_id,
'title': title,

View File

@@ -10,15 +10,16 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
encode_dict,
int_or_none,
)
class GorillaVidIE(InfoExtractor):
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
'''
@@ -67,13 +68,22 @@ class GorillaVidIE(InfoExtractor):
}, {
'url': 'http://movpod.in/0wguyyxi1yca',
'only_matching': True,
}, {
'url': 'http://filehoot.com/3ivfabn7573c.html',
'info_dict': {
'id': '3ivfabn7573c',
'ext': 'mp4',
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
'thumbnail': 're:http://.*\.jpg',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
url = 'http://%s/%s' % (mobj.group('host'), video_id)
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
@@ -87,7 +97,7 @@ class GorillaVidIE(InfoExtractor):
if countdown:
self._sleep(countdown, video_id)
post = compat_urllib_parse.urlencode(fields)
post = compat_urllib_parse.urlencode(encode_dict(fields))
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -95,7 +105,7 @@ class GorillaVidIE(InfoExtractor):
webpage = self._download_webpage(req, video_id, 'Downloading video page')
title = self._search_regex(
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
webpage, 'title', default=None) or self._og_search_title(webpage)
video_url = self._search_regex(
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')

View File

@@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -97,3 +97,28 @@ class ImgurIE(InfoExtractor):
'description': self._og_search_description(webpage),
'title': self._og_search_title(webpage),
}
class ImgurAlbumIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
},
'playlist_count': 25,
}
def _real_extract(self, url):
album_id = self._match_id(url)
album_images = self._download_json(
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
album_id)['data']['images']
entries = [
self.url_result('http://imgur.com/%s' % image['hash'])
for image in album_images if image.get('hash')]
return self.playlist_result(entries, album_id)

View File

@@ -13,12 +13,24 @@ from ..utils import (
class KalturaIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:kaltura:|
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
)(?P<partner_id>\d+)
(?::|
/(?:[^/]+/)*?entry_id/
)(?P<id>[0-9a-z_]+)'''
(?:
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
https?://
(:?(?:www|cdnapisec)\.)?kaltura\.com/
(?:
(?:
# flash player
index\.php/kwidget/
(?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
(?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
# html5 player
html5/html5lib/
(?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
.*\?.*\bwid=_(?P<partner_id_html5>\d+)
)
)
)
'''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
_TESTS = [
{
@@ -43,6 +55,10 @@ class KalturaIE(InfoExtractor):
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
'only_matching': True,
},
{
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
'only_matching': True,
}
]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
@@ -105,9 +121,9 @@ class KalturaIE(InfoExtractor):
video_id, actions, note='Downloading video info JSON')
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
info, source_data = self._get_video_info(entry_id, partner_id)
@@ -126,7 +142,7 @@ class KalturaIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': video_id,
'id': entry_id,
'title': info['name'],
'formats': formats,
'description': info.get('description'),

View File

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
parse_duration,
)
class KontrTubeIE(InfoExtractor):
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
webpage = self._download_webpage(
url, display_id, 'Downloading page')
video_url = self._html_search_regex(
video_url = self._search_regex(
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
thumbnail = self._search_regex(
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
title = self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'video title')
r'(?s)<h2>(.+?)</h2>', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'video description')
'description', webpage, 'description')
mobj = re.search(
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
duration = self._search_regex(
r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
if duration:
duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
view_count = self._html_search_regex(
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
view_count = self._search_regex(
r'Просмотров: <em>([^<]+)</em>',
webpage, 'view count', fatal=False)
if view_count:
view_count = int_or_none(view_count.replace(' ', ''))
comment_count = None
comment_str = self._html_search_regex(
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'):
comment_count = 0
else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj:
comment_count = mobj.group('total')
comment_count = int_or_none(self._search_regex(
r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))
return {
'id': video_id,

View File

@@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor):
'duration': 27,
'thumbnail': 're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'Not accessible from Travis CI server',
},
}
def _real_extract(self, url):

View File

@@ -202,6 +202,7 @@ class KuwoSingerIE(InfoExtractor):
'title': 'Ali',
},
'playlist_mincount': 95,
'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
}]
def _real_extract(self, url):

View File

@@ -8,9 +8,9 @@ from ..utils import unified_strdate
class LibsynIE(InfoExtractor):
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
_TEST = {
_TESTS = [{
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
'md5': '443360ee1b58007bc3dcf09b41d093bb',
'info_dict': {
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220',
'thumbnail': 're:^https?://.*',
},
}
}, {
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
'md5': '6c5cb21acd622d754d3b1a92b582ce42',
'info_dict': {
'id': '3727166',
'ext': 'mp3',
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
'upload_date': '20150818',
'thumbnail': 're:^https?://.*',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
url = m.group('mainurl')
webpage = self._download_webpage(url, video_id)
formats = [{
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
podcast_title = self._search_regex(
r'<h2>([^<]+)</h2>', webpage, 'title')
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
episode_title = self._search_regex(
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
description = self._html_search_regex(
r'<div id="info_text_body">(.+?)</div>', webpage,
'description', fatal=False)
'description', default=None)
thumbnail = self._search_regex(
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
release_date = unified_strdate(self._search_regex(
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

View File

@@ -118,9 +118,7 @@ class LyndaIE(LyndaBaseIE):
'lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError(
'Video %s is only available for members. '
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
self.raise_login_required('Video %s is only available for members' % video_id)
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']

View File

@@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor):
'uploader_id': 'sonypicturesrus@mail.ru',
'duration': 184,
},
'skip': 'Not accessible from Travis CI server',
},
{
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
@@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor):
'uploader_id': 'hitech@corp.mail.ru',
'duration': 245,
},
'skip': 'Not accessible from Travis CI server',
},
]

View File

@@ -18,12 +18,12 @@ class TechTVMITIE(InfoExtractor):
_TEST = {
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
'info_dict': {
'id': '25418',
'ext': 'mp4',
'title': 'MIT DNA Learning Center Set',
'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
'title': 'MIT DNA and Protein Sets',
'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
},
}
@@ -33,8 +33,8 @@ class TechTVMITIE(InfoExtractor):
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
base_url = self._search_regex(
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
base_url = self._proto_relative_url(self._search_regex(
r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
formats_json = self._search_regex(
r'bitrates: (\[.+?\])', raw_page, 'video formats')
formats_mit = json.loads(formats_json)

View File

@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return [{'url': url, 'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
'country, trying with the mobile version')
@@ -114,7 +114,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms'
mediagen_url += '&' if '?' in mediagen_url else '?'
mediagen_url += 'acceptMethods=fms'
mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
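The separator fix matters for mediagen URLs that have no query string yet (see the MTVDEIE test below). In short:

url = 'http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97'
url += '&' if '?' in url else '?'
url += 'acceptMethods=fms'
# -> '...fcb97?acceptMethods=fms', instead of the previous unconditional '&',
# which tacked '&acceptMethods=fms' onto a URL that had no query at all.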
@@ -141,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
if title_el is None:
title_el = itemdoc.find('.//title')
title_el = itemdoc.find('.//title') or itemdoc.find('./title')
if title_el.text is None:
title_el = None
@@ -174,8 +175,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
if self._LANG:
info_url += 'lang=%s&' % self._LANG
info_url += data
return self._get_videos_info_from_url(info_url, video_id)
def _get_videos_info_from_url(self, url, video_id):
idoc = self._download_xml(
info_url, video_id,
url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
@@ -288,3 +292,65 @@ class MTVIggyIE(MTVServicesInfoExtractor):
}
}
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
class MTVDEIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
_TESTS = [{
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
'info_dict': {
'id': 'music_video-a50bc5f0b3aa4b3190aa',
'ext': 'mp4',
'title': 'MusicVideo_cro-traum',
'description': 'Cro - Traum',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
'info_dict': {
'id': 'local_playlist-f5ae778b9832cc837189',
'ext': 'mp4',
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
# single video in pagePlaylist with different id
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
'info_dict': {
'id': 'local_playlist-4e760566473c4c8c5344',
'ext': 'mp4',
'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
'description': 'MTV Movies Supercut',
},
'params': {
# rtmp download
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
playlist = self._parse_json(
self._search_regex(
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
video_id)
# news pages contain single video in playlist with different id
if len(playlist) == 1:
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
for item in playlist:
item_id = item.get('id')
if item_id and compat_str(item_id) == video_id:
return self._get_videos_info_from_url(item['mrss'], video_id)

View File

@@ -1,63 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1010863',
'ext': 'mp4',
'uploader_id': 'the-allman-brothers-band',
'title': 'Straight from the Heart',
'duration': 244,
'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*',
'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
'timestamp': int,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
thumbnail = self._search_regex(
r'<meta itemprop="thumbnail" content="([^"]+)"',
webpage, 'thumbnail', fatal=False)
data_div = self._search_regex(
r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
uploader = self._html_search_regex(
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title')
location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID')
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
return {
'id': mobj.group('id'),
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
'ie_key': 'Kaltura',
'display_id': display_id,
'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage),
'location': location,
'title': title,
'uploader': uploader,
}

View File

@@ -0,0 +1,58 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
)
class MwaveIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
'info_dict': {
'id': '168859',
'ext': 'flv',
'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'M COUNTDOWN',
'duration': 206,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
vod_info = self._download_json(
'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
video_id, 'Downloading vod JSON')
formats = []
for num, cdn_info in enumerate(vod_info['cdn']):
stream_url = cdn_info.get('url')
if not stream_url:
continue
stream_name = cdn_info.get('name') or compat_str(num)
f4m_stream = self._download_json(
stream_url, video_id,
'Downloading %s stream JSON' % stream_name)
f4m_url = f4m_stream.get('fileurl')
if not f4m_url:
continue
formats.extend(
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
self._sort_formats(formats)
return {
'id': video_id,
'title': vod_info['title'],
'thumbnail': vod_info.get('cover'),
'uploader': vod_info.get('program_title'),
'duration': parse_duration(vod_info.get('time')),
'view_count': int_or_none(vod_info.get('hit')),
'formats': formats,
}

View File

@@ -236,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
'url': info['videoAssets'][-1]['publicUrl'],
'ie_key': 'ThePlatform',
}
class MSNBCIE(InfoExtractor):
# https URLs redirect to corresponding http ones
_VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_meta('embedURL', webpage)
return self.url_result(embed_url)

View File

@@ -12,6 +12,7 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
encode_dict,
ExtractorError,
int_or_none,
parse_duration,
@@ -100,10 +101,7 @@ class NiconicoIE(InfoExtractor):
'mail': username,
'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
request = compat_urllib_request.Request(
'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(

View File

@@ -130,10 +130,16 @@ class NowTVIE(InfoExtractor):
}, {
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'only_matching': True,
}, {
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id_split = display_id.split('/')
if len(display_id_split) > 2:
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
info = self._download_json(
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,

View File

@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.ch'

View File

@@ -12,7 +12,7 @@ from ..utils import (
class OdnoklassnikiIE(InfoExtractor):
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
@@ -43,9 +43,27 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': int,
'age_limit': 0,
},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
'url': 'http://ok.ru/video/64211978996595-1',
'md5': '5d7475d428845cd2e13bae6f1a992278',
'info_dict': {
'id': '64211978996595-1',
'ext': 'mp4',
'title': 'Космическая среда от 26 августа 2015',
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
'duration': 440,
'upload_date': '20150826',
'uploader_id': '750099571',
'uploader': 'Алина П',
'age_limit': 0,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True,
}, {
'url': 'http://www.ok.ru/video/20648036891',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -56,7 +74,8 @@ class OdnoklassnikiIE(InfoExtractor):
player = self._parse_json(
unescapeHTML(self._search_regex(
r'data-attributes="([^"]+)"', webpage, 'player')),
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
webpage, 'player', group='player')),
video_id)
flashvars = player['flashvars']
@@ -89,16 +108,7 @@ class OdnoklassnikiIE(InfoExtractor):
like_count = int_or_none(metadata.get('likeCount'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
'quality': quality(f['name']),
} for f in metadata['videos']]
return {
info = {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
@@ -108,5 +118,24 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader_id': uploader_id,
'like_count': like_count,
'age_limit': age_limit,
'formats': formats,
}
if metadata.get('provider') == 'USER_YOUTUBE':
info.update({
'_type': 'url_transparent',
'url': movie['contentId'],
})
return info
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
'quality': quality(f['name']),
} for f in metadata['videos']]
self._sort_formats(formats)
info['formats'] = formats
return info
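For provider == USER_YOUTUBE the extractor now returns a url_transparent result: youtube-dl resolves movie['contentId'] (a YouTube video ID) with the YouTube extractor, while fields set here roughly take precedence over what the delegate returns. The shape of such a result, with hypothetical values:

return {
    '_type': 'url_transparent',
    'url': 'dQw4w9WgXcQ',   # movie['contentId'] - hypothetical YouTube ID
    'id': video_id,
    'title': title,
    # any further metadata collected here overlays the delegated extractor's
}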

View File

@@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor):
})
info_url = compat_urlparse.urlunparse(
parsed_url._replace(query = compat_urllib_parse.urlencode(qs, True)))
parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
json_info = self._download_json(
info_url, video_id,

View File

@@ -0,0 +1,207 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
)
class PluralsightIE(InfoExtractor):
IE_NAME = 'pluralsight'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
_LOGIN_URL = 'https://www.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight'
_TEST = {
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
'info_dict': {
'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
'ext': 'mp4',
'title': 'Management of SQL Server - Demo Monitoring',
'duration': 338,
},
'skip': 'Requires pluralsight account credentials',
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
self.raise_login_required('Pluralsight account is required')
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'Username': username.encode('utf-8'),
'Password': password.encode('utf-8'),
})
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request(
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
error = self._search_regex(
r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
author = mobj.group('author')
name = mobj.group('name')
clip_id = mobj.group('clip')
course = mobj.group('course')
display_id = '%s-%s' % (name, clip_id)
webpage = self._download_webpage(url, display_id)
collection = self._parse_json(
self._search_regex(
r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
webpage, 'modules'),
display_id)
module, clip = None, None
for module_ in collection:
if module_.get('moduleName') == name:
module = module_
for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex')
if clip_index is None:
continue
if compat_str(clip_index) == clip_id:
clip = clip_
break
if not clip:
raise ExtractorError('Unable to resolve clip')
QUALITIES = {
'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640},
'high': {'width': 1024, 'height': 768},
}
ALLOWED_QUALITIES = (
('webm', ('high',)),
('mp4', ('low', 'medium', 'high',)),
)
formats = []
for ext, qualities in ALLOWED_QUALITIES:
for quality in qualities:
f = QUALITIES[quality].copy()
clip_post = {
'a': author,
'cap': 'false',
'cn': clip_id,
'course': course,
'lc': 'en',
'm': name,
'mt': ext,
'q': '%dx%d' % (f['width'], f['height']),
}
request = compat_urllib_request.Request(
'http://www.pluralsight.com/training/Player/ViewClip',
json.dumps(clip_post).encode('utf-8'))
request.add_header('Content-Type', 'application/json;charset=utf-8')
format_id = '%s-%s' % (ext, quality)
clip_url = self._download_webpage(
request, display_id, 'Downloading %s URL' % format_id, fatal=False)
if not clip_url:
continue
f.update({
'url': clip_url,
'ext': ext,
'format_id': format_id,
})
formats.append(f)
self._sort_formats(formats)
# TODO: captions
# http://www.pluralsight.com/training/Player/ViewClip + cap = true
# or
# http://www.pluralsight.com/training/Player/Captions
# { a = author, cn = clip_id, lc = end, m = name }
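# A hedged sketch of that Captions request, mirroring the ViewClip POST above;
# the endpoint and field names come from the TODO, while the language code and
# response handling are assumptions (untested):
#
#   captions_post = {'a': author, 'cn': clip_id, 'm': name, 'lc': 'en'}
#   request = compat_urllib_request.Request(
#       'http://www.pluralsight.com/training/Player/Captions',
#       json.dumps(captions_post).encode('utf-8'))
#   request.add_header('Content-Type', 'application/json;charset=utf-8')
#   captions = self._download_webpage(
#       request, display_id, 'Downloading captions', fatal=False)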
return {
'id': clip['clipName'],
'title': '%s - %s' % (module['title'], clip['title']),
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
'creator': author,
'formats': formats
}
class PluralsightCourseIE(InfoExtractor):
IE_NAME = 'pluralsight:course'
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
_TEST = {
# Free course from Pluralsight Starter Subscription for Microsoft TechNet
# https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
'info_dict': {
'id': 'hosting-sql-server-windows-azure-iaas',
'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
},
'playlist_count': 31,
}
def _real_extract(self, url):
course_id = self._match_id(url)
# TODO: PSM cookie
course = self._download_json(
'http://www.pluralsight.com/data/course/%s' % course_id,
course_id, 'Downloading course JSON')
title = course['title']
description = course.get('description') or course.get('shortDescription')
course_data = self._download_json(
'http://www.pluralsight.com/data/course/content/%s' % course_id,
course_id, 'Downloading course data JSON')
entries = []
for module in course_data:
for clip in module.get('clips', []):
player_parameters = clip.get('playerParameters')
if not player_parameters:
continue
entries.append(self.url_result(
'http://www.pluralsight.com/training/player?%s' % player_parameters,
'Pluralsight'))
return self.playlist_result(entries, course_id, title, description)

View File

@@ -1,6 +1,7 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
'md5': 'bfcc179030535b08dc2b36b469b5adc7',
'info_dict': {
'id': 'folge-203-0',
'ext': 'f4v',
'title': 'GRIP sucht den Sommerkönig',
'description': 'Matthias, Det und Helge treten gegeneinander an.'
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
'info_dict': {
'id': '21040-anna-erwischt-alex',
'ext': 'mp4',
'title': 'Anna erwischt Alex!',
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
},
'params': {
# rtmp download
'skip_download': True,
},
}]
def _real_extract(self, url):
@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
vico_id = self._html_search_regex(
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex(
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
mobj = re.search(
r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
webpage)
if mobj:
vico_id = mobj.group('vico_id')
vivi_id = mobj.group('vivi_id')
else:
vico_id = self._html_search_regex(
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
vivi_id = self._html_search_regex(
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
webpage = self._download_webpage(info_url, '')
info = self._download_json(info_url, video_id)
video_info = info['video']

View File

@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': 're:^https?://.*\.jpg',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,

View File

@@ -6,7 +6,7 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import compat_urllib_request, compat_urlparse
from ..utils import (
ExtractorError,
float_or_none,
@@ -102,7 +102,9 @@ class RTVEALaCartaIE(InfoExtractor):
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png = self._download_webpage(png_url, video_id, 'Downloading url information')
png_request = compat_urllib_request.Request(png_url)
png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
auth_url = video_url.replace(

View File

@@ -6,19 +6,19 @@ from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
int_or_none,
xpath_attr,
xpath_text,
)
class RuutuIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
'url': 'http://www.ruutu.fi/video/2058907',
'md5': 'ab2093f39be1ca8581963451b3c0234f',
'info_dict': {
'id': '2058907',
'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
'ext': 'mp4',
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
@@ -28,14 +28,13 @@ class RuutuIE(InfoExtractor):
},
},
{
'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
'url': 'http://www.ruutu.fi/video/2057306',
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
'info_dict': {
'id': '2057306',
'display_id': 'superpesis-katso-koko-kausi-ruudussa',
'ext': 'mp4',
'title': 'Superpesis: katso koko kausi Ruudussa',
'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 40,
'age_limit': 0,
@@ -44,29 +43,10 @@ class RuutuIE(InfoExtractor):
]
def _real_extract(self, url):
display_id = self._match_id(url)
video_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-media-id="(\d+)"', webpage, 'media id')
video_xml_url = None
media_data = self._search_regex(
r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
'media data', default=None)
if media_data:
media_json = self._parse_json(media_data, display_id, fatal=False)
if media_json:
xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
if xml_url:
video_xml_url = xml_url.replace('{ID}', video_id)
if not video_xml_url:
video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
video_xml = self._download_xml(video_xml_url, video_id)
video_xml = self._download_xml(
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
formats = []
processed_urls = []
@@ -109,10 +89,9 @@ class RuutuIE(InfoExtractor):
return {
'id': video_id,
'display_id': display_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
'formats': formats,

View File

@@ -20,7 +20,6 @@ from ..utils import (
class SafariBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
_NETRC_MACHINE = 'safari'
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
@@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor):
def _login(self):
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(
self._ACCOUNT_CREDENTIALS_HINT,
expected=True)
self.raise_login_required('safaribooksonline.com account is required')
headers = std_headers
if 'Referer' not in headers:

View File

@@ -12,8 +12,8 @@ from ..utils import (
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
_TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
'only_matching': True,
@@ -33,7 +33,7 @@ class ScreenwaveMediaIE(InfoExtractor):
'http://player.screenwavemedia.com/player.js',
video_id, 'Downloading playerconfig webpage')
videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')
videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
sources = self._parse_json(
js_to_json(
@@ -56,6 +56,7 @@ class ScreenwaveMediaIE(InfoExtractor):
# Fallback to hardcoded sources if JS changes again
if not sources:
self.report_warning('Falling back to a hardcoded list of streams')
sources = [{
'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
'type': 'mp4',

View File

@@ -14,17 +14,28 @@ from ..utils import (
class SharedIE(InfoExtractor):
_VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
IE_DESC = 'shared.sx and vivo.sx'
_VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
_TEST = {
_TESTS = [{
'url': 'http://shared.sx/0060718775',
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
'info_dict': {
'id': '0060718775',
'ext': 'mp4',
'title': 'Bmp4',
'filesize': 1720110,
},
}
}, {
'url': 'http://vivo.sx/d7ddda0e78',
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
'info_dict': {
'id': 'd7ddda0e78',
'ext': 'mp4',
'title': 'Chicken',
'filesize': 528031,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor):
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(
'Erotic broadcasts allowed only for registered users, '
'use --username and --password options to provide account credentials.',
expected=True)
self.raise_login_required('Erotic broadcasts allowed only for registered users')
login_form = {
'login-hint53': '1',

View File

@@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE):
'id': '114582580',
'title': 'The Akashic Chronicler (All)',
},
'playlist_mincount': 112,
'playlist_mincount': 111,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
'info_dict': {
@@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE):
'id': '114582580',
'title': 'The Akashic Chronicler (Reposts)',
},
'playlist_mincount': 9,
'playlist_mincount': 7,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Likes)',
},
'playlist_mincount': 333,
'playlist_mincount': 321,
}, {
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {

View File

@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text
class SpankwireIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
_TESTS = [{
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
'info_dict': {
@@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):
'upload_date': '20070507',
'age_limit': 18,
}
}
}, {
# download URL pattern: */mp4_<format_id>_<video_id>.mp4
'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
'md5': '09b3c20833308b736ae8902db2f8d7e6',
'info_dict': {
'id': '1921551',
'ext': 'mp4',
'title': 'Titcums Compiloation I',
'description': 'cum on tits',
'uploader': 'dannyh78999',
'uploader_id': '3056053',
'upload_date': '20150822',
'age_limit': 18,
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
video_id = mobj.group('id')
req = compat_urllib_request.Request(url)
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
r'by:\s*<a [^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
uploader_id = self._html_search_regex(
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
webpage, 'uploader id', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r'</a> on (.+?) at \d+:\d+',
@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
webpage, 'comment count', fatal=False))
video_urls = list(map(
compat_urllib_parse_unquote,
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
videos = re.findall(
r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
heights = [int(video[0]) for video in videos]
video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._search_regex(
r'flashvars\.video_title = "([^"]+)',
@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
video_urls))
formats = []
for video_url in video_urls:
for height, video_url in zip(heights, video_urls):
path = compat_urllib_parse_urlparse(video_url).path
format = path.split('/')[4].split('_')[:2]
resolution, bitrate_str = format
format = "-".join(format)
height = int(resolution.rstrip('Pp'))
tbr = int(bitrate_str.rstrip('Kk'))
formats.append({
_, quality = path.split('/')[4].split('_')[:2]
f = {
'url': video_url,
'resolution': resolution,
'format': format,
'tbr': tbr,
'height': height,
'format_id': format,
})
}
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
if tbr:
f.update({
'tbr': int(tbr),
'format_id': '%dp' % height,
})
else:
f['format_id'] = quality
formats.append(f)
self._sort_formats(formats)
age_limit = self._rta_search(webpage)
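The rewritten loop above derives the height from the digits in the playerData.cdnPath<height> variable name and only treats the second path segment as a bitrate when it looks like <digits>K. A standalone sketch of that classification rule, with made-up sample URLs (parse_quality is illustrative, not part of the extractor):

import re
from urllib.parse import unquote, urlparse  # py3 stand-ins for the compat helpers

def parse_quality(video_url):
    # The fifth path segment carries the quality token in both layouts:
    #   */<height>P_<tbr>K_<video_id>.mp4 -> token '1500K' (a bitrate)
    #   */mp4_<format_id>_<video_id>.mp4  -> token 'normal' (an opaque id)
    path = urlparse(unquote(video_url)).path
    _, quality = path.split('/')[4].split('_')[:2]
    m = re.match(r'^(\d+)[Kk]$', quality)
    return int(m.group(1)) if m else quality

parse_quality('http://cdn.example.com/a/b/c/720P_1500K_103545.mp4')   # -> 1500
parse_quality('http://cdn.example.com/a/b/c/mp4_normal_1921551.mp4')  # -> 'normal'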

View File

@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
_VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
@@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE):
}, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
}, {
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
'only_matching': True,
}]

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
import time
import hmac
import binascii
@@ -10,7 +10,8 @@ import hashlib
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
@@ -18,12 +19,69 @@ from ..utils import (
xpath_with_ns,
unsmuggle_url,
int_or_none,
url_basename,
float_or_none,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformIE(InfoExtractor):
class ThePlatformBaseIE(InfoExtractor):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(smil_url, video_id, note=note)
try:
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
raise ExtractorError(error_msg, expected=True)
formats = self._parse_smil_formats(
meta, smil_url, video_id, namespace=default_ns,
# The parameters are from syfy.com; other sites may use different ones.
# They also work for nbc.com.
f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
for _format in formats:
ext = determine_ext(_format['url'])
if ext == 'once':
_format['ext'] = 'mp4'
self._sort_formats(formats)
subtitles = self._parse_smil_subtitles(meta, default_ns)
return formats, subtitles
def get_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info = self._download_json(info_url, video_id)
subtitles = {}
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles[lang] = [{
'ext': 'srt' if mime == 'text/srt' else 'ttml',
'url': src,
}]
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': int_or_none(info.get('duration'), 1000),
}
class ThePlatformIE(ThePlatformBaseIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
@@ -67,6 +125,20 @@ class ThePlatformIE(InfoExtractor):
}, {
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
'only_matching': True,
}, {
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
'md5': '734f3790fb5fc4903da391beeebc4836',
'info_dict': {
'id': 'tdy_or_siri_150701',
'ext': 'mp4',
'title': 'iPhone Siris sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1435752600,
'upload_date': '20150701',
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
},
}]
@staticmethod
@@ -101,6 +173,24 @@ class ThePlatformIE(InfoExtractor):
path += '/media'
path += '/' + video_id
qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
if 'guid' in qs_dict:
webpage = self._download_webpage(url, video_id)
scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
feed_id = None
# The feed id is usually located in the last script.
# There seems to be no pattern to the relevant script's filename,
# so try them one by one.
for script in reversed(scripts):
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
if feed_id is not None:
break
if feed_id is None:
raise ExtractorError('Unable to find feed id')
return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
provider_id, feed_id, qs_dict['guid'][0]))
if smuggled_data.get('force_smil_url', False):
smil_url = url
elif mobj.group('config'):
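The feed-id discovery added above amounts to fetching every <script> src in reverse page order and taking the first defaultFeedId assignment found. The same scan in isolation (fetch_text stands in for _download_webpage and must be supplied by the caller):

import re

def find_default_feed_id(webpage, fetch_text):
    # fetch_text(url) -> str; later scripts are tried first, since the
    # feed id is usually defined in the last one.
    scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
    for script_url in reversed(scripts):
        m = re.search(r'defaultFeedId\s*:\s*"([^"]+)"', fetch_text(script_url))
        if m:
            return m.group(1)
    return None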
@@ -120,95 +210,85 @@ class ThePlatformIE(InfoExtractor):
if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
raise ExtractorError(error_msg, expected=True)
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
subtitles = {}
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles[lang] = [{
'ext': 'srt' if mime == 'text/srt' else 'ttml',
'url': src,
}]
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is None:
f4m_node = body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
f4m_url += '?'
# the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
formats = self._extract_f4m_formats(f4m_url, video_id)
else:
formats = []
switch = body.find(_x('smil:switch'))
if switch is None:
switch = body.find(_x('smil:par//smil:switch'))
if switch is None:
switch = body.find(_x('smil:par/smil:switch'))
if switch is None:
switch = body.find(_x('smil:par'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')):
attr = f.attrib
width = int_or_none(attr.get('width'))
height = int_or_none(attr.get('height'))
vbr = int_or_none(attr.get('system-bitrate'), 1000)
format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
'format_id': format_id,
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
'width': width,
'height': height,
'vbr': vbr,
})
else:
switch = body.find(_x('smil:seq//smil:switch'))
if switch is None:
switch = body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int_or_none(attr.get('system-bitrate'), 1000)
ext = determine_ext(attr['src'])
if ext == 'once':
ext = 'mp4'
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
'ext': ext,
})
self._sort_formats(formats)
return {
ret = self.get_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({
'id': video_id,
'title': info['title'],
'subtitles': subtitles,
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': int_or_none(info.get('duration'), 1000),
}
'subtitles': combined_subtitles,
})
return ret
class ThePlatformFeedIE(ThePlatformBaseIE):
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
_TEST = {
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
'md5': '22d2b84f058d3586efcd99e57d59d314',
'info_dict': {
'id': 'n_hardball_5biden_140207',
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
provider_id = mobj.group('provider_id')
feed_id = mobj.group('feed_id')
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
feed = self._download_json(real_url, video_id)
entry = feed['entries'][0]
formats = []
subtitles = {}
first_video_id = None
duration = None
for item in entry['media$content']:
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
cur_video_id = url_basename(smil_url)
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
self._sort_formats(formats)
thumbnails = [{
'url': thumbnail['plfile$url'],
'width': int_or_none(thumbnail.get('plfile$width')),
'height': int_or_none(thumbnail.get('plfile$height')),
} for thumbnail in entry.get('media$thumbnails', [])]
timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
categories = [item['media$name'] for item in entry.get('media$categories', [])]
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
ret.update({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': duration,
'timestamp': timestamp,
'categories': categories,
})
return ret
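Both return paths above fold per-rendition subtitles into one dict with _merge_subtitles. Assuming that helper unions the language keys and concatenates the per-language track lists (an assumption about its semantics, not a copy of the real implementation), it behaves roughly like:

def merge_subtitles(sub1, sub2):
    # Each argument maps a language code to a list of subtitle tracks
    # (dicts with 'url'/'ext' keys); lists for shared languages are joined.
    merged = dict(sub1)
    for lang, tracks in sub2.items():
        merged[lang] = merged.get(lang, []) + tracks
    return merged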

View File

@@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
raise ExtractorError(
'This video requires login, use --username and --password '
'options to provide account credentials.', expected=True)
self.raise_login_required('This video requires login')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)

View File

@@ -12,9 +12,11 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
parse_iso8601,
)
@@ -26,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/login'
_LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
_LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
_NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
@@ -69,8 +71,15 @@ class TwitchBaseIE(InfoExtractor):
'password': password.encode('utf-8'),
})
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post url', default=self._LOGIN_POST_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = compat_urllib_request.Request(
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
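The login change above stops hardcoding the POST endpoint: the form's action attribute is extracted and, when relative, resolved against the login page URL. That resolution on its own (the sample form HTML is invented):

import re
from urllib.parse import urljoin

LOGIN_URL = 'https://secure.twitch.tv/login'

def resolve_post_url(login_page, default='https://passport.twitch.tv/authentications/new'):
    m = re.search(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page)
    post_url = m.group('url') if m else default
    if not post_url.startswith('http'):
        post_url = urljoin(LOGIN_URL, post_url)
    return post_url

resolve_post_url('<form method="post" action="/authentications/new">')
# -> 'https://secure.twitch.tv/authentications/new'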
@@ -132,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
def _extract_info(self, info):
return {
'id': info['_id'],
'title': info['title'],
'description': info['description'],
'duration': info['length'],
'thumbnail': info['preview'],
'uploader': info['channel']['display_name'],
'uploader_id': info['channel']['name'],
'timestamp': parse_iso8601(info['recorded_at']),
'view_count': info['views'],
'title': info.get('title') or 'Untitled Broadcast',
'description': info.get('description'),
'duration': int_or_none(info.get('length')),
'thumbnail': info.get('preview'),
'uploader': info.get('channel', {}).get('display_name'),
'uploader_id': info.get('channel', {}).get('name'),
'timestamp': parse_iso8601(info.get('recorded_at')),
'view_count': int_or_none(info.get('views')),
}
def _real_extract(self, url):
@@ -187,7 +196,7 @@ class TwitchVodIE(TwitchItemBaseIE):
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
_TEST = {
_TESTS = [{
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
'info_dict': {
'id': 'v6528877',
@@ -206,7 +215,26 @@ class TwitchVodIE(TwitchItemBaseIE):
# m3u8 download
'skip_download': True,
},
}
}, {
# Untitled broadcast (title is None)
'url': 'http://www.twitch.tv/belkao_o/v/11230755',
'info_dict': {
'id': 'v11230755',
'ext': 'mp4',
'title': 'Untitled Broadcast',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 1638,
'timestamp': 1439746708,
'upload_date': '20150816',
'uploader': 'BelkAO_o',
'uploader_id': 'belkao_o',
'view_count': int,
},
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
item_id = self._match_id(url)

View File

@@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor):
def _login(self):
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError(
'Udemy account is required, use --username and --password options to provide account credentials.',
expected=True)
self.raise_login_required('Udemy account is required')
login_popup = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login popup')

View File

@@ -1,81 +0,0 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import int_or_none
class VideoBamIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
_TESTS = [
{
'url': 'http://videobam.com/OiJQM',
'md5': 'db471f27763a531f10416a0c58b5a1e0',
'info_dict': {
'id': 'OiJQM',
'ext': 'mp4',
'title': 'Is Alcohol Worse Than Ecstasy?',
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
'uploader': 'frihetsvinge',
},
},
{
'url': 'http://videobam.com/pqLvq',
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
'note': 'HD video',
'info_dict': {
'id': 'pqLvq',
'ext': 'mp4',
'title': '_',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
formats = []
for preference, format_id in enumerate(['low', 'high']):
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
if not mobj:
continue
formats.append({
'url': mobj.group('url'),
'ext': 'mp4',
'format_id': format_id,
'preference': preference,
})
if not formats:
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
formats = [{
'url': item['url'],
'ext': 'mp4',
} for item in player_config['playlist'] if 'autoPlay' in item]
self._sort_formats(formats)
title = self._og_search_title(page, default='_', fatal=False)
description = self._og_search_description(page, default=None)
thumbnail = self._og_search_thumbnail(page)
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
view_count = int_or_none(
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}

View File

@@ -1,10 +1,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
str_to_int,
parse_iso8601,
)
@@ -12,18 +14,41 @@ class VidmeIE(InfoExtractor):
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'https://vid.me/QNB',
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
'md5': 'c62f1156138dc3323902188c5b5a8bd6',
'info_dict': {
'id': 'QNB',
'ext': 'mp4',
'title': 'Fishing for piranha - the easy way',
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
'duration': 119.92,
'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1406313244,
'upload_date': '20140725',
'thumbnail': 're:^https?://.*\.jpg',
'age_limit': 0,
'duration': 119.92,
'view_count': int,
'like_count': int,
'comment_count': int,
},
}, {
'url': 'https://vid.me/Gc6M',
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
'info_dict': {
'id': 'Gc6M',
'ext': 'mp4',
'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1441211642,
'upload_date': '20150902',
'uploader': 'SunshineM',
'uploader_id': '3552827',
'age_limit': 0,
'duration': 223.72,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
},
}, {
# tests uploader field
@@ -33,63 +58,94 @@ class VidmeIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Carver',
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
'duration': 97.859999999999999,
'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1433203629,
'upload_date': '20150602',
'uploader': 'Thomas',
'thumbnail': 're:^https?://.*\.jpg',
'uploader_id': '109747',
'age_limit': 0,
'duration': 97.859999999999999,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
},
}, {
# From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
# nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
'url': 'https://vid.me/e/Wmur',
'only_matching': True,
'info_dict': {
'id': 'Wmur',
'ext': 'mp4',
'title': 'naked smoking & stretching',
'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1430931613,
'upload_date': '20150506',
'uploader': 'naked-yogi',
'uploader_id': '1638622',
'age_limit': 18,
'duration': 653.26999999999998,
'view_count': int,
'like_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
url = url.replace('vid.me/e/', 'vid.me/')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL')
try:
response = self._download_json(
'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
response = self._parse_json(e.cause.read(), video_id)
else:
raise
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default='')
thumbnail = self._og_search_thumbnail(webpage)
timestamp = int_or_none(self._og_search_property(
'updated_time', webpage, fatal=False))
width = int_or_none(self._og_search_property(
'video:width', webpage, fatal=False))
height = int_or_none(self._og_search_property(
'video:height', webpage, fatal=False))
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False))
uploader = self._html_search_regex(
'class="video_author_username"[^>]*>([^<]+)',
webpage, 'uploader', default=None)
error = response.get('error')
if error:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
video = response['video']
formats = [{
'format_id': f.get('type'),
'url': f['uri'],
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
} for f in video.get('formats', []) if f.get('uri')]
self._sort_formats(formats)
title = video['title']
description = video.get('description')
thumbnail = video.get('thumbnail_url')
timestamp = parse_iso8601(video.get('date_created'), ' ')
uploader = video.get('user', {}).get('username')
uploader_id = video.get('user', {}).get('user_id')
age_limit = 18 if video.get('nsfw') is True else 0
duration = float_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
like_count = int_or_none(video.get('likes_count'))
comment_count = int_or_none(video.get('comment_count'))
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'age_limit': age_limit,
'timestamp': timestamp,
'width': width,
'height': height,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'uploader': uploader,
'comment_count': comment_count,
'formats': formats,
}
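The rewrite above turns the extractor into a thin client of the vid.me JSON API, which answers error cases with HTTP 400 plus a JSON body. A stdlib-only sketch of the same request flow (error handling simplified; not the extractor code itself):

import json
from urllib.error import HTTPError
from urllib.request import urlopen

def fetch_vidme_video(video_id):
    try:
        body = urlopen('https://api.vid.me/videoByUrl/%s' % video_id).read()
    except HTTPError as e:
        if e.code != 400:
            raise
        body = e.read()  # the 400 response still carries a JSON payload
    response = json.loads(body.decode('utf-8'))
    if response.get('error'):
        raise RuntimeError('vid.me returned error: %s' % response['error'])
    video = response['video']
    return video['title'], [
        {'format_id': f.get('type'), 'url': f['uri']}
        for f in video.get('formats', []) if f.get('uri')
    ]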

View File

@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
program = mobj.group('program')
webpage = self._download_webpage(url, program)
page_id = mobj.group('page')
if page_id:
page_id = int(page_id)
start_page = page_id
last_page = start_page + 1
playlist_id = '%s-page%d' % (program, page_id)
else:
start_page = 0
last_page = int(self._search_regex(
r'videos\?page=(\d+)">laatste</a>',
webpage, 'last page', default=0)) + 1
playlist_id = program
entries = []
for current_page_id in range(start_page, last_page):
for current_page_id in itertools.count(start_page):
current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
program,
'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
'Downloading page %d' % (current_page_id + 1))
page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier')
for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries)
if page_id or '>Meer<' not in current_page:
break
return self.playlist_result(entries, playlist_id)
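The pagination fix above replaces a precomputed page range with itertools.count plus an explicit stop marker ('>Meer<', the Dutch 'more' link). The open-ended pattern in isolation (fetch_page and the marker are parameters for illustration):

import itertools

def paginate(fetch_page, start_page=0, more_marker='>Meer<'):
    # Yield page bodies until the 'more' link disappears, instead of
    # trusting a last-page number scraped up front.
    for page_number in itertools.count(start_page):
        page = fetch_page(page_number)
        yield page
        if more_marker not in page:
            break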

View File

@@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
import hmac
from hashlib import sha1
from base64 import b64encode
from time import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
determine_ext
)
from ..compat import compat_urllib_parse
class VLiveIE(InfoExtractor):
IE_NAME = 'vlive'
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://m.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': '[V] Girl\'s Day\'s Broadcast',
'creator': 'Girl\'s Day',
},
}
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://m.vlive.tv/video/%s' % video_id,
video_id, note='Download video page')
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
creator = self._html_search_regex(
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
msgpad = '%.0f' % (time() * 1000)
md = b64encode(
hmac.new(self._SECRET.encode('ascii'),
(url[:255] + msgpad).encode('ascii'), sha1).digest()
)
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
playinfo = self._download_json(url, video_id, 'Downloading video json')
if playinfo.get('message', '') != 'success':
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
if not playinfo.get('result'):
raise ExtractorError('No videos found.')
formats = []
for vid in playinfo['result'].get('videos', {}).get('list', []):
formats.append({
'url': vid['source'],
'ext': 'mp4',
'abr': vid.get('bitrate', {}).get('audio'),
'vbr': vid.get('bitrate', {}).get('video'),
'format_id': vid['encodingOption']['name'],
'height': vid.get('height'),
'width': vid.get('width'),
})
self._sort_formats(formats)
subtitles = {}
for caption in playinfo['result'].get('captions', {}).get('list', []):
subtitles[caption['language']] = [
{'ext': determine_ext(caption['source'], default_ext='vtt'),
'url': caption['source']}]
return {
'id': video_id,
'title': title,
'creator': creator,
'thumbnail': thumbnail,
'formats': formats,
'subtitles': subtitles,
}
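For clarity, the request signing used above in isolation (Python 3 urllib in place of compat_urllib_parse; the secret is the constant embedded in the extractor):

import hmac
import time
from base64 import b64encode
from hashlib import sha1
from urllib.parse import urlencode

def sign_playinfo_url(url, secret):
    # msgpad: current epoch time in milliseconds; md: base64-encoded
    # HMAC-SHA1 of (first 255 characters of the URL + msgpad).
    msgpad = '%.0f' % (time.time() * 1000)
    md = b64encode(hmac.new(
        secret.encode('ascii'),
        (url[:255] + msgpad).encode('ascii'), sha1).digest()).decode('ascii')
    return url + '&' + urlencode({'msgpad': msgpad, 'md': md})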

View File

@@ -19,25 +19,25 @@ class WashingtonPostIE(InfoExtractor):
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
'md5': '79132cc09ec5309fa590ae46e4cc31bc',
'md5': 'b9be794ceb56c7267d410a13f99d801a',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'Breaking Points: The Paper Mine',
'duration': 1287,
'duration': 1290,
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
'uploader': 'The Washington Post',
'timestamp': 1395527908,
'upload_date': '20140322',
},
}, {
'md5': 'e1d5734c06865cc504ad99dc2de0d443',
'md5': '1fff6a689d8770966df78c8cb6c8c17c',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'The town bureaucracy sustains',
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
'duration': 2217,
'duration': 2220,
'timestamp': 1395528005,
'upload_date': '20140322',
'uploader': 'The Washington Post',

View File

@@ -1,40 +1,33 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)/'
_TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'md5': 'ee21217ffd66d058e8b16be340b74883',
'info_dict': {
'id': 'maruexhausted',
'ext': 'flv',
'ext': 'mp4',
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}, {
# youtube video
'url': 'http://www.wimp.com/clowncar/',
'md5': '4e2986c793694b55b37cf92521d12bb4',
'info_dict': {
'id': 'cG4CEr2aiSg',
'id': 'clowncar',
'ext': 'mp4',
'title': 'Basset hound clown car...incredible!',
'description': 'md5:8d228485e0719898c017203f900b3a35',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
'upload_date': '20140303',
'title': 'It\'s like a clown car.',
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
[r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],

View File

@@ -19,7 +19,7 @@ class XuiteIE(InfoExtractor):
_TESTS = [{
# Audio
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
'md5': '63a42c705772aa53fd4c1a0027f86adf',
'md5': 'e79284c87b371424885448d11f6398c8',
'info_dict': {
'id': '3860914',
'ext': 'mp3',

View File

@@ -101,7 +101,7 @@ class YahooIE(InfoExtractor):
}
}, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '67010fdf3a08d290e060a4dd96baa07b',
'md5': '88e209b417f173d86186bef6e4d1f160',
'info_dict': {
'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
'ext': 'mp4',

View File

@@ -1,18 +1,38 @@
# coding=utf-8
# coding: utf-8
from __future__ import unicode_literals
import re
import hashlib
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
)
class YandexMusicBaseIE(InfoExtractor):
class YandexMusicTrackIE(InfoExtractor):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
_TEST = {
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
}
}
def _get_track_url(self, storage_dir, track_id):
data = self._download_json(
'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
@@ -35,24 +55,6 @@ class YandexMusicBaseIE(InfoExtractor):
'duration': float_or_none(track.get('durationMs'), 1000),
}
class YandexMusicTrackIE(YandexMusicBaseIE):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
_TEST = {
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
album_id, track_id = mobj.group('album_id'), mobj.group('id')
@@ -64,7 +66,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
return self._get_track_info(track)
class YandexMusicAlbumIE(YandexMusicBaseIE):
class YandexMusicPlaylistBaseIE(InfoExtractor):
def _build_playlist(self, tracks):
return [
self.url_result(
'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:album'
IE_DESC = 'Яндекс.Музыка - Альбом'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
@@ -85,7 +95,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
album_id, 'Downloading album JSON')
entries = [self._get_track_info(track) for track in album['volumes'][0]]
entries = self._build_playlist(album['volumes'][0])
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
year = album.get('year')
@@ -95,12 +105,12 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
return self.playlist_result(entries, compat_str(album['id']), title)
class YandexMusicPlaylistIE(YandexMusicBaseIE):
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:playlist'
IE_DESC = 'Яндекс.Музыка - Плейлист'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
'info_dict': {
'id': '1245',
@@ -108,20 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
},
'playlist_count': 6,
}
}, {
# playlist exceeding the limit of 150 tracks shipped with the webpage (see
# https://github.com/rg3/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'info_dict': {
'id': '1036',
'title': 'Музыка 90-х',
},
'playlist_count': 310,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
playlist = self._parse_json(
mu = self._parse_json(
self._search_regex(
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
playlist_id)['pageData']['playlist']
playlist_id)
entries = [self._get_track_info(track) for track in playlist['tracks']]
playlist = mu['pageData']['playlist']
tracks, track_ids = playlist['tracks'], playlist['trackIds']
# The tracks dictionary shipped with the webpage is limited to 150 tracks;
# the missing tracks have to be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
request = compat_urllib_request.Request(
'https://music.yandex.ru/handlers/track-entries.jsx',
compat_urllib_parse.urlencode({
'entries': ','.join(missing_track_ids),
'lang': mu.get('settings', {}).get('lang', 'en'),
'external-domain': 'music.yandex.ru',
'overembed': 'false',
'sign': mu.get('authData', {}).get('user', {}).get('sign'),
'strict': 'true',
}).encode('utf-8'))
request.add_header('Referer', url)
request.add_header('X-Requested-With', 'XMLHttpRequest')
missing_tracks = self._download_json(
request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
if missing_tracks:
tracks.extend(missing_tracks)
return self.playlist_result(
entries, compat_str(playlist_id),
self._build_playlist(tracks),
compat_str(playlist_id),
playlist['title'], playlist.get('description'))
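The workaround above hinges on one piece of bookkeeping: the page embeds at most 150 track objects, while trackIds always lists the whole playlist, so the set difference is exactly what must be fetched from track-entries.jsx. That step on its own (sample data made up):

def missing_ids(tracks, track_ids):
    present = set(str(t['id']) for t in tracks if t.get('id'))
    return set(map(str, track_ids)) - present

missing_ids([{'id': 1}, {'id': 2}], [1, 2, 3, 4])  # -> {'3', '4'}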

View File

@@ -49,6 +49,17 @@ class YoukuIE(InfoExtractor):
},
'playlist_count': 13,
'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
'note': 'Video protected with password',
'info_dict': {
'id': 'XNjA1NzA2Njgw',
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
},
'playlist_count': 19,
'params': {
'videopassword': '100600',
},
}]
def construct_video_urls(self, data1, data2):
@@ -185,9 +196,15 @@ class YoukuIE(InfoExtractor):
raw_data = self._download_json(req, video_id, note=note)
return raw_data['data'][0]
video_password = self._downloader.params.get('videopassword', None)
# request basic data
basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
if video_password:
basic_data_url += '?password=%s' % video_password
data1 = retrieve_data(
'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
basic_data_url,
'Downloading JSON metadata 1')
data2 = retrieve_data(
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,

View File

@@ -26,6 +26,7 @@ from ..compat import (
)
from ..utils import (
clean_html,
encode_dict,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -111,10 +112,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'hl': 'en_US',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
login_results = self._download_webpage(
@@ -147,8 +145,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'TrustDevice': 'on',
})
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
tfa_results = self._download_webpage(
@@ -660,7 +657,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1243,7 +1240,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
url_map = {}
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
if 'itag' not in url_data or 'url' not in url_data:
@@ -1289,7 +1286,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-([^/]+?)(?:/html5player)?\.js',
r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
@@ -1303,8 +1300,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
url += '&signature=' + signature
if 'ratebypass' not in url:
url += '&ratebypass=yes'
url_map[format_id] = url
formats = _map_to_format_list(url_map)
# Some itags are not included in the DASH manifest, so the corresponding formats
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
# Try to extract the metadata from the url_encoded_fmt_stream_map entry instead.
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
dct = {
'format_id': format_id,
'url': url,
'player_url': player_url,
'filesize': int_or_none(url_data.get('clen', [None])[0]),
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]),
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
}
type_ = url_data.get('type', [None])[0]
if type_:
type_split = type_.split(';')
kind_ext = type_split[0].split('/')
if len(kind_ext) == 2:
kind, ext = kind_ext
dct['ext'] = ext
if kind in ('audio', 'video'):
codecs = None
for mobj in re.finditer(
r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
if mobj.group('key') == 'codecs':
codecs = mobj.group('val')
break
if codecs:
codecs = codecs.split(',')
if len(codecs) == 2:
acodec, vcodec = codecs[0], codecs[1]
else:
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
dct.update({
'acodec': acodec,
'vcodec': vcodec,
})
if format_id in self._formats:
dct.update(self._formats[format_id])
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
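The per-itag metadata block added above derives ext/acodec/vcodec from each entry's 'type' field. The codec-splitting rule in isolation (the sample string is a typical value, not taken from a live response; the two-codec branch assigns positionally, exactly as the diff does):

import re

def parse_type(type_):
    kind, ext = type_.split(';')[0].split('/')
    codecs = None
    for m in re.finditer(
            r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
        if m.group('key') == 'codecs':
            codecs = m.group('val')
            break
    acodec = vcodec = None
    if codecs:
        codecs = codecs.split(',')
        if len(codecs) == 2:
            acodec, vcodec = codecs[0], codecs[1]
        else:
            # Single codec: audio-only entries carry an audio codec,
            # everything else a video codec.
            acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
    return ext, acodec, vcodec

parse_type('audio/mp4; codecs="mp4a.40.2"')  # -> ('mp4', 'mp4a.40.2', 'none')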
@@ -1796,8 +1835,8 @@ class YoutubeShowIE(InfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
IE_NAME = 'youtube:show'
_TESTS = [{
'url': 'http://www.youtube.com/show/airdisasters',
'playlist_mincount': 3,
'url': 'https://www.youtube.com/show/airdisasters',
'playlist_mincount': 5,
'info_dict': {
'id': 'airdisasters',
'title': 'Air Disasters',
@@ -1808,7 +1847,7 @@ class YoutubeShowIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(
url, playlist_id, 'Downloading show webpage')
'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
# There's one playlist for each season of the show
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))

View File

@@ -2,7 +2,6 @@ from __future__ import unicode_literals
import os.path
import optparse
import shlex
import sys
from .downloader.external import list_external_downloaders
@@ -11,6 +10,7 @@ from .compat import (
compat_get_terminal_size,
compat_getenv,
compat_kwargs,
compat_shlex_split,
)
from .utils import (
preferredencoding,
@@ -28,7 +28,7 @@ def parseOpts(overrideArguments=None):
try:
res = []
for l in optionf:
res += shlex.split(l, comments=True)
res += compat_shlex_split(l, comments=True)
finally:
optionf.close()
return res
@@ -320,7 +320,7 @@ def parseOpts(overrideArguments=None):
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
help='Video password (vimeo, smotri)')
help='Video password (vimeo, smotri, youku)')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(

View File

@@ -4,6 +4,7 @@ import os
from ..utils import (
PostProcessingError,
cli_configuration_args,
encodeFilename,
)
@@ -61,11 +62,7 @@ class PostProcessor(object):
self._downloader.report_warning(errnote)
def _configuration_args(self, default=[]):
pp_args = self._downloader.params.get('postprocessor_args')
if pp_args is None:
return default
assert isinstance(pp_args, list)
return pp_args
return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
class AudioConversionError(PostProcessingError):

View File

@@ -141,7 +141,7 @@ def write_json_file(obj, fn):
if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z-]+$', key)
assert re.match(r'^[a-zA-Z_-]+$', key)
if val:
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
@@ -176,12 +176,12 @@ def xpath_with_ns(path, ns_map):
return '/'.join(replaced)
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
if sys.version_info < (2, 7): # Crazy 2.6
xpath = xpath.encode('ascii')
n = node.find(xpath)
if n is None or n.text is None:
if n is None:
if default is not NO_DEFAULT:
return default
elif fatal:
@@ -189,9 +189,37 @@ def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
raise ExtractorError('Could not find XML element %s' % name)
else:
return None
return n
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
n = xpath_element(node, xpath, name, fatal=fatal, default=default)
if n is None or n == default:
return n
if n.text is None:
if default is not NO_DEFAULT:
return default
elif fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element\'s text %s' % name)
else:
return None
return n.text
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
n = find_xpath_attr(node, xpath, key)
if n is None:
if default is not NO_DEFAULT:
return default
elif fatal:
name = '%s[@%s]' % (xpath, key) if name is None else name
raise ExtractorError('Could not find XML attribute %s' % name)
else:
return None
return n.attrib[key]
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
return get_element_by_attribute("id", id, html)
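The refactor above layers three helpers: xpath_element finds the node, xpath_text adds the text lookup on top of it, and xpath_attr does the same for attributes, all sharing the default/fatal semantics. A quick usage sketch, assuming youtube_dl.utils is importable:

from xml.etree.ElementTree import fromstring
from youtube_dl.utils import xpath_attr, xpath_element, xpath_text

doc = fromstring('<root><media url="http://example.com/v.mp4"/></root>')

xpath_element(doc, './media')             # -> the <media> Element
xpath_attr(doc, './media', 'url')         # -> 'http://example.com/v.mp4'
xpath_text(doc, './media', default=None)  # -> None (node exists, no text)
xpath_text(doc, './missing', 'title', fatal=True)  # raises ExtractorError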
@@ -587,6 +615,11 @@ class ContentTooShortError(Exception):
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
# https://github.com/rg3/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
kwargs['strict'] = True
hc = http_class(*args, **kwargs)
source_address = ydl_handler._params.get('source_address')
if source_address is not None:
@@ -715,7 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
gz = io.BytesIO(self.deflate(resp.read()))
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
# https://github.com/rg3/youtube-dl/issues/6457).
if 300 <= resp.code < 400:
location = resp.headers.get('Location')
if location:
@@ -749,6 +783,30 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
req, **kwargs)
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
def http_response(self, request, response):
# Python 2 will choke on the next HTTP request if the Set-Cookie HTTP
# header of the last response contains non-ASCII characters (see
# https://github.com/rg3/youtube-dl/issues/6769).
# To at least prevent crashing, percent-encode the Set-Cookie header
# before HTTPCookieProcessor starts processing it.
# if sys.version_info < (3, 0) and response.headers:
# for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
# set_cookie = response.headers.get(set_cookie_header)
# if set_cookie:
# set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
# if set_cookie != set_cookie_escaped:
# del response.headers[set_cookie_header]
# response.headers[set_cookie_header] = set_cookie_escaped
return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
https_request = compat_urllib_request.HTTPCookieProcessor.http_request
https_response = http_response
def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
@@ -1578,6 +1636,10 @@ def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
def encode_dict(d, encoding='utf-8'):
return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
try:
etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError: # Python <=2.6
@@ -1918,6 +1980,32 @@ def dfxp2srt(dfxp_data):
return ''.join(out)
def cli_option(params, command_option, param):
param = params.get(param)
return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
param = params.get(param)
assert isinstance(param, bool)
if separator:
return [command_option + separator + (true_value if param else false_value)]
return [command_option, true_value if param else false_value]
def cli_valueless_option(params, command_option, param, expected_value=True):
param = params.get(param)
return [command_option] if param == expected_value else []
def cli_configuration_args(params, param, default=[]):
ex_args = params.get(param)
if ex_args is None:
return default
assert isinstance(ex_args, list)
return ex_args
class ISO639Utils(object):
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
_lang_map = {
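The new cli_* helpers above give external-downloader and postprocessor code one way to turn params into command-line arguments. Their behavior, assuming youtube_dl.utils is importable (the param names are real YoutubeDL option keys):

from youtube_dl.utils import (
    cli_bool_option,
    cli_configuration_args,
    cli_option,
    cli_valueless_option,
)

params = {
    'proxy': 'socks5://127.0.0.1:1080',
    'nocheckcertificate': True,
    'quiet': True,
    'postprocessor_args': ['-threads', '4'],
}

cli_option(params, '--proxy', 'proxy')
# -> ['--proxy', 'socks5://127.0.0.1:1080']
cli_bool_option(params, '--check-certificate', 'nocheckcertificate', 'false', 'true')
# -> ['--check-certificate', 'false']  (true/false values swapped to invert the flag)
cli_valueless_option(params, '--quiet', 'quiet')
# -> ['--quiet']
cli_configuration_args(params, 'postprocessor_args')
# -> ['-threads', '4']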

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.08.16.1'
__version__ = '2015.09.09'