Compare commits

..

171 Commits

Author SHA1 Message Date
Philipp Hagemeister
3b0bec8d11 release 2014.12.12.2 2014-12-12 15:56:45 +01:00
Philipp Hagemeister
412c617d0f [cnet] Update to new theplatform infrastructure (Fixes #2736) 2014-12-12 15:55:55 +01:00
Philipp Hagemeister
751536f5c8 [goldenmoustache] Remove view count
view count is not present anymore, so we can't extract it.
2014-12-12 13:09:55 +01:00
Philipp Hagemeister
025f30ba38 [channel9] Do not return compat_list results anymore 2014-12-12 13:07:43 +01:00
Philipp Hagemeister
0d2fb1d193 [helsinki] Fix extraction 2014-12-12 13:03:16 +01:00
Philipp Hagemeister
82b34105d3 [goshgay] Fix extraction 2014-12-12 12:55:13 +01:00
Philipp Hagemeister
73aeb2dc56 [goshgay] Modernize 2014-12-12 12:44:50 +01:00
Philipp Hagemeister
c6973bd412 [compat] Simplify kwarg detection code
This enables nuitka to compile youtube-dl.
2014-12-12 12:42:35 +01:00
Philipp Hagemeister
f8780e6d11 Merge remote-tracking branch 'grompe/patch-1' 2014-12-12 11:35:04 +01:00
Philipp Hagemeister
e2f89ec7aa Revert "[utils] Work around PyPy stupidity with Windows DLLs (Fixes #4392)"
This reverts commit 16040f46d6.
2014-12-12 11:33:55 +01:00
Philipp Hagemeister
62651c556a [howstuffworks] Parse only once, but right (#4383) 2014-12-12 04:23:34 +01:00
Philipp Hagemeister
bf94e38d3d Merge remote-tracking branch 'Tithen-Firion/hsw-update' 2014-12-12 04:10:55 +01:00
Philipp Hagemeister
4f97852316 Remove unused imports 2014-12-12 04:09:32 +01:00
Philipp Hagemeister
16040f46d6 [utils] Work around PyPy stupidity with Windows DLLs (Fixes #4392) 2014-12-12 04:01:08 +01:00
Philipp Hagemeister
d068ba24f3 release 2014.12.12.1 2014-12-12 03:34:33 +01:00
Philipp Hagemeister
f5e43bc695 [vine] Provide alt_title (Fixes #4448) 2014-12-12 03:34:28 +01:00
Philipp Hagemeister
6a5308ab49 release 2014.12.12 2014-12-12 03:02:56 +01:00
Philipp Hagemeister
63e0f29564 [vine] Modernize 2014-12-12 02:59:52 +01:00
Philipp Hagemeister
42bdd9d051 [cinchcast] Add new extractor (Fixes #4428) 2014-12-12 02:57:36 +01:00
Philipp Hagemeister
4e40de6e2a Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-12 02:14:31 +01:00
Philipp Hagemeister
0fa2b899d1 [Makefile] remove *.info.json in clean target 2014-12-12 02:14:04 +01:00
Philipp Hagemeister
f17e4c9c28 [screenwavemedia] Simplify (#3766) 2014-12-12 02:11:58 +01:00
Philipp Hagemeister
807962f4a1 [pornhd] Adapt to new sources scheme (Fixes #4446) 2014-12-11 23:50:25 +01:00
Jaime Marquínez Ferrándiz
9c1aa1d668 [mixcloud] Fix metadata extraction (fixes #4443) 2014-12-11 23:16:40 +01:00
Philipp Hagemeister
69f491f14e Merge remote-tracking branch 'fstirlitz/master' 2014-12-11 17:11:25 +01:00
Philipp Hagemeister
cb007f47c1 release 2014.12.11 2014-12-11 17:08:31 +01:00
Philipp Hagemeister
9abd500a74 [zdf:channel] Simplify (#4427) 2014-12-11 17:07:59 +01:00
Philipp Hagemeister
cf68bcaeff Merge remote-tracking branch 'akretz/master' 2014-12-11 16:35:45 +01:00
Philipp Hagemeister
cbe2bd914d [youtube] Amend test 2014-12-11 16:34:37 +01:00
Philipp Hagemeister
75111274ed [youtube] Do not warn if DASH manifest is missing (#4442) 2014-12-11 16:33:28 +01:00
Philipp Hagemeister
624dcebff6 [youtube] Make category optional (#4442) 2014-12-11 16:32:48 +01:00
Philipp Hagemeister
9684f17cde Merge remote-tracking branch 'akretz/youtube_fix' 2014-12-11 16:28:10 +01:00
Philipp Hagemeister
e52a40abf7 [youtube] Add test case for #4431 2014-12-11 16:28:07 +01:00
Philipp Hagemeister
0daa05961b Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-11 16:23:01 +01:00
Naglis Jonaitis
158731f83e [tvplay] Don't raise an exception if is_geo_blocked is True
Videos which return `is_geo_blocked' to be True can actually be downloaded from
the country to which the video is restricted
2014-12-11 17:07:50 +02:00
Adrian Kretz
24270b0301 [youtube] The case that 'url_encoded_fmt_stream_map' or 'adaptive_fmts' is the empty string is handled accordingly (fixes #4431) 2014-12-11 16:00:46 +01:00
Naglis Jonaitis
3c1b81b957 [ntv] Rename flash_ver to flash_version in the format dict
RTMP downloader uses `flash_version`
2014-12-11 16:58:45 +02:00
Philipp Hagemeister
45c24df512 Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-11 15:27:54 +01:00
Sergey M․
bf671b605e [behindkink] Remove superfluous whitespace 2014-12-11 20:09:52 +06:00
Sergey M․
09c82fbc9a [behindkink] Simplify 2014-12-11 20:06:19 +06:00
Sergey M.
3bca0409fe Merge pull request #4440 from 5moufl/behindkink-fix
[BehindKink] update
2014-12-11 19:58:31 +06:00
5moufl
d6f78a354d [BehindKink] Replace test
Old one is not accessible anymore
2014-12-11 14:26:59 +01:00
5moufl
e0b9d47387 [BehindKink] Update URL extraction 2014-12-11 14:25:26 +01:00
Philipp Hagemeister
f8795e102b [utils] Add "yesterday" as a date keyword 2014-12-11 10:29:30 +01:00
Philipp Hagemeister
4bb4a18876 [youtube] Fix imports 2014-12-11 10:08:17 +01:00
Adrian Kretz
8560c61842 [zdf] Add support for channels 2014-12-10 17:29:03 +01:00
Sergey M․
a81bbebf44 [smotri:broadcast] Fix extraction 2014-12-10 20:22:49 +06:00
Philipp Hagemeister
72e3ffeb74 release 2014.12.10.3 2014-12-10 15:19:08 +01:00
Philipp Hagemeister
2fc9f2b41d [facebook] Make thumbnail and duration optional
Fixes #4425.
Looks like both properties aren't given to us anymore. For now, just fall back to not returning them.
2014-12-10 15:18:36 +01:00
Philipp Hagemeister
5f3544baa3 release 2014.12.10.2 2014-12-10 14:39:06 +01:00
Philipp Hagemeister
da27660014 [youtube] Pass in all variables to DASH manifest (Fixes #4424) 2014-12-10 14:39:00 +01:00
Philipp Hagemeister
b8a6114309 release 2014.12.10.1 2014-12-10 13:21:49 +01:00
Philipp Hagemeister
774e208f94 [youtube] Handle missing DASH manifest (Fixes #4421, fixes #4420) 2014-12-10 13:21:24 +01:00
Philipp Hagemeister
f20b52778b release 2014.12.10 2014-12-10 12:21:40 +01:00
Jaime Marquínez Ferrándiz
83e865a370 Fix PEP8 issue E713 2014-12-09 23:11:26 +01:00
Sergey M․
b89a938687 [bet] Add extractor (Closes #4416) 2014-12-09 22:29:01 +06:00
Sergey M․
e89a2aabed [extractor/common] Add generic SMIL formats extraction routine 2014-12-09 22:28:28 +06:00
Philipp Hagemeister
f58766ce5c [extractor/common] Document ie_key in url results 2014-12-09 10:58:06 +01:00
Philipp Hagemeister
15644a40df Merge pull request #4395 from cryptonaut/issue2883
Handle --get-url with merged formats (fixes #2883)
2014-12-08 17:21:56 +01:00
Philipp Hagemeister
d4800f3c3f Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-08 17:17:31 +01:00
Philipp Hagemeister
09a5dd2d3b [bliptv] Add support for audio-only files (Fixes #4404) 2014-12-08 17:17:22 +01:00
Sergey M․
819039ee63 [tvigle] Update test and modernize 2014-12-08 22:03:02 +06:00
felix
ce36339575 add teamfourstar.com support 2014-12-08 17:01:22 +01:00
felix
684712076f add direct screenwavemedia.com URL support 2014-12-08 17:01:22 +01:00
Jaime Marquínez Ferrándiz
603c92080f [nhl] Make sure we add '_sd' before the extension (fixes #4397)
'.replace' would find the first dot in the path.
2014-12-07 11:26:07 +01:00
cryptonaut
16ae61f655 Handle --get-url with merged formats (fixes #2883)
Outputs one URL per line
2014-12-06 12:55:07 -08:00
Sergey M․
0ef4d4ab7e Credit @akretz for prosiebensat1 playlist support (#4394) 2014-12-07 01:48:45 +06:00
Sergey M․
4542535f94 Merge branch 'akretz-master' 2014-12-07 01:47:09 +06:00
Sergey M․
6a52eed80e [prosiebensat1] Improve and simplify 2014-12-07 01:46:44 +06:00
Sergey M․
acf5cbfe93 [extractor/common] Add description to playlist_result 2014-12-07 01:46:30 +06:00
Adrian Kretz
8d1c8cae9c [prosiebensat1] Fix broken tests 2014-12-06 19:21:05 +01:00
Adrian Kretz
c84890f708 [prosiebensat1] Add support for playlists (fixes #4357) 2014-12-06 19:05:22 +01:00
Sergey M․
6d0886204a [radio.de] Add support for radio.de websites (Closes #4393) 2014-12-06 23:01:52 +06:00
Sergey M․
04d02a9d57 [twitch] Add login support (#3986) 2014-12-06 21:24:20 +06:00
Grom PE
6ac4e8065a Fix utils.py for PyPy on Windows
The line
```python
from __future__ import unicode_literals
```
introduced in commit [ecc0c5ee01](ecc0c5ee01) broke youtube-dl for PyPy on Windows, making it unable to locate WinAPI functions.
Error: "TypeError: function name must be a string or integer"

Adding "b" prefix to strings with WinAPI function names fixes it.
2014-12-06 20:15:41 +07:00
Philipp Hagemeister
b82f815f37 Allow iterators for playlist result entries 2014-12-06 14:02:19 +01:00
Philipp Hagemeister
158f8cadc0 [adultswim] PEP8 2014-12-06 14:01:59 +01:00
Philipp Hagemeister
7d70cf4157 [nba] Remove unused import 2014-12-06 13:59:37 +01:00
Philipp Hagemeister
6591fdf51f [tagesschau] Look at the right place for download links 2014-12-06 13:59:10 +01:00
Philipp Hagemeister
47d7c64274 [test_utils] Make test more realistically (#4377) 2014-12-06 12:36:23 +01:00
Philipp Hagemeister
db175341c7 Credit @cryptonaut for adultswim (#4388) 2014-12-06 12:32:01 +01:00
Philipp Hagemeister
9ff6772790 [youtube] Modernize 2014-12-06 12:20:54 +01:00
Philipp Hagemeister
5f9b83944d [ffmpeg] Improve version check and call it from hls (Fixes #4377) 2014-12-06 12:14:26 +01:00
Philipp Hagemeister
f6735be4da Merge remote-tracking branch 'cryptonaut/adultswim' 2014-12-06 11:55:24 +01:00
Philipp Hagemeister
6a3e0103bb [nba] Add test for #4387 2014-12-06 11:26:17 +01:00
Philipp Hagemeister
0b5cc1983e [nba] Modernize 2014-12-06 11:15:25 +01:00
cryptonaut
1a9f8b1ad4 [nba] Improve _VALID_URL regex (fixes #4387)
Allows for optional trailing / or /index.html
2014-12-06 01:49:22 -08:00
cryptonaut
7115599121 [adultswim] Updated to work with new site format (fixes #4317) 2014-12-05 21:55:47 -08:00
Philipp Hagemeister
0df23ba9f9 release 2014.12.06.1 2014-12-06 00:48:34 +01:00
Philipp Hagemeister
58daf5ebed [youporn] Fix JSON parameter regexp (Fixes #4384) 2014-12-06 00:48:29 +01:00
Philipp Hagemeister
1a7c6c69d3 release 2014.12.06 2014-12-06 00:43:04 +01:00
Philipp Hagemeister
045c48847a [tagesschau] Add suppot for sendung (Fixes #4378) 2014-12-06 00:42:43 +01:00
Tithen-Firion
e638e83662 [howstuffworks] Update extractor 2014-12-05 19:46:49 +01:00
Sergey M․
90644a6843 [azubu] Add extractor (Closes #4379) 2014-12-05 22:08:30 +06:00
Tithen-Firion
d958fa9ff9 [howstuffworks] Rewrite extractor 2014-12-05 12:21:21 +01:00
Tithen-Firion
ebb6419960 [common] Split _download_json
Add ability for extractor to use _parse_json
2014-12-05 12:21:21 +01:00
Philipp Hagemeister
122c2f87c1 [tagesschau] Modernize 2014-12-05 10:59:55 +01:00
Philipp Hagemeister
a154eb3d15 release 2014.12.04.2 2014-12-04 17:43:39 +01:00
Philipp Hagemeister
81028ff9eb [xminus] Capture description (#4300) 2014-12-04 17:43:34 +01:00
Philipp Hagemeister
e8df5cee12 [minhateca] Fix duration parsing 2014-12-04 17:35:40 +01:00
Philipp Hagemeister
ab07963b5c release 2014.12.04.1 2014-12-04 17:02:23 +01:00
Philipp Hagemeister
7e26084d09 Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-04 17:02:14 +01:00
Philipp Hagemeister
4349c07dd7 [minhateca] Add extractor (Fixes #4094) 2014-12-04 17:02:05 +01:00
Sergey M․
1139a54d9b [foxnews] Add extractor (Closes #4352) 2014-12-04 21:19:08 +06:00
Sergey M․
b128c9ed68 [vine:user] Add support for another URL format (Closes #4365) 2014-12-04 20:12:06 +06:00
Philipp Hagemeister
9776bc7f57 release 2014.12.04 2014-12-04 08:34:12 +01:00
Philipp Hagemeister
e703fc66c2 Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/extractor/audiomack.py
2014-12-04 08:33:37 +01:00
Philipp Hagemeister
39c52bbd32 [myvidster] Enforce age limit in test 2014-12-04 08:31:55 +01:00
Philipp Hagemeister
6219802165 Merge remote-tracking branch 'zackfern/myvidster' 2014-12-04 08:30:22 +01:00
Philipp Hagemeister
8b97115358 Credit @zackfern for foxgay (#4371) 2014-12-04 08:28:41 +01:00
Philipp Hagemeister
810fb84d5e pep8 and minor beautification all around 2014-12-04 08:27:40 +01:00
Philipp Hagemeister
5f5e993dc6 [bbccouk] Remove unused import 2014-12-04 08:22:53 +01:00
Philipp Hagemeister
191cc41ba4 [foxgay] Add thumbnail to test definition 2014-12-04 08:22:20 +01:00
Jaime Marquínez Ferrándiz
abe70fa044 [audiomack] Modernize test definition 2014-12-04 08:21:29 +01:00
Philipp Hagemeister
7f142293df Merge remote-tracking branch 'zackfern/foxgay' 2014-12-04 08:20:01 +01:00
Philipp Hagemeister
d4e06d4a83 [options] Standardize mentoined configuration file location (Fixes #4367) 2014-12-04 07:57:18 +01:00
Zack Fernandes
ecd7ea1e6b [myvidster] Added support for Myvidster 2014-12-03 22:22:36 -08:00
Zack Fernandes
b92c548693 [foxgay] Initial support 2014-12-03 20:22:48 -08:00
Tithen-Firion
eecd6a467d [vgtv] Update tests 2014-12-04 01:34:24 +01:00
Philipp Hagemeister
dce2a3cf9e [break] Remove md5sum from test 2014-12-04 01:33:30 +01:00
Tithen-Firion
9095aa38ac [audiomack] Update test 2014-12-04 00:42:01 +01:00
Tithen-Firion
0403b06985 [soundcloud] Improve_VALID_URL
Add support for links from Audiomack
2014-12-04 00:42:01 +01:00
Sergey M․
de9bd74bc2 [ted] Fix type_watch links extraction 2014-12-03 21:17:11 +06:00
Jaime Marquínez Ferrándiz
233d37fb6b [brightcove] Make sure that the 'ext' variable is set (fixes #4360) 2014-12-03 13:25:49 +01:00
Philipp Hagemeister
c627f7d48c release 2014.12.03 2014-12-03 12:15:34 +01:00
Jaime Marquínez Ferrándiz
163c8babaa [nhl] Simplify 2014-12-03 00:08:26 +01:00
Jaime Marquínez Ferrándiz
6708542099 Merge branch 'master' of https://github.com/akretz/youtube-dl 2014-12-03 00:00:05 +01:00
Jaime Marquínez Ferrándiz
ea2ee40357 [nhl.com:videocenter] Don't match url with 'id=*' before 'catid' in the query
Since the order extractors are added is not defined, it would match instead of NHLIE.
2014-12-02 23:56:30 +01:00
Adrian Kretz
62d8b56655 [nhl] Support videos which don't have mp4-extension (fixes #4348) 2014-12-02 23:26:37 +01:00
Sergey M․
c492970b4b [rts] Improve _VALID_URL 2014-12-02 22:24:47 +06:00
Sergey M․
ac5633592a [24video] Add extractor (Closes #4350) 2014-12-02 22:23:23 +06:00
Sergey M․
706d7d4ee7 [YoutubeDL] Avoid negative timestamps on Windows 2014-12-02 21:18:07 +06:00
Sergey M․
752c8c9b76 [rts] Improve _VALID_URL 2014-12-02 20:53:19 +06:00
Sergey M․
b1399a144d [rts] Add support for the new URL format and extract display id (Closes #4349) 2014-12-02 20:45:43 +06:00
Jaime Marquínez Ferrándiz
05177b34a6 [rutube] Extract m3u8 formats (fixes #3984) 2014-12-01 18:20:36 +01:00
Jaime Marquínez Ferrándiz
c41a9650c3 [youtube] Extract framerate from the dash manifest
Not all videos have 60 fps, for example they can have 48 fps.
2014-12-01 17:36:12 +01:00
Philipp Hagemeister
df015c69ea release 2014.12.01 2014-12-01 17:28:34 +01:00
Naglis Jonaitis
1434bffa1f [tunein] Use station API 2014-12-01 18:10:15 +02:00
Jaime Marquínez Ferrándiz
94aa25b995 Credit @Tithen-Firion for the myspace changes (#4341) 2014-12-01 16:15:09 +01:00
Sergey M․
d128cfe393 [slideshare] Fix description extraction 2014-12-01 20:18:42 +06:00
Jaime Marquínez Ferrándiz
954f36f890 [myspace] Cleanup 2014-12-01 00:10:12 +01:00
Jaime Marquínez Ferrándiz
19e92770c9 [myspace] Replace removed test video and fix the others 2014-12-01 00:10:12 +01:00
Tithen-Firion
95c673a148 [myspace] Add extractor for albums 2014-12-01 00:10:12 +01:00
Tithen-Firion
a196a53265 [myspace] Update tests 2014-12-01 00:10:12 +01:00
Tithen-Firion
3266f0c68e [myspace] Redirect to other extractors
There are many songs just linked from Vevo/YouTube to MySpace.
Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041
YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426
2014-12-01 00:10:12 +01:00
Tithen-Firion
1940fadd53 [myspace] Handle non-playable songs
I'm adding this because sometimes there is a song page, but you cannot play it.
Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439
It will be useful for downloading whole album with songs like this.
2014-12-01 00:10:11 +01:00
Tithen-Firion
03fd72d996 [myspace] Add more data to info dict
`uploader` is an artist
`playlist` is an album
2014-12-01 00:10:11 +01:00
Tithen-Firion
f2b44a2513 [myspace] Use player_url for faster download
It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me.
2014-12-01 00:10:11 +01:00
Jaime Marquínez Ferrándiz
c522adb1f0 [youtube] Add a normal age-gate test video 2014-11-30 21:45:49 +01:00
Jaime Marquínez Ferrándiz
7160532d41 [youtube] Simplify code for getting the dash manifest url
video_info contains now the 'ytplayer.config.args' dictionary
2014-11-30 21:07:50 +01:00
Jaime Marquínez Ferrándiz
4e62ebe250 [youtube] Try to extract the video_info from the webpage before requesting the 'get_video_info' pages
The YouTube player doesn't seem to use them except for embedded videos, so we can skip a network request.
But they still provide better error mesagges (for removed videos for example).
2014-11-30 20:56:32 +01:00
Jaime Marquínez Ferrándiz
4472f84f0c [test/test_subtitles] Update checksum for vimeo subtitle file 2014-11-30 19:42:54 +01:00
Jaime Marquínez Ferrándiz
b766eb2707 [youtube] Update test 2014-11-30 19:18:39 +01:00
Jaime Marquínez Ferrándiz
10a404c335 [youtube] Add format 313 (fixes #4339) 2014-11-30 18:56:14 +01:00
Sergey M․
c056efa2e3 [bbccouk] Fix extraction (#4104, #4214) 2014-11-30 22:37:56 +06:00
Philipp Hagemeister
283ac8d592 Merge pull request #4338 from t0mm0/x-minus-fix
[xminus] update tkn extraction regex
2014-11-30 17:11:05 +01:00
t0mm0
313d4572ce [xminus] update tkn extraction regex 2014-11-30 16:04:04 +00:00
Jaime Marquínez Ferrándiz
42939b6129 [youtube] Use a cookie for seeting the language
This way, we don't have to do an aditional request
2014-11-30 00:03:59 +01:00
Jaime Marquínez Ferrándiz
37ea8164d3 [youtube] Don't confirm age when initializing
It seems that all the videos with age restriction use now the age gate method, which doesn't require any confirmation.
2014-11-29 23:46:39 +01:00
Jaime Marquínez Ferrándiz
8c810a7db3 Merge pull request #4333 from ymln/bliptv-fixes
[bliptv] Fix some videos not downloading
2014-11-29 20:20:45 +01:00
Yuriy Melnyk
248a0b890f [bliptv] Fix \n\n at the end of real_url
See https://github.com/rg3/youtube-dl/issues/3544#issuecomment-53166516
2014-11-29 19:17:56 +02:00
Yuriy Melnyk
96b7c7fe3f [bliptv] Fix resolution of lookup id in some videos
In some videos (for example, http://blip.tv/play/gbk766dkj4Yn) resolving
lookup id would fail, because page at
http://blip.tv/play/gbk766dkj4Yn.x?p=1 would have no "config.id" in
it. Fixed by requesting different URL and inspecting the URL which the
client is redirected to.
2014-11-29 19:17:56 +02:00
Sergey M․
e987e91fcc [playvid] Capture and output error message 2014-11-29 22:16:35 +06:00
Sergey M․
cb6444e197 [noco] Add support for multi language videos (Closes #4326) 2014-11-28 20:38:47 +06:00
Philipp Hagemeister
93b8a10e3b release 2014.11.27 2014-11-27 15:44:49 +01:00
Philipp Hagemeister
4207558e8b [buzzfeed] Add support for more video types (#4259) 2014-11-27 15:44:35 +01:00
Philipp Hagemeister
ad0d800fc3 release 2014.11.26.4 2014-11-26 22:53:02 +01:00
Philipp Hagemeister
e232f787f6 [buzzfeed] Add new extractor (Fixes #4259) 2014-11-26 22:52:52 +01:00
Philipp Hagemeister
155f9550c0 [test/helper] Fix newlines in output of missing test fields 2014-11-26 22:52:28 +01:00
Philipp Hagemeister
72476fcc42 release 2014.11.26.3 2014-11-26 22:08:30 +01:00
Philipp Hagemeister
29e950f7c8 release 2014.11.26.2 2014-11-26 22:06:27 +01:00
71 changed files with 2124 additions and 791 deletions

View File

@@ -88,3 +88,7 @@ Dao Hoang Son
Oskar Jauch
Matthew Rayfield
t0mm0
Tithen-Firion
Zack Fernandes
cryptonaut
Adrian Kretz

View File

@@ -1,7 +1,7 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json
cleanall: clean
rm -f youtube-dl youtube-dl.exe

View File

@@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.
this is not possible instead of searching.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
configuration in ~/.config/youtube-dl.conf
(%APPDATA%/youtube-dl/config.txt on
Windows)
/youtube-dl.conf: Do not read the user
configuration in ~/.config/youtube-
dl/config (%APPDATA%/youtube-dl/config.txt
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.

View File

@@ -141,7 +141,7 @@ def expect_info_dict(self, expected_dict, got_dict):
if missing_keys:
def _repr(v):
if isinstance(v, compat_str):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
else:
return repr(v)
info_dict_str = ''.join(

View File

@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True

View File

@@ -48,6 +48,7 @@ from youtube_dl.utils import (
intlist_to_bytes,
args_to_str,
parse_filesize,
version_tuple,
)
@@ -143,6 +144,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
self.assertEqual(
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
'20141126')
def test_find_xpath_attr(self):
testxml = '''<root>
@@ -220,6 +224,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('0s'), 0)
self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
self.assertEqual(parse_duration('T30M38S'), 1838)
self.assertEqual(parse_duration('5 s'), 5)
self.assertEqual(parse_duration('3 min'), 180)
self.assertEqual(parse_duration('2.5 hours'), 9000)
def test_fix_xml_ampersands(self):
self.assertEqual(
@@ -376,6 +383,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_filesize('2 MiB'), 2097152)
self.assertEqual(parse_filesize('5 GB'), 5000000000)
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
self.assertEqual(parse_filesize('1,24 KB'), 1240)
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
if __name__ == '__main__':
unittest.main()

View File

@@ -7,6 +7,7 @@ import collections
import datetime
import errno
import io
import itertools
import json
import locale
import os
@@ -621,23 +622,15 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
def make_result(embedded_info):
new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
'entries', 'ie_key', 'duration',
'subtitles', 'annotations', 'format',
'thumbnail', 'thumbnails'):
if f in new_result:
del new_result[f]
if f in embedded_info:
new_result[f] = embedded_info[f]
return new_result
new_result = make_result(info)
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
for f in ('_type', 'url'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
new_result.update(force_properties)
assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
@@ -654,21 +647,28 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
if isinstance(ie_result['entries'], list):
n_all_entries = len(ie_result['entries'])
entries = ie_result['entries'][playliststart:playlistend]
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
else:
assert isinstance(ie_result['entries'], PagedList)
entries = ie_result['entries'].getslice(
elif isinstance(ie_entries, PagedList):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
else: # iterable
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -787,6 +787,10 @@ class YoutubeDL(object):
info_dict['display_id'] = info_dict['id']
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
# Working around negative timestamps in Windows
# (see http://bugs.python.org/issue1646728)
if info_dict['timestamp'] < 0 and os.name == 'nt':
info_dict['timestamp'] = 0
upload_date = datetime.datetime.utcfromtimestamp(
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
@@ -930,8 +934,12 @@ class YoutubeDL(object):
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if info_dict.get('requested_formats') is not None:
for f in info_dict['requested_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
else:
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:

View File

@@ -247,7 +247,7 @@ else:
userhome = compat_getenv('HOME')
elif 'USERPROFILE' in os.environ:
userhome = compat_getenv('USERPROFILE')
elif not 'HOMEPATH' in os.environ:
elif 'HOMEPATH' not in os.environ:
return path
else:
try:
@@ -297,7 +297,9 @@ else:
# Old 2.6 and 2.7 releases require kwargs to be bytes
try:
(lambda x: x)(**{'x': 0})
def _testfunc(x):
pass
_testfunc(**{'x': 0})
except TypeError:
def compat_kwargs(kwargs):
return dict((bytes(k), v) for k, v in kwargs.items())

View File

@@ -4,6 +4,7 @@ import os
import re
import subprocess
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from .common import FileDownloader
from ..utils import (
compat_urlparse,
@@ -32,6 +33,9 @@ class HlsFD(FileDownloader):
return False
cmd = [program] + args
ffpp = FFmpegPostProcessor(downloader=self)
ffpp.check_version()
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))

View File

@@ -24,11 +24,13 @@ from .arte import (
)
from .audiomack import AudiomackIE
from .auengine import AUEngineIE
from .azubu import AzubuIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
@@ -38,6 +40,7 @@ from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .canal13cl import Canal13clIE
@@ -48,7 +51,7 @@ from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
@@ -120,6 +123,8 @@ from .fktv import (
from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
@@ -215,6 +220,7 @@ from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
@@ -241,9 +247,10 @@ from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .musicvault import MusicVaultIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
@@ -301,6 +308,7 @@ from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .radiode import RadioDeIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
@@ -328,6 +336,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
from .servingsys import ServingSysIE
from .sexu import SexuIE
from .sexykarma import SexyKarmaIE
@@ -416,6 +425,7 @@ from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .tvplay import TVPlayIE
from .twentyfourvideo import TwentyFourVideoIE
from .twitch import TwitchIE
from .ubu import UbuIE
from .udemy import (
@@ -517,7 +527,7 @@ from .youtube import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
from .zdf import ZDFIE
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,
ZingMp3AlbumIE,

View File

@@ -2,123 +2,147 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
_TESTS = [{
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [
{
'md5': '4da359ec73b58df4575cd01a610ba5dc',
'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': {
'id': '8a250ba1450996e901453d7f02ca02f5',
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'Rick and Morty - Pilot Part 1',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
{
'md5': 'ffbdf55af9331c509d95350bd0cc1819',
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': {
'id': '8a250ba1450996e901453d7f4bd102f6',
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'Rick and Morty - Pilot Part 4',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
],
'info_dict': {
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
}, {
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
'playlist': [
{
'md5': 'b92409635540304280b4b6c36bd14a0a',
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': {
'id': '8a250ba1450996e901453d7fa73c02f7',
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'e8818891d60e47b29cd89d7b0278156d',
'info_dict': {
'id': '8a250ba1450996e901453d7fc8ba02f8',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}
]
}
],
'info_dict': {
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}]
_video_extensions = {
'3500': 'flv',
'640': 'mp4',
'150': 'mp4',
'ipad': 'm3u8',
'iphone': 'm3u8'
}
_video_dimensions = {
'3500': (1280, 720),
'640': (480, 270),
'150': (320, 180)
}
@staticmethod
def find_video_info(collection, slug):
for video in collection.get('videos'):
if video.get('slug') == slug:
return video
@staticmethod
def find_collection_by_linkURL(collections, linkURL):
for collection in collections:
if collection.get('linkURL') == linkURL:
return collection
@staticmethod
def find_collection_containing_video(collections, slug):
for collection in collections:
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_path = mobj.group('path')
show_path = mobj.group('show_path')
episode_path = mobj.group('episode_path')
is_playlist = True if mobj.group('is_playlist') else False
webpage = self._download_webpage(url, video_path)
episode_id = self._html_search_regex(
r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
webpage, 'episode_id')
title = self._og_search_title(webpage)
webpage = self._download_webpage(url, episode_path)
index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
# Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
episode_el = idoc.find('.//episode')
show_title = episode_el.attrib.get('collectionTitle')
episode_title = episode_el.attrib.get('title')
thumbnail = episode_el.attrib.get('thumbnailUrl')
description = episode_el.find('./description').text.strip()
try:
bootstrappedData = json.loads(bootstrappedDataJS)
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % episode_path
raise ExtractorError(errmsg, cause=ve)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
collections = bootstrappedData['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
collections = bootstrappedData['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
show = bootstrappedData['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
episode_id = video_info['id']
episode_title = video_info['title']
episode_description = video_info['description']
episode_duration = video_info.get('duration')
entries = []
segment_els = episode_el.findall('./segments/segment')
for part_num, segment_id in enumerate(segment_ids):
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
for part_num, segment_el in enumerate(segment_els):
segment_id = segment_el.attrib.get('id')
segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
thumbnail = segment_el.attrib.get('thumbnailUrl')
duration = segment_el.attrib.get('duration')
segment_title = '%s - %s' % (show_title, episode_title)
if len(segment_ids) > 1:
segment_title += ' Part %d' % (part_num + 1)
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
idoc = self._download_xml(
segment_url, segment_title,
'Downloading segment information', 'Unable to download segment information')
segment_duration = idoc.find('.//trt').text.strip()
formats = []
file_els = idoc.findall('.//files/file')
for file_el in file_els:
bitrate = file_el.attrib.get('bitrate')
type = file_el.attrib.get('type')
width, height = self._video_dimensions.get(bitrate, (None, None))
ftype = file_el.attrib.get('type')
formats.append({
'format_id': '%s-%s' % (bitrate, type),
'url': file_el.text,
'ext': self._video_extensions.get(bitrate, 'mp4'),
'format_id': '%s_%s' % (bitrate, ftype),
'url': file_el.text.strip(),
# The bitrate may not be a number (for example: 'iphone')
'tbr': int(bitrate) if bitrate.isdigit() else None,
'height': height,
'width': width
'quality': 1 if ftype == 'hd' else -1
})
self._sort_formats(formats)
@@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):
'id': segment_id,
'title': segment_title,
'formats': formats,
'uploader': show_title,
'thumbnail': thumbnail,
'duration': duration,
'description': description
'duration': segment_duration,
'description': episode_description
})
return {
'_type': 'playlist',
'id': episode_id,
'display_id': video_path,
'display_id': episode_path,
'entries': entries,
'title': '%s %s' % (show_title, episode_title),
'description': description,
'thumbnail': thumbnail
'title': '%s - %s' % (show_title, episode_title),
'description': episode_description,
'duration': episode_duration
}

View File

@@ -24,17 +24,17 @@ class AudiomackIE(InfoExtractor):
},
# hosted on soundcloud via audiomack
{
'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
'file': '172419696.mp3',
'info_dict':
{
'info_dict': {
'id': '172419696',
'ext': 'mp3',
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
'title': 'Young Thug ft Lil Wayne - Take Kare',
"upload_date": "20141016",
"description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
"uploader": "Young Thug World"
'uploader': 'Young Thug World',
'upload_date': '20141016',
}
}
},
]
def _real_extract(self, url):

View File

@@ -0,0 +1,93 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import float_or_none
class AzubuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
'info_dict': {
'id': '15575',
'ext': 'mp4',
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
'thumbnail': 're:^https?://.*\.jpe?g',
'timestamp': 1417523507.334,
'upload_date': '20141202',
'duration': 9988.7,
'uploader': 'GSL',
'uploader_id': 414310,
'view_count': int,
},
},
{
'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
'info_dict': {
'id': '9344',
'ext': 'mp4',
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
'thumbnail': 're:^https?://.*\.jpe?g',
'timestamp': 1410530893.320,
'upload_date': '20140912',
'duration': 172.385,
'uploader': 'FnaticTV',
'uploader_id': 272749,
'view_count': int,
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
title = data['title'].strip()
description = data['description']
thumbnail = data['thumbnail']
view_count = data['view_count']
uploader = data['user']['username']
uploader_id = data['user']['id']
stream_params = json.loads(data['stream_params'])
timestamp = float_or_none(stream_params['creationDate'], 1000)
duration = float_or_none(stream_params['length'], 1000)
renditions = stream_params.get('renditions') or []
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
if video:
renditions.append(video)
formats = [{
'url': fmt['url'],
'width': fmt['frameWidth'],
'height': fmt['frameHeight'],
'vbr': float_or_none(fmt['encodingRate'], 1000),
'filesize': fmt['size'],
'vcodec': fmt['videoCodec'],
'container': fmt['videoContainer'],
} for fmt in renditions if fmt['url']]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'view_count': view_count,
'formats': formats,
}

View File

@@ -1,9 +1,10 @@
from __future__ import unicode_literals
import re
import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
@@ -55,7 +56,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
}
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
'info_dict': {
'id': 'b03k3pb7',
'ext': 'flv',
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
'description': '2. Invasion',
'duration': 3600,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
},
]
def _extract_asx_playlist(self, connection, programme_id):
@@ -102,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
def _extract_medias(self, media_selection):
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
if error is not None:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
def _extract_connections(self, media):
@@ -158,54 +178,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
subtitles[lang] = srt
return subtitles
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
group_id = mobj.group('id')
webpage = self._download_webpage(url, group_id, 'Downloading video page')
if re.search(r'id="emp-error" class="notinuk">', webpage):
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
expected=True)
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
def _download_media_selector(self, programme_id):
try:
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
raise
formats = []
subtitles = None
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
for media in self._extract_medias(media_selection):
kind = media.get('kind')
if kind == 'audio':
formats.extend(self._extract_audio(media, programme_id))
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
programme_id = item.get('identifier')
duration = int(item.get('duration'))
return formats, subtitles
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
programme_id, 'Downloading media selection XML')
def _real_extract(self, url):
group_id = self._match_id(url)
for media in self._extract_medias(media_selection):
kind = media.get('kind')
if kind == 'audio':
formats.extend(self._extract_audio(media, programme_id))
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = self._search_regex(
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
if programme_id:
player = self._download_json(
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
group_id)['jsConf']['player']
title = player['title']
description = player['subtitle']
duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id)
else:
playlist = self._download_xml(
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
group_id, 'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
elif reason == 'noMedia':
msg = 'Episode %s is not currently available' % group_id
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
programme_id = item.get('identifier')
duration = int(item.get('duration'))
formats, subtitles = self._download_media_selector(programme_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)

View File

@@ -10,15 +10,15 @@ from ..utils import url_basename
class BehindKinkIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
_TEST = {
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
'md5': '41ad01222b8442089a55528fec43ec01',
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
'info_dict': {
'id': '36370',
'id': '37127',
'ext': 'mp4',
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
'upload_date': '20140814',
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
'title': 'What are you passionate about Marley Blaze',
'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
'upload_date': '20141205',
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
'age_limit': 18,
}
}
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
year = mobj.group('year')
month = mobj.group('month')
day = mobj.group('day')
upload_date = year + month + day
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
r"'file':\s*'([^']+)'",
webpage, 'URL base')
video_id = url_basename(video_url)
video_id = video_id.split('_')[0]
r'<source src="([^"]+)"', webpage, 'video URL')
video_id = url_basename(video_url).split('_')[0]
upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'display_id': display_id,
'url': video_url,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
'upload_date': upload_date,

108
youtube_dl/extractor/bet.py Normal file
View File

@@ -0,0 +1,108 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
xpath_text,
xpath_with_ns,
int_or_none,
parse_iso8601,
)
class BetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [
{
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': {
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv',
'title': 'BET News Presents: A Conversation With President Obama',
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
'duration': 1534,
'timestamp': 1418075340,
'upload_date': '20141208',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
'info_dict': {
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
'display_id': 'justice-for-ferguson-a-community-reacts',
'ext': 'flv',
'title': 'Justice for Ferguson: A Community Reacts',
'description': 'A BET News special.',
'duration': 1696,
'timestamp': 1416942360,
'upload_date': '20141125',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
media_url = compat_urllib_parse.unquote(self._search_regex(
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
mrss = self._download_xml(media_url, display_id)
item = mrss.find('./channel/item')
NS_MAP = {
'dc': 'http://purl.org/dc/elements/1.1/',
'media': 'http://search.yahoo.com/mrss/',
'ka': 'http://kickapps.com/karss',
}
title = xpath_text(item, './title', 'title')
description = xpath_text(
item, './description', 'description', fatal=False)
video_id = xpath_text(item, './guid', 'video id', fatal=False)
timestamp = parse_iso8601(xpath_text(
item, xpath_with_ns('./dc:date', NS_MAP),
'upload date', fatal=False))
uploader = xpath_text(
item, xpath_with_ns('./dc:creator', NS_MAP),
'uploader', fatal=False)
media_content = item.find(
xpath_with_ns('./media:content', NS_MAP))
duration = int_or_none(media_content.get('duration'))
smil_url = media_content.get('url')
thumbnail = media_content.find(
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
formats = self._extract_smil_formats(smil_url, display_id)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'uploader': uploader,
'duration': duration,
'formats': formats,
}

View File

@@ -4,13 +4,17 @@ import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
unescapeHTML,
parse_iso8601,
compat_urlparse,
clean_html,
from ..compat import (
compat_str,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
unescapeHTML,
)
@@ -64,7 +68,39 @@ class BlipTVIE(SubtitlesInfoExtractor):
'uploader': 'redvsblue',
'uploader_id': '792887',
}
}
},
{
'url': 'http://blip.tv/play/gbk766dkj4Yn',
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
'info_dict': {
'id': '1749452',
'ext': 'mp4',
'upload_date': '20090208',
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
'title': 'Nostalgia Critic: Transformers',
'timestamp': 1234068723,
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
},
{
# https://github.com/rg3/youtube-dl/pull/4404
'note': 'Audio only',
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
'md5': '76c0a56f24e769ceaab21fbb6416a351',
'info_dict': {
'id': '7103299',
'ext': 'flv',
'title': 'Weekly Manga Recap: Kingdom',
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
'timestamp': 1417660321,
'upload_date': '20141204',
'uploader': 'The Rollo T',
'uploader_id': '407429',
'duration': 7251,
'vcodec': 'none',
}
},
]
def _real_extract(self, url):
@@ -74,11 +110,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
# See https://github.com/rg3/youtube-dl/issues/857 and
# https://github.com/rg3/youtube-dl/issues/4197
if lookup_id:
info_page = self._download_webpage(
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
else:
video_id = mobj.group('id')
urlh = self._request_webpage(
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
url = compat_urlparse.urlparse(urlh.geturl())
qs = compat_urlparse.parse_qs(url.query)
mobj = re.match(self._VALID_URL, qs['file'][0])
video_id = mobj.group('id')
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
@@ -114,7 +152,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
msg = self._download_webpage(
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
video_id, 'Resolving URL for %s' % role)
real_url = compat_urlparse.parse_qs(msg)['message'][0]
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
media_type = media_content.get('type')
if media_type == 'text/srt' or url.endswith('.srt'):
@@ -129,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
'url': real_url,
'format_id': role,
'format_note': media_type,
'vcodec': media_content.get(blip('vcodec')),
'vcodec': media_content.get(blip('vcodec')) or 'none',
'acodec': media_content.get(blip('acodec')),
'filesize': media_content.get('filesize'),
'width': int(media_content.get('width')),
'height': int(media_content.get('height')),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
self._sort_formats(formats)

View File

@@ -14,7 +14,6 @@ class BreakIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
'info_dict': {
'id': '2468056',
'ext': 'mp4',

View File

@@ -265,6 +265,7 @@ class BrightcoveIE(InfoExtractor):
url = rend['defaultURL']
if not url:
continue
ext = None
if rend['remote']:
url_comp = compat_urllib_parse_urlparse(url)
if url_comp.path.endswith('.m3u8'):
@@ -276,7 +277,7 @@ class BrightcoveIE(InfoExtractor):
# akamaihd.net, but they don't use f4m manifests
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
ext = 'flv'
else:
if ext is None:
ext = determine_ext(url)
size = rend.get('size')
formats.append({

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
class BuzzFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
'info_dict': {
'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
'description': 'Rambro!',
},
'playlist': [{
'info_dict': {
'id': 'aVCR29aE_OQ',
'ext': 'mp4',
'upload_date': '20141024',
'uploader_id': 'Buddhanz1',
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
'uploader': 'Buddhanz',
'title': 'Angry Ram destroys a punching bag',
}
}]
}, {
'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
'params': {
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
'description': 'Munchkin the Teddy Bear is back !',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
'playlist': [{
'info_dict': {
'id': 'mVmBL8B-In0',
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
'uploader': 'Munchkin the Shih Tzu',
'title': 'Munchkin the Teddy Bear gets her exercise',
},
}]
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
all_buckets = re.findall(
r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
webpage)
entries = []
for bd_json in all_buckets:
bd = json.loads(bd_json)
video = bd.get('video') or bd.get('progload_video')
if not video:
continue
entries.append(self.url_result(video['url']))
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'entries': entries,
}

View File

@@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor):
if contents is None:
return contents
session_meta = {'session_code': self._extract_session_code(html),
'session_day': self._extract_session_day(html),
'session_room': self._extract_session_room(html),
'session_speakers': self._extract_session_speakers(html),
}
session_meta = {
'session_code': self._extract_session_code(html),
'session_day': self._extract_session_day(html),
'session_room': self._extract_session_room(html),
'session_speakers': self._extract_session_speakers(html),
}
for content in contents:
content.update(session_meta)
return contents
return self.playlist_result(contents)
def _extract_list(self, content_path):
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')

View File

@@ -0,0 +1,52 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
xpath_text,
)
class CinchcastIE(InfoExtractor):
_VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
_TEST = {
# Actual test is run in generic, look for undergroundwellness
'url': 'http://player.cinchcast.com/?platformId=1&#038;assetType=single&#038;assetId=7141703',
'only_matching': True,
}
def _real_extract(self, url):
video_id = self._match_id(url)
doc = self._download_xml(
'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
video_id)
item = doc.find('.//item')
title = xpath_text(item, './title', fatal=True)
date_str = xpath_text(
item, './{http://developer.longtailvideo.com/trac/}date')
upload_date = unified_strdate(date_str, day_first=False)
# duration is present but wrong
formats = []
formats.append({
'format_id': 'main',
'url': item.find(
'./{http://search.yahoo.com/mrss/}content').attrib['url'],
})
backup_url = xpath_text(
item, './{http://developer.longtailvideo.com/trac/}backupContent')
if backup_url:
formats.append({
'preference': 2, # seems to be more reliable
'format_id': 'backup',
'url': backup_url,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'upload_date': upload_date,
'formats': formats,
}

View File

@@ -15,23 +15,24 @@ class CNETIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
_TEST = {
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
'md5': '041233212a0d06b179c87cbcca1577b8',
'info_dict': {
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
'ext': 'mp4',
'ext': 'flv',
'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
'uploader_id': 'sarah.mitroff@cbsinteractive.com',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'uploader': 'Sarah Mitroff',
},
'params': {
'skip_download': 'requires rtmpdump',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
webpage, 'data json')
@@ -42,37 +43,31 @@ class CNETIE(InfoExtractor):
if not vdata:
raise ExtractorError('Cannot find video data')
mpx_account = data['config']['players']['default']['mpx_account']
vid = vdata['files']['rtmp']
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
video_id = vdata['id']
title = vdata.get('headline')
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
description = vdata.get('dek')
thumbnail = vdata.get('image', {}).get('path')
author = vdata.get('author')
if author:
uploader = '%s %s' % (author['firstName'], author['lastName'])
uploader_id = author.get('email')
uploader_id = author.get('id')
else:
uploader = None
uploader_id = None
formats = [{
'format_id': '%s-%s-%s' % (
f['type'], f['format'],
int_or_none(f.get('bitrate'), 1000, default='')),
'url': f['uri'],
'tbr': int_or_none(f.get('bitrate'), 1000),
} for f in vdata['files']['data']]
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'url': tp_link,
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
'description': description,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,

View File

@@ -13,6 +13,7 @@ import time
import xml.etree.ElementTree
from ..compat import (
compat_cookiejar,
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
@@ -117,6 +118,7 @@ class InfoExtractor(object):
The following fields are optional:
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
@@ -128,7 +130,7 @@ class InfoExtractor(object):
* "resolution" (optional, string "{width}x{height"},
deprecated)
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
description: Full video description.
uploader: Full name of the video uploader.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
@@ -157,8 +159,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos.
There must be a key "entries", which is a list or a PagedList object, each
element of which is a valid dictionary under this specfication.
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
@@ -173,9 +175,10 @@ class InfoExtractor(object):
_type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract.
Additionally, it may have properties believed to be identical to the
resolved entity, for example "title" if the title of the referred video is
The key "ie_key" can be set to the class name (minus the trailing "IE",
e.g. "Youtube") if the extractor class is known in advance.
Additionally, the dictionary may have any properties of the resolved entity
known in advance, for example "title" if the title of the referred video is
known ahead of time.
@@ -389,6 +392,10 @@ class InfoExtractor(object):
url_or_request, video_id, note, errnote, fatal=fatal)
if (not fatal) and json_string is False:
return None
return self._parse_json(
json_string, video_id, transform_source=transform_source, fatal=fatal)
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
if transform_source:
json_string = transform_source(json_string)
try:
@@ -438,7 +445,7 @@ class InfoExtractor(object):
return video_info
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
@@ -446,6 +453,8 @@ class InfoExtractor(object):
video_info['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
if playlist_description:
video_info['description'] = playlist_description
return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -789,6 +798,49 @@ class InfoExtractor(object):
self._sort_formats(formats)
return formats
# TODO: improve extraction
def _extract_smil_formats(self, smil_url, video_id):
smil = self._download_xml(
smil_url, video_id, 'Downloading SMIL file',
'Unable to download SMIL file')
base = smil.find('./head/meta').get('base')
formats = []
rtmp_count = 0
for video in smil.findall('./body/switch/video'):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
formats.append({
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
})
self._sort_formats(formats)
return formats
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
@@ -817,6 +869,12 @@ class InfoExtractor(object):
self._downloader.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None):
cookie = compat_cookiejar.Cookie(
0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
self._downloader.cookiejar.set_cookie(cookie)
class SearchInfoExtractor(InfoExtractor):
"""

View File

@@ -13,9 +13,10 @@ from ..compat import (
compat_urllib_request,
)
from ..utils import (
urlencode_postdata,
ExtractorError,
int_or_none,
limit_length,
urlencode_postdata,
)
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '637842556329505',
'ext': 'mp4',
'duration': 38,
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
}
}, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
self._login()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
webpage = self._download_webpage(url, video_id)
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
'id': video_id,
'title': video_title,
'url': video_url,
'duration': int(video_data['video_duration']),
'thumbnail': video_data['thumbnail_src'],
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
}

View File

@@ -0,0 +1,48 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class FoxgayIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
_TEST = {
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
'md5': '80d72beab5d04e1655a56ad37afe6841',
'info_dict': {
'id': '2582',
'ext': 'mp4',
'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
'age_limit': 18,
'thumbnail': 're:https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<title>(?P<title>.*?)</title>',
webpage, 'title', fatal=False)
description = self._html_search_regex(
r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
webpage, 'description', fatal=False)
# Find the URL for the iFrame which contains the actual video.
iframe = self._download_webpage(
self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
video_id)
video_url = self._html_search_regex(
r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
thumb_url = self._html_search_regex(
r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'description': description,
'thumbnail': thumb_url,
'age_limit': 18,
}

View File

@@ -0,0 +1,94 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
class FoxNewsIE(InfoExtractor):
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
'info_dict': {
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
'md5': '5846c64a1ea05ec78175421b8323e2df',
'info_dict': {
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action",
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
'only_matching': True,
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
item = video['channel']['item']
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}

View File

@@ -467,8 +467,17 @@ class GenericIE(InfoExtractor):
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
]
}
},
# Cinchcast embed
{
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
'info_dict': {
'id': '7141703',
'ext': 'mp3',
'upload_date': '20141126',
'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
}
},
]
def report_following_redirect(self, new_url):
@@ -962,6 +971,13 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
# Look for embedded Cinchcast player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Cinchcast')
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
webpage)

View File

@@ -17,7 +17,6 @@ class GoldenMoustacheIE(InfoExtractor):
'title': 'Suricate - Le Poker',
'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
}
}, {
'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
@@ -28,7 +27,6 @@ class GoldenMoustacheIE(InfoExtractor):
'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
'view_count': int,
}
}]
@@ -42,9 +40,6 @@ class GoldenMoustacheIE(InfoExtractor):
r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
view_count = int_or_none(self._html_search_regex(
r'<strong>([0-9]+)</strong>\s*VUES</span>',
webpage, 'view count', fatal=False))
return {
'id': video_id,
@@ -53,5 +48,4 @@ class GoldenMoustacheIE(InfoExtractor):
'title': title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
}

View File

@@ -2,57 +2,52 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
)
from ..utils import (
compat_urlparse,
ExtractorError,
parse_duration,
)
class GoshgayIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
_VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
_TEST = {
'url': 'http://www.goshgay.com/video4116282',
'md5': '268b9f3c3229105c57859e166dd72b03',
'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
'md5': '027fcc54459dff0feb0bc06a7aeda680',
'info_dict': {
'id': '4116282',
'id': '299069',
'ext': 'flv',
'title': 'md5:089833a4790b5e103285a07337f245bf',
'thumbnail': 're:http://.*\.jpg',
'title': 'DIESEL SFW XXX Video',
'thumbnail': 're:^http://.*\.jpg$',
'duration': 79,
'age_limit': 18,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
title = self._html_search_regex(
r'<h2>(.*?)<', webpage, 'title')
duration = parse_duration(self._html_search_regex(
r'<span class="duration">\s*-?\s*(.*?)</span>',
webpage, 'duration', fatal=False))
family_friendly = self._html_search_meta(
'isFamilyFriendly', webpage, default='false')
config_url = self._search_regex(
r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
config = self._download_xml(
config_url, video_id, 'Downloading player config XML')
if config is None:
raise ExtractorError('Missing config XML')
if config.tag != 'config':
raise ExtractorError('Missing config attribute')
fns = config.findall('file')
if len(fns) < 1:
raise ExtractorError('Missing media URI')
video_url = fns[0].text
url_comp = compat_urlparse.urlparse(url)
ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
flashvars = compat_parse_qs(self._html_search_regex(
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
webpage, 'flashvars'))
thumbnail = flashvars.get('url_bigthumb', [None])[0]
video_url = flashvars['flv_url'][0]
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'http_referer': ref,
'duration': duration,
'age_limit': 0 if family_friendly == 'true' else 18,
}

View File

@@ -2,9 +2,8 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import js_to_json
class HelsinkiIE(InfoExtractor):
@@ -24,39 +23,21 @@ class HelsinkiIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
formats = []
mobj = re.search(r'file=((\w+):[^&]+)', webpage)
if mobj:
formats.append({
'ext': mobj.group(2),
'play_path': mobj.group(1),
'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
'player_url': 'http://video.helsinki.fi/player.swf',
'format_note': 'sd',
'quality': 0,
})
mobj = re.search(r'hd\.file=((\w+):[^&]+)', webpage)
if mobj:
formats.append({
'ext': mobj.group(2),
'play_path': mobj.group(1),
'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
'player_url': 'http://video.helsinki.fi/player.swf',
'format_note': 'hd',
'quality': 1,
})
params = self._parse_json(self._html_search_regex(
r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
webpage, 'player code'), video_id, transform_source=js_to_json)
formats = [{
'url': s['file'],
'ext': 'mp4',
} for s in params['sources']]
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage).replace('Video: ', ''),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
}

View File

@@ -1,12 +1,12 @@
from __future__ import unicode_literals
import re
import json
import random
import string
from .common import InfoExtractor
from ..utils import find_xpath_attr
from ..utils import (
find_xpath_attr,
int_or_none,
js_to_json,
unescapeHTML,
)
class HowStuffWorksIE(InfoExtractor):
@@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor):
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
'info_dict': {
'id': '450221',
'display_id': 'cool-jobs-iditarod-musher',
'ext': 'flv',
'title': 'Cool Jobs - Iditarod Musher',
'description': 'md5:82bb58438a88027b8186a1fccb365f90',
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
'display_id': 'cool-jobs-iditarod-musher',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 161,
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
{
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
'info_dict': {
'id': '453464',
'display_id': 'survival-zone-food-and-water-in-the-savanna',
'ext': 'mp4',
'title': 'Survival Zone: Food and Water In the Savanna',
'description': 'md5:7e1c89f6411434970c15fa094170c371',
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
'display_id': 'survival-zone-food-and-water-in-the-savanna',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
{
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
'info_dict': {
'id': '440011',
'display_id': 'sword-swallowing-1-by-dan-meyer',
'ext': 'flv',
'title': 'Sword Swallowing #1 by Dan Meyer',
'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
'display_id': 'sword-swallowing-1-by-dan-meyer',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
# md5 is not consistent
'skip_download': True
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
clip_js = self._search_regex(
r'(?s)var clip = ({.*?});', webpage, 'clip info')
clip_info = self._parse_json(
clip_js, display_id, transform_source=js_to_json)
content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
mp4 = self._search_regex(
r'''(?xs)var\s+clip\s*=\s*{\s*
.+?\s*
content_id\s*:\s*%s\s*,\s*
.+?\s*
mp4\s*:\s*\[(.*?),?\]\s*
};\s*
videoData\.push\(clip\);''' % content_id,
webpage, 'mp4', fatal=False, default=None)
smil = self._download_xml(
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
content_id, 'Downloading video SMIL')
http_base = find_xpath_attr(
smil,
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
'name',
'httpBase').get('content')
def random_string(str_len=0):
return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
video_id = clip_info['content_id']
formats = []
m3u8_url = clip_info.get('m3u8')
if m3u8_url:
formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
for video in clip_info.get('mp4', []):
formats.append({
'url': video['src'],
'format_id': video['bitrate'],
'vbr': int(video['bitrate'].rstrip('k')),
})
if not formats:
smil = self._download_xml(
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
video_id, 'Downloading video SMIL')
http_base = find_xpath_attr(
smil,
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
'name',
'httpBase').get('content')
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'
if mp4:
for video in json.loads('[%s]' % mp4):
bitrate = video['bitrate']
fmt = {
'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
'format_id': bitrate,
}
m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
if m:
fmt['vbr'] = int(m.group('vbr'))
formats.append(fmt)
else:
for video in smil.findall(
'.//{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
vbr = int(video.attrib['system-bitrate']) / 1000
'./{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
formats.append({
'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
'format_id': '%dk' % vbr,
@@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor):
self._sort_formats(formats)
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' : HowStuffWorks'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
return {
'id': content_id,
'id': '%s' % video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'title': unescapeHTML(clip_info['clip_title']),
'description': unescapeHTML(clip_info.get('caption')),
'thumbnail': clip_info.get('video_still_url'),
'duration': clip_info.get('duration'),
'formats': formats,
}

View File

@@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
)
class MinhatecaIE(InfoExtractor):
_VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
_TEST = {
'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
'info_dict': {
'id': '125848331',
'ext': 'mp4',
'title': 'youtube-dl test video',
'thumbnail': 're:^https?://.*\.jpg$',
'filesize_approx': 1530000,
'duration': 9,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
token = self._html_search_regex(
r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
webpage, 'request token')
token_data = [
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
data = self._download_json(
req, video_id, note='Downloading metadata')
video_url = data['redirectUrl']
title_str = self._html_search_regex(
r'<h1.*?>(.*?)</h1>', webpage, 'title')
title, _, ext = title_str.rpartition('.')
filesize_approx = parse_filesize(self._html_search_regex(
r'<p class="fileSize">(.*?)</p>',
webpage, 'file size approximation', fatal=False))
duration = parse_duration(self._html_search_regex(
r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<p class="downloadsCounter">([0-9]+)</p>',
webpage, 'view count', fatal=False))
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': ext,
'filesize_approx': filesize_approx,
'duration': duration,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):
raise ExtractorError('Unable to extract track url')
PREFIX = (
r'<div class="cloudcast-play-button-container[^"]*?"'
r'<span class="play-button[^"]*?"'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
@@ -7,6 +8,7 @@ from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import ExtractorError
class MySpaceIE(InfoExtractor):
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
_TESTS = [
{
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'info_dict': {
'id': '100008689',
'id': '109594919',
'ext': 'flv',
'title': 'Viva La Vida',
'description': 'The official Viva La Vida video, directed by Hype Williams',
'uploader': 'Coldplay',
'uploader_id': 'coldplay',
'title': 'Little Big Town',
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage',
},
'params': {
# rtmp download
'skip_download': True,
},
},
# song
# songs
{
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'info_dict': {
'id': '39008454',
'id': '93388656',
'ext': 'flv',
'title': 'Darkness In My Heart',
'uploader_id': 'spiderbags',
'title': 'Of weakened soul...',
'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'add_ie': ['Vevo'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
'id': 'USZM20600099',
'ext': 'mp4',
'title': 'Animal I Have Become',
'uploader': 'Three Days Grace',
'timestamp': int,
'upload_date': '20060502',
},
'skip': 'VEVO is only available in some countries',
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'info_dict': {
'id': 'ypWvQgnJrSU',
'ext': 'mp4',
'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
'uploader': 'Jacob Soren',
'uploader_id': 'SorenPromotions',
'upload_date': '20140725',
}
},
]
@@ -48,16 +75,41 @@ class MySpaceIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'playerSwf":"([^"?]*)', webpage, 'player URL')
if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable
song_data = self._search_regex(
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
webpage, 'song_data', default=None, group=0)
if song_data is None:
# some songs in an album are not playable
self.report_warning(
'%s: No downloadable song on this page' % video_id)
return
def search_data(name):
return self._search_regex(
r'data-%s="(.*?)"' % name, webpage, name)
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data')
streamUrl = search_data('stream-url')
if not streamUrl:
vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id')
if vevo_id:
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
elif youtube_id:
self.to_screen('Youtube video detected: %s' % youtube_id)
return self.url_result(youtube_id, ie='Youtube')
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
info = {
'id': video_id,
'title': self._og_search_title(webpage),
'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage),
}
@@ -79,6 +131,50 @@ class MySpaceIE(InfoExtractor):
info.update({
'url': rtmp_url,
'play_path': play_path,
'player_url': player_url,
'ext': 'flv',
})
return info
class MySpaceAlbumIE(InfoExtractor):
IE_NAME = 'MySpace:album'
_VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
_TESTS = [{
'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
'info_dict': {
'title': 'Transmissions',
'id': '19455773',
},
'playlist_count': 14,
'skip': 'this album is only available in some countries',
}, {
'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
'info_dict': {
'title': 'The Demo',
'id': '18596029',
},
'playlist_count': 5,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
display_id = mobj.group('title') + playlist_id
webpage = self._download_webpage(url, display_id)
tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
if not tracks_paths:
raise ExtractorError(
'%s: No songs found, try using proxy' % display_id,
expected=True)
entries = [
self.url_result(t_path, ie=MySpaceIE.ie_key())
for t_path in tracks_paths]
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': display_id,
'title': self._og_search_title(webpage),
'entries': entries,
}

View File

@@ -0,0 +1,29 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class MyVidsterIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
_TEST = {
'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
'md5': '95296d0231c1363222c3441af62dc4ca',
'info_dict': {
'id': '3685814',
'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
'upload_date': '20141027',
'uploader_id': 'utkualp',
'ext': 'mp4',
'age_limit': 18,
},
'add_ie': ['XHamster'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return self.url_result(self._html_search_regex(
r'rel="videolink" href="(?P<real_url>.*)">',
webpage, 'real video url'))

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
remove_end,
@@ -10,8 +8,8 @@ from ..utils import (
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
@@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
},
}
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
@@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
description = self._og_search_description(webpage)
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
self._html_search_meta('duration', webpage, 'duration'))
return {
'id': shortened_video_id,

View File

@@ -2,11 +2,13 @@ from __future__ import unicode_literals
import re
import json
import os
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
compat_urllib_parse_urlparse
)
from ..utils import (
unified_strdate,
@@ -24,9 +26,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
initial_video_url = info['publishPoint']
if info['formats'] == '1':
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
filename, ext = os.path.splitext(parsed_url.path)
path = '%s_sd%s' % (filename, ext)
data = compat_urllib_parse.urlencode({
'type': 'fvod',
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_doc = self._download_xml(
@@ -73,6 +78,17 @@ class NHLIE(NHLBaseInfoExtractor):
'duration': 0,
'upload_date': '20141011',
},
}, {
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
'md5': 'c78fc64ea01777e426cfc202b746c825',
'info_dict': {
'id': '58665',
'ext': 'flv',
'title': 'Classic Game In Six - April 22, 1979',
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
'duration': 400,
'upload_date': '20100129'
},
}, {
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
'only_matching': True,
@@ -90,7 +106,7 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter'
IE_DESC = 'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
'info_dict': {

View File

@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
_LOGIN_URL = 'http://noco.tv/do.php'
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
_NETRC_MACHINE = 'noco'
_TEST = {
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
if 'erreur' in login:
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
def _call_api(self, path, video_id, note):
def _call_api(self, path, video_id, note, sub_lang=None):
ts = compat_str(int(time.time() * 1000))
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
url = self._API_URL_TEMPLATE % (path, ts, tk)
if sub_lang:
url += self._SUB_LANG_TEMPLATE % sub_lang
resp = self._download_json(url, video_id, note)
@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):
formats = []
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
for lang, lang_dict in medias['fr']['video_list'].items():
for format_id, fmt in lang_dict['quality_list'].items():
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
video = self._call_api(
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
video_id, 'Downloading %s video JSON' % format_id)
video = self._call_api(
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
video_id, 'Downloading %s video JSON' % format_id_extended,
lang if lang != 'none' else None)
file_url = video['file']
if not file_url:
continue
file_url = video['file']
if not file_url:
continue
if file_url in ['forbidden', 'not found']:
popmessage = video['popmessage']
self._raise_error(popmessage['title'], popmessage['message'])
if file_url in ['forbidden', 'not found']:
popmessage = video['popmessage']
self._raise_error(popmessage['title'], popmessage['message'])
formats.append({
'url': file_url,
'format_id': format_id,
'width': fmt['res_width'],
'height': fmt['res_lines'],
'abr': fmt['audiobitrate'],
'vbr': fmt['videobitrate'],
'filesize': fmt['filesize'],
'format_note': qualities[format_id]['quality_name'],
'preference': qualities[format_id]['priority'],
})
formats.append({
'url': file_url,
'format_id': format_id_extended,
'width': fmt['res_width'],
'height': fmt['res_lines'],
'abr': fmt['audiobitrate'],
'vbr': fmt['videobitrate'],
'filesize': fmt['filesize'],
'format_note': qualities[format_id]['quality_name'],
'preference': qualities[format_id]['priority'],
})
self._sort_formats(formats)

View File

@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
'rtmp_conn': 'B:1',
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
'page_url': 'http://www.ntv.ru',
'flash_ver': 'LNX 11,2,202,341',
'flash_version': 'LNX 11,2,202,341',
'rtmp_live': True,
'ext': 'flv',
'filesize': int(size.text),

View File

@@ -4,6 +4,8 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
compat_urllib_parse,
)
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
m_error = re.search(
r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
if m_error:
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
video_title = None
duration = None
video_thumbnail = None

View File

@@ -8,7 +8,6 @@ from ..utils import (
int_or_none,
js_to_json,
qualities,
determine_ext,
)
@@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):
thumbnail = self._search_regex(
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
quality = qualities(['SD', 'HD'])
formats = [{
'url': source['file'],
'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
'quality': quality(source['label']),
} for source in json.loads(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
quality = qualities(['sd', 'hd'])
sources = json.loads(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
formats = []
for container, s in sources.items():
for qname, video_url in s.items():
formats.append({
'url': video_url,
'container': container,
'format_id': '%s-%s' % (container, qname),
'quality': quality(qname),
})
self._sort_formats(formats)
return {

View File

@@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
'upload_date': '20140225',
'upload_date': '20140203',
'duration': 522.56,
},
'params': {
@@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
'upload_date': '20140225',
'upload_date': '20141014',
'duration': 2410.44,
},
'params': {
@@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
'info_dict': {
'id': '439664',
'title': 'Episode 8 - Ganze Folge - Playlist',
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
},
]
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
]
_PAGE_TYPE_REGEXES = [
r'<meta name="page_type" content="([^"]+)">',
r"'itemType'\s*:\s*'([^']*)'",
]
_PLAYLIST_ID_REGEXES = [
r'content[iI]d=(\d+)',
r"'itemId'\s*:\s*'([^']*)'",
]
_PLAYLIST_CLIP_REGEXES = [
r'(?s)data-qvt=.+?<a href="([^"]+)"',
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'testclient'
@@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
'duration': duration,
'formats': formats,
}
def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex(
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
for regex in self._PLAYLIST_CLIP_REGEXES:
playlist_clips = re.findall(regex, webpage)
if playlist_clips:
title = self._html_search_regex(
self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
entries = [
self.url_result(
re.match('(.+?//.+?)/', url).group(1) + clip_path,
'ProSiebenSat1')
for clip_path in playlist_clips]
return self.playlist_result(entries, playlist_id, title, description)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
page_type = self._search_regex(
self._PAGE_TYPE_REGEXES, webpage,
'page type', default='clip').lower()
if page_type == 'clip':
return self._extract_clip(url, webpage)
elif page_type == 'playlist':
return self._extract_playlist(url, webpage)

View File

@@ -0,0 +1,55 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
class RadioDeIE(InfoExtractor):
IE_NAME = 'radio.de'
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
_TEST = {
'url': 'http://ndr2.radio.de/',
'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': {
'id': 'ndr2',
'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273',
'thumbnail': 're:^https?://.*\.png',
},
'params': {
'skip_download': True,
}
}
def _real_extract(self, url):
radio_id = self._match_id(url)
webpage = self._download_webpage(url, radio_id)
broadcast = json.loads(self._search_regex(
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
webpage, 'broadcast'))
title = self._live_title(broadcast['name'])
description = broadcast.get('description') or broadcast.get('shortDescription')
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
formats = [{
'url': stream['streamUrl'],
'ext': stream['streamContentFormat'].lower(),
'acodec': stream['streamContentFormat'],
'abr': stream['bitRate'],
'asr': stream['sampleRate']
} for stream in broadcast['streamUrls']]
self._sort_formats(formats)
return {
'id': radio_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'is_live': True,
'formats': formats,
}

View File

@@ -15,7 +15,7 @@ from ..utils import (
class RTSIE(InfoExtractor):
IE_DESC = 'RTS.ch'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
_VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
_TESTS = [
{
@@ -23,6 +23,7 @@ class RTSIE(InfoExtractor):
'md5': '753b877968ad8afaeddccc374d4256a5',
'info_dict': {
'id': '3449373',
'display_id': 'les-enfants-terribles',
'ext': 'mp4',
'duration': 1488,
'title': 'Les Enfants Terribles',
@@ -30,7 +31,8 @@ class RTSIE(InfoExtractor):
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image'
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
},
{
@@ -38,6 +40,7 @@ class RTSIE(InfoExtractor):
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
'info_dict': {
'id': '5624067',
'display_id': 'entre-ciel-et-mer',
'ext': 'mp4',
'duration': 3720,
'title': 'Les yeux dans les cieux - Mon homard au Canada',
@@ -45,7 +48,8 @@ class RTSIE(InfoExtractor):
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
'thumbnail': 're:^https?://.*\.image'
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
},
{
@@ -53,6 +57,7 @@ class RTSIE(InfoExtractor):
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
'ext': 'mp4',
'duration': 48,
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
@@ -60,7 +65,8 @@ class RTSIE(InfoExtractor):
'uploader': 'Hockey',
'upload_date': '20140403',
'timestamp': 1396556882,
'thumbnail': 're:^https?://.*\.image'
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
'skip': 'Blocked outside Switzerland',
},
@@ -69,6 +75,7 @@ class RTSIE(InfoExtractor):
'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': {
'id': '5745356',
'display_id': 'londres-cachee-par-un-epais-smog',
'ext': 'mp4',
'duration': 33,
'title': 'Londres cachée par un épais smog',
@@ -76,7 +83,8 @@ class RTSIE(InfoExtractor):
'uploader': 'Le Journal en continu',
'upload_date': '20140403',
'timestamp': 1396537322,
'thumbnail': 're:^https?://.*\.image'
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
},
{
@@ -84,6 +92,7 @@ class RTSIE(InfoExtractor):
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
'info_dict': {
'id': '5706148',
'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
'ext': 'mp3',
'duration': 123,
'title': '"Urban Hippie", de Damien Krisl',
@@ -92,22 +101,44 @@ class RTSIE(InfoExtractor):
'timestamp': 1396551600,
},
},
{
'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
'md5': '968777c8779e5aa2434be96c54e19743',
'info_dict': {
'id': '6348260',
'display_id': 'le-19h30',
'ext': 'mp4',
'duration': 1796,
'title': 'Le 19h30',
'description': '',
'uploader': 'Le 19h30',
'upload_date': '20141201',
'timestamp': 1417458600,
'thumbnail': 're:^https?://.*\.image',
'view_count': int,
},
},
{
'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
'only_matching': True,
}
]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
video_id = m.group('id') or m.group('id_new')
display_id = m.group('display_id') or m.group('display_id_new')
def download_json(internal_id):
return self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
video_id)
display_id)
all_info = download_json(video_id)
# video_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, video_id)
page = self._download_webpage(url, display_id)
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
'internal video id')
@@ -143,6 +174,7 @@ class RTSIE(InfoExtractor):
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': info['title'],
'description': info.get('intro'),

View File

@@ -53,6 +53,7 @@ class RutubeIE(InfoExtractor):
m3u8_url = options['video_balancer'].get('m3u8')
if m3u8_url is None:
raise ExtractorError('Couldn\'t find m3u8 manifest url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
return {
'id': video['id'],
@@ -60,8 +61,7 @@ class RutubeIE(InfoExtractor):
'description': video['description'],
'duration': video['duration'],
'view_count': video['hits'],
'url': m3u8_url,
'ext': 'mp4',
'formats': formats,
'thumbnail': video['thumbnail_url'],
'uploader': author.get('name'),
'uploader_id': compat_str(author['id']) if author else None,

View File

@@ -5,61 +5,27 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
unified_strdate,
)
class CinemassacreIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': {
'id': '19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': '521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
_TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id')
full_video_id = mobj.group('full_video_id')
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
video_id = self._match_id(url)
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
videolist_url = None
@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
if mobj:
videoserver = mobj.group('videoserver')
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
vidid = mobj.group('vidid') if mobj else full_video_id
vidid = mobj.group('vidid') if mobj else video_id
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
else:
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
file_ = src.partition(':')[-1]
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
bitrate = int_or_none(video.get('system-bitrate'))
bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
format = {
'url': baseurl + file_,
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
}
if width or height:
format.update({
'tbr': bitrate // 1000 if bitrate else None,
'tbr': bitrate,
'width': width,
'height': height,
})
else:
format.update({
'abr': bitrate // 1000 if bitrate else None,
'abr': bitrate,
'vcodec': 'none',
})
formats.append(format)
self._sort_formats(formats)
else:
formats = [{
'url': vidurl,
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
'title': vidtitle,
'formats': formats,
}
class CinemassacreIE(InfoExtractor):
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [
{
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': {
'id': 'Cinemassacre-19911',
'ext': 'mp4',
'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': {
'id': 'Cinemassacre-521be8ef82b16',
'ext': 'mp4',
'upload_date': '20131002',
'title': 'The Mummys Hand (1940)',
},
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
webpage = self._download_webpage(url, display_id)
playerdata_url = self._search_regex(
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
video_description = self._html_search_regex(
r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}
class TeamFourIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
_TEST = {
'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
'info_dict': {
'id': 'TeamFourStar-5292a02f20bfa',
'ext': 'mp4',
'upload_date': '20130401',
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
'title': 'A Moment With TFS Episode 4',
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
playerdata_url = self._search_regex(
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}

View File

@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return {

View File

@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
raise ExtractorError(
'Broadcast %s does not exist' % broadcast_id, expected=True)
# Adult content
if re.search('EroConfirmText">', broadcast_page) is not None:
(username, password) = self._get_login_info()
if username is None:
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
'use --username and --password options to provide account credentials.', expected=True)
raise ExtractorError(
'Erotic broadcasts allowed only for registered users, '
'use --username and --password options to provide account credentials.',
expected=True)
login_form = {
'login-hint53': '1',
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
'password': password,
}
request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request = compat_urllib_request.Request(
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
broadcast_page = self._download_webpage(
request, broadcast_id, 'Logging in and confirming age')
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
adult_content = False
ticket = self._html_search_regex(
'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
broadcast_page, 'broadcast ticket')
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
if broadcast_password:
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
broadcast_json_page = self._download_webpage(
url, broadcast_id, 'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
protected_broadcast = broadcast_json['_pass_protected'] == 1
if protected_broadcast and not broadcast_password:
raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(
'This broadcast is protected by a password, use the --video-password option',
expected=True)
broadcast_offline = broadcast_json['is_play'] == 0
if broadcast_offline:
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
rtmp_url = broadcast_json['_server']
if not rtmp_url.startswith('rtmp://'):
mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
if not mobj:
raise ExtractorError('Unexpected broadcast rtmp URL')
broadcast_playpath = broadcast_json['_streamName']
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
broadcast_thumbnail = broadcast_json['_imgURL']
broadcast_title = broadcast_json['title']
broadcast_title = self._live_title(broadcast_json['title'])
broadcast_description = broadcast_json['description']
broadcaster_nick = broadcast_json['nick']
broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
'age_limit': 18 if adult_content else 0,
'ext': 'flv',
'play_path': broadcast_playpath,
'player_url': 'http://pics.smotri.com/broadcast_play.swf',
'app': broadcast_app,
'rtmp_live': True,
'rtmp_conn': rtmp_conn
'rtmp_conn': rtmp_conn,
'is_live': True,
}

View File

@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
(?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
(?:/?\?secret_token=(?P<secret_token>[^&]+))?)
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''

View File

@@ -4,10 +4,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import parse_filesize
class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
@@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):
'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'info_dict': {
'id': '5727',
'ext': 'mp4',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
'thumbnail': 're:^http:.*\.jpg$',
}
}]
_FORMATS = {
@@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if video_id.startswith('-'):
display_id = video_id.strip('-')
else:
display_id = video_id
video_id = self._match_id(url)
display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id)
playerpage = self._download_webpage(
'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
display_id, 'Downloading player page')
player_url = self._html_search_meta(
'twitter:player', webpage, 'player URL', default=None)
if player_url:
playerpage = self._download_webpage(
player_url, display_id, 'Downloading player page')
medias = re.findall(
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
playerpage)
formats = []
for url, ext, res in medias:
f = {
'format_id': res + '_' + ext,
'url': url,
'ext': ext,
}
f.update(self._FORMATS.get(res, {}))
formats.append(f)
medias = re.findall(
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
playerpage)
formats = []
for url, ext, res in medias:
f = {
'format_id': res + '_' + ext,
'url': url,
'ext': ext,
}
f.update(self._FORMATS.get(res, {}))
formats.append(f)
thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
else:
download_text = self._search_regex(
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
webpage, 'download links')
links = re.finditer(
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
download_text)
formats = []
for l in links:
format_id = self._search_regex(
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
format = {
'format_id': format_id,
'url': l.group('url'),
'format_name': l.group('name'),
}
m = re.match(
r'''(?x)
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
(?P<vbr>[0-9]+)kbps&\#10;
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
l.group('title'))
if m:
format.update({
'format_note': m.group('audio_desc'),
'vcodec': m.group('vcodec'),
'width': int(m.group('width')),
'height': int(m.group('height')),
'abr': int(m.group('abr')),
'vbr': int(m.group('vbr')),
'filesize_approx': parse_filesize(m.group('filesize_approx')),
})
formats.append(format)
thumbnail_fn = self._search_regex(
r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
description = self._html_search_regex(
r'(?s)<p class="teasertext">(.*?)</p>',
webpage, 'description', fatal=False)
title = self._html_search_regex(
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
self._sort_formats(formats)
thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
return {
'id': display_id,
'title': self._og_search_title(webpage).strip(),
'thumbnail': 'http://www.tagesschau.de' + thumbnail,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
'description': self._og_search_description(webpage).strip(),
'description': description,
}

View File

@@ -199,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor):
webpage = self._download_webpage(url, name)
config_json = self._html_search_regex(
r"data-config='([^']+)", webpage, 'config')
config = json.loads(config_json)
r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
webpage, 'config')
config = json.loads(config_json)['config']
video_url = config['video']['url']
thumbnail = config.get('image', {}).get('url')

View File

@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
)
'''
_API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
_INFO_DICT = {
'id': '34682',
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
station_id = mobj.group('id')
webpage = self._download_webpage(
url, station_id, note='Downloading station webpage')
station_info = self._download_json(
self._API_URL_TEMPLATE.format(station_id),
station_id, note='Downloading station JSON')
payload = self._html_search_regex(
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
json_data = json.loads(payload)
station_info = json_data['Station']['broadcast']
title = station_info['Title']
thumbnail = station_info.get('Logo')
location = station_info.get('Location')

View File

@@ -1,32 +1,30 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
str_to_int,
parse_age_limit,
)
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
_TESTS = [
{
'url': 'http://www.tvigle.ru/video/brat/',
'md5': 'ff4344a4894b0524441fb6f8218dc716',
'url': 'http://www.tvigle.ru/video/sokrat/',
'md5': '36514aed3657d4f70b4b2cef8eb520cd',
'info_dict': {
'id': '5118490',
'display_id': 'brat',
'ext': 'mp4',
'title': 'Брат',
'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
'duration': 5722.6,
'age_limit': 16,
'id': '1848932',
'display_id': 'sokrat',
'ext': 'flv',
'title': 'Сократ',
'description': 'md5:a05bd01be310074d5833efc6743be95e',
'duration': 6586,
'age_limit': 0,
},
},
{
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
title = item['title']
description = item['description']
thumbnail = item['thumbnail']
duration = float_or_none(item['durationMilliseconds'], 1000)
age_limit = str_to_int(item['ageRestrictions'])
duration = float_or_none(item.get('durationMilliseconds'), 1000)
age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
for vcodec, fmts in item['videos'].items():

View File

@@ -6,7 +6,6 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
parse_iso8601,
qualities,
)
@@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor):
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
if video['is_geo_blocked']:
raise ExtractorError(
'This content is not available in your country due to copyright reasons', expected=True)
self.report_warning(
'This content might not be available in your country due to copyright reasons')
streams = self._download_json(
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')

View File

@@ -0,0 +1,109 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
_VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.24video.net/video/view/1044982',
'md5': '48dd7646775690a80447a8dca6a2df76',
'info_dict': {
'id': '1044982',
'ext': 'mp4',
'title': 'Эротика каменного века',
'description': 'Как смотрели порно в каменном веке.',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'SUPERTELO',
'duration': 31,
'timestamp': 1275937857,
'upload_date': '20100607',
'age_limit': 18,
'like_count': int,
'dislike_count': int,
},
},
{
'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
'only_matching': True,
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://www.24video.net/video/view/%s' % video_id, video_id)
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
duration = int_or_none(self._og_search_property(
'duration', webpage, 'duration', fatal=False))
timestamp = parse_iso8601(self._search_regex(
r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
webpage, 'upload date'))
uploader = self._html_search_regex(
r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
webpage, 'uploader', fatal=False)
view_count = int_or_none(self._html_search_regex(
r'<span class="video-views">(\d+) просмотр',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
r'<div class="comments-title" id="comments-count">(\d+) комментари',
webpage, 'comment count', fatal=False))
formats = []
pc_video = self._download_xml(
'http://www.24video.net/video/xml/%s?mode=play' % video_id,
video_id, 'Downloading PC video URL').find('.//video')
formats.append({
'url': pc_video.attrib['url'],
'format_id': 'pc',
'quality': 1,
})
like_count = int_or_none(pc_video.get('ratingPlus'))
dislike_count = int_or_none(pc_video.get('ratingMinus'))
age_limit = 18 if pc_video.get('adult') == 'true' else 0
mobile_video = self._download_xml(
'http://www.24video.net/video/xml/%s' % video_id,
video_id, 'Downloading mobile video URL').find('.//video')
formats.append({
'url': mobile_video.attrib['url'],
'format_id': 'mobile',
'quality': 0,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'comment_count': comment_count,
'like_count': like_count,
'dislike_count': dislike_count,
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
@@ -5,6 +6,8 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
parse_iso8601,
)
@@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):
"""
_PAGE_LIMIT = 100
_API_BASE = 'https://api.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
_TESTS = [{
'url': 'http://www.twitch.tv/riotgames/b/577357806',
'info_dict': {
@@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):
'view_count': info['views'],
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
authenticity_token = self._search_regex(
r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
login_page, 'authenticity token')
login_form = {
'utf8': ''.encode('utf-8'),
'authenticity_token': authenticity_token,
'redirect_on_login': '',
'embed_form': 'false',
'mp_source_action': '',
'follow': '',
'user[login]': username,
'user[password]': password,
}
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
m = re.search(
r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
if m:
raise ExtractorError(
'Unable to login: %s' % m.group('msg').strip(), expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('chapterid'):

View File

@@ -97,11 +97,8 @@ class UdemyIE(InfoExtractor):
if 'returnUrl' not in response:
raise ExtractorError('Unable to log in')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
lecture_id = mobj.group('id')
lecture_id = self._match_id(url)
lecture = self._download_json(
'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,

View File

@@ -13,7 +13,7 @@ from ..utils import (
class VevoIE(InfoExtractor):
"""
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
(currently used by MTVIE and MySpaceIE)
"""
_VALID_URL = r'''(?x)
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|

View File

@@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):
'info_dict': {
'id': '84196',
'ext': 'mp4',
'title': 'Hevnen er søt episode 1:10 - Abu',
'title': 'Hevnen er søt: Episode 10 - Abu',
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 648.000,
@@ -35,7 +35,7 @@ class VGTVIE(InfoExtractor):
'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 9056.000,
'duration': 9103.0,
'timestamp': 1410113864,
'upload_date': '20140907',
'view_count': int,

View File

@@ -17,6 +17,7 @@ class VineIE(InfoExtractor):
'id': 'b9KOOWX7HUx',
'ext': 'mp4',
'title': 'Chicken.',
'alt_title': 'Vine by Jack Dorsey',
'description': 'Chicken.',
'upload_date': '20130519',
'uploader': 'Jack Dorsey',
@@ -25,30 +26,26 @@ class VineIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
data = json.loads(self._html_search_regex(
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
formats = [
{
'url': data['videoLowURL'],
'ext': 'mp4',
'format_id': 'low',
},
{
'url': data['videoUrl'],
'ext': 'mp4',
'format_id': 'standard',
}
]
formats = [{
'url': data['videoLowURL'],
'ext': 'mp4',
'format_id': 'low',
}, {
'url': data['videoUrl'],
'ext': 'mp4',
'format_id': 'standard',
}]
return {
'id': video_id,
'title': self._og_search_title(webpage),
'alt_title': self._og_search_description(webpage),
'description': data['description'],
'thumbnail': data['thumbnailUrl'],
'upload_date': unified_strdate(data['created']),
@@ -63,29 +60,36 @@ class VineIE(InfoExtractor):
class VineUserIE(InfoExtractor):
IE_NAME = 'vine:user'
_VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
_VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
_VINE_BASE_URL = "https://vine.co/"
_TEST = {
'url': 'https://vine.co/Visa',
'info_dict': {
'id': 'Visa',
_TESTS = [
{
'url': 'https://vine.co/Visa',
'info_dict': {
'id': 'Visa',
},
'playlist_mincount': 46,
},
'playlist_mincount': 46,
}
{
'url': 'https://vine.co/u/941705360593584128',
'only_matching': True,
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
u = mobj.group('u')
profile_url = "%sapi/users/profiles/vanity/%s" % (
self._VINE_BASE_URL, user)
profile_url = "%sapi/users/profiles/%s%s" % (
self._VINE_BASE_URL, 'vanity/' if not u else '', user)
profile_data = self._download_json(
profile_url, user, note='Downloading user profile data')
user_id = profile_data['data']['userId']
timeline_data = []
for pagenum in itertools.count(1):
timeline_url = "%sapi/timelines/users/%s?page=%s" % (
timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
self._VINE_BASE_URL, user_id, pagenum)
timeline_page = self._download_json(
timeline_url, user, note='Downloading page %d' % pagenum)

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_chr,
@@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor):
'tbr': 320,
'filesize_approx': 5900000,
'view_count': int,
'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
}
}
@@ -48,9 +51,14 @@ class XMinusIE(InfoExtractor):
view_count = int_or_none(self._html_search_regex(
r'<div class="quality.*?► ([0-9]+)',
webpage, 'view count', fatal=False))
description = self._html_search_regex(
r'(?s)<div id="song_texts">(.*?)</div><br',
webpage, 'song lyrics', fatal=False)
if description:
description = re.sub(' *\r *', '\n', description)
enc_token = self._html_search_regex(
r'data-mt="(.*?)"', webpage, 'enc_token')
r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
token = ''.join(
c if pos == 3 else compat_chr(compat_ord(c) - 1)
for pos, c in enumerate(reversed(enc_token)))
@@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor):
'filesize_approx': filesize_approx,
'tbr': tbr,
'view_count': view_count,
'description': description,
}

View File

@@ -45,7 +45,9 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage)
# Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
json_params = self._search_regex(
r'var currentVideo = new Video\((.*)\)[,;]',
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
except:

View File

@@ -7,29 +7,31 @@ import itertools
import json
import os.path
import re
import time
import traceback
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..utils import (
from ..compat import (
compat_chr,
compat_parse_qs,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
)
from ..utils import (
clean_html,
get_element_by_id,
get_element_by_attribute,
ExtractorError,
get_element_by_attribute,
get_element_by_id,
int_or_none,
OnDemandPagedList,
orderedSet,
unescapeHTML,
unified_strdate,
orderedSet,
uppercase_escape,
)
@@ -38,17 +40,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
def _set_language(self):
return bool(self._download_webpage(
self._LANG_URL, None,
note='Setting language', errnote='unable to set language',
fatal=False))
self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
# YouTube sets the expire time to about two months
expire_time=time.time() + 2 * 30 * 24 * 3600)
def _login(self):
"""
@@ -176,30 +176,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return False
return True
def _confirm_age(self):
age_form = {
'next_url': '/',
'action_confirm': 'Confirm',
}
req = compat_urllib_request.Request(
self._AGE_URL,
compat_urllib_parse.urlencode(age_form).encode('ascii')
)
self._download_webpage(
req, None,
note='Confirming age', errnote='Unable to confirm age',
fatal=False)
def _real_initialize(self):
if self._downloader is None:
return
if self._get_login_info()[0] is not None:
if not self._set_language():
return
self._set_language()
if not self._login():
return
self._confirm_age()
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -305,6 +287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -398,8 +381,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'info_dict': {
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
'title': 'Afrojack - The Spark ft. Spree Wilson',
'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
@@ -421,7 +404,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
}
},
# Normal age-gate video (No vevo, embed allowed)
{
'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
'description': 'md5:eca57043abae25130f58f655ad9a7771',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
{
'url': '__2ABJjxzNo',
'info_dict': {
'id': '__2ABJjxzNo',
'ext': 'mp4',
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
},
'expected_warnings': [
'DASH manifest missing',
]
},
# Olympics (https://github.com/rg3/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
'upload_date': '20120731',
'uploader_id': 'olympic',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
'skip_download': 'requires avconv',
}
},
]
def __init__(self, *args, **kwargs):
@@ -671,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
def _parse_dash_manifest(
self, video_id, dash_manifest_url, player_url, age_gate):
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
return '/signature/%s' % dec_s
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dash_doc = self._download_xml(
dash_manifest_url, video_id,
note='Downloading DASH manifest',
errnote='Could not download DASH manifest')
formats = []
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
if url_el is None:
continue
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
f.update(self._formats.get(format_id, {}))
formats.append(f)
else:
existing_format.update(f)
return formats
def _real_extract(self, url):
proto = (
'http' if self._downloader.params.get('prefer_insecure', False)
@@ -684,16 +752,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
pref_cookies = [
c for c in self._downloader.cookiejar
if c.domain == '.youtube.com' and c.name == 'PREF']
for pc in pref_cookies:
if 'hl=' in pc.value:
pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
else:
if pc.value:
pc.value += '&'
pc.value += 'hl=en'
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
@@ -704,7 +762,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
player_url = None
# Get video info
self.report_video_info_webpage_download(video_id)
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -723,15 +780,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_info = compat_parse_qs(video_info_webpage)
else:
age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
if 'token' in video_info:
break
try:
# Try looking directly into the video webpage
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find ytplayer.config') # caught below
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError('No stream_map present') # caught below
except ValueError:
# We fallback to the get_video_info pages (used by the embed page)
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = (
'%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (proto, video_id, el_type))
video_info_webpage = self._download_webpage(
video_info_url,
video_id, note=False,
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
if 'token' in video_info:
break
if 'token' not in video_info:
if 'reason' in video_info:
raise ExtractorError(
@@ -799,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', fatal=False)
video_webpage, 'categories', default=None)
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -856,32 +930,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
# Decide which formats to download
try:
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find vevo ID')
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# this signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError('No stream_map present') # caught below
re_signature = re.compile(r'[&,]s=')
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
if m_s is not None:
self.to_screen('%s: Encrypted signatures detected.' % video_id)
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
m_s = re_signature.search(args.get('adaptive_fmts', ''))
if m_s is not None:
if 'adaptive_fmts' in video_info:
video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
else:
video_info['adaptive_fmts'] = [args['adaptive_fmts']]
except ValueError:
pass
def _map_to_format_list(urlmap):
formats = []
for itag, video_real_url in urlmap.items():
@@ -903,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'url': video_info['conn'][0],
'player_url': player_url,
}]
elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -968,53 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
try:
# The DASH manifest used needs to be the one from the original video_webpage.
# The one found in get_video_info seems to be using different signatures.
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
if age_gate:
dash_manifest_url = video_info.get('dashmpd')[0]
dash_mpd = video_info.get('dashmpd')
if dash_mpd:
dash_manifest_url = dash_mpd[0]
try:
dash_formats = self._parse_dash_manifest(
video_id, dash_manifest_url, player_url, age_gate)
except (ExtractorError, KeyError) as e:
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
dash_manifest_url = ytplayer_config['args']['dashmpd']
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
return '/signature/%s' % dec_s
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dash_doc = self._download_xml(
dash_manifest_url, video_id,
note='Downloading DASH manifest',
errnote='Could not download DASH manifest')
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
if url_el is None:
continue
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
}
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
f.update(self._formats.get(format_id, {}))
formats.append(f)
else:
existing_format.update(f)
except (ExtractorError, KeyError) as e:
self.report_warning('Skipping DASH manifest: %r' % e, video_id)
formats.extend(dash_formats)
self._sort_formats(formats)
@@ -1256,7 +1268,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = 'youtube:channel'
@@ -1274,13 +1286,8 @@ class YoutubeChannelIE(InfoExtractor):
return ids_in_page
def _real_extract(self, url):
# Extract channel id
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
channel_id = self._match_id(url)
# Download channel page
channel_id = mobj.group(1)
video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
@@ -1294,8 +1301,12 @@ class YoutubeChannelIE(InfoExtractor):
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
else:
# Download all channel pages using the json-based channel_ajax query
entries = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(entries, channel_id)
def _entries():
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json(
@@ -1303,21 +1314,19 @@ class YoutubeChannelIE(InfoExtractor):
transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
for video_id in ids_in_page:
yield self.url_result(
video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
return self.playlist_result(_entries(), channel_id)
class YoutubeUserIE(InfoExtractor):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1345,12 +1354,7 @@ class YoutubeUserIE(InfoExtractor):
return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
username = mobj.group(1)
username = self._match_id(url)
# Download video ids using YouTube Data API. Result size per
# query is limited (currently to 50 videos) so we need to query

View File

@@ -1,12 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
import functools
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
OnDemandPagedList,
)
@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):
class ZDFIE(InfoExtractor):
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
return extract_from_xml_url(self, video_id, xml_url)
class ZDFChannelIE(InfoExtractor):
_VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
'info_dict': {
'id': '1586442',
},
'playlist_count': 4,
}
_PAGE_SIZE = 50
def _fetch_page(self, channel_id, page):
offset = page * self._PAGE_SIZE
xml_url = (
'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
% (offset, self._PAGE_SIZE, channel_id))
doc = self._download_xml(
xml_url, channel_id,
note='Downloading channel info',
errnote='Failed to download channel info')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
for asset in doc.findall('.//teasers/teaser'):
a_type = asset.find('./type').text
a_id = asset.find('./details/assetId').text
if a_type not in ('video', 'topic'):
continue
yield {
'_type': 'url',
'playlist_title': title,
'playlist_description': description,
'url': 'zdf:%s:%s' % (a_type, a_id),
}
def _real_extract(self, url):
channel_id = self._match_id(url)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
return {
'_type': 'playlist',
'id': channel_id,
'entries': entries,
}

View File

@@ -163,7 +163,10 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
help='Do not read configuration files. '
'When given in the global configuration file /etc/youtube-dl.conf: '
'Do not read the user configuration in ~/.config/youtube-dl/config '
'(%APPDATA%/youtube-dl/config.txt on Windows)')
general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist',

View File

@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
def run(self, information):
cmd = self.exec_cmd
if not '{}' in cmd:
if '{}' not in cmd:
cmd += ' {}'
cmd = cmd.replace('{}', shlex_quote(information['filepath']))

View File

@@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor):
if not self._executable:
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
REQUIRED_VERSION = '1.0'
required_version = '10-0' if self._uses_avconv() else '1.0'
if is_outdated_version(
self._versions[self._executable], REQUIRED_VERSION):
self._versions[self._executable], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
self._executable, self._executable, REQUIRED_VERSION)
self._executable, self._executable, required_version)
if self._downloader:
self._downloader.report_warning(warning)

View File

@@ -79,7 +79,7 @@ def update_self(to_screen, verbose):
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if not 'signature' in versions_info:
if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
return
signature = versions_info['signature']

View File

@@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
xpath = xpath.encode('ascii')
n = node.find(xpath)
if n is None:
if n is None or n.text is None:
if fatal:
name = xpath if name is None else name
raise ExtractorError('Could not find XML element %s' % name)
@@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'):
return calendar.timegm(dt.timetuple())
def unified_strdate(date_str):
def unified_strdate(date_str, day_first=True):
"""Return a string with the date in the format YYYYMMDD"""
if date_str is None:
return None
upload_date = None
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
format_expressions = [
'%d %B %Y',
'%d %b %Y',
@@ -669,7 +671,6 @@ def unified_strdate(date_str):
'%d/%m/%Y',
'%d/%m/%y',
'%Y/%m/%d %H:%M:%S',
'%d/%m/%Y %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
@@ -681,6 +682,14 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
if day_first:
format_expressions.extend([
'%d/%m/%Y %H:%M:%S',
])
else:
format_expressions.extend([
'%m/%d/%Y %H:%M:%S',
])
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -712,8 +721,10 @@ def date_from_str(date_str):
Return a datetime object from a string in the format YYYYMMDD or
(now|today)[+-][0-9](day|week|month|year)(s)?"""
today = datetime.date.today()
if date_str == 'now'or date_str == 'today':
if date_str in ('now', 'today'):
return today
if date_str == 'yesterday':
return today - datetime.timedelta(days=1)
match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
if match is not None:
sign = match.group('sign')
@@ -808,22 +819,22 @@ def _windows_write_string(s, out):
GetStdHandle = ctypes.WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
("GetStdHandle", ctypes.windll.kernel32))
(b"GetStdHandle", ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
WriteConsoleW = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
("GetConsoleMode", ctypes.windll.kernel32))
(b"GetConsoleMode", ctypes.windll.kernel32))
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
def not_a_console(handle):
@@ -1024,7 +1035,7 @@ def smuggle_url(url, data):
def unsmuggle_url(smug_url, default=None):
if not '#__youtubedl_smuggle' in smug_url:
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@@ -1090,11 +1101,14 @@ def parse_filesize(s):
}
units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
m = re.match(
r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
if not m:
return None
return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
num_str = m.group('num').replace(',', '.')
mult = _UNIT_TABLE[m.group('unit')]
return int(float(num_str) * mult)
def get_term_width():
@@ -1203,18 +1217,29 @@ def parse_duration(s):
m = re.match(
r'''(?ix)T?
(?:
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
(?:
(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
)?
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
)$''', s)
if not m:
return None
res = int(m.group('secs'))
res = 0
if m.group('only_mins'):
return float_or_none(m.group('only_mins'), invscale=60)
if m.group('only_hours'):
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
if m.group('ms'):
res += float(m.group('ms'))
return res
@@ -1488,7 +1513,7 @@ def limit_length(s, length):
def version_tuple(v):
return [int(e) for e in v.split('.')]
return tuple(int(e) for e in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2014.11.26.1'
__version__ = '2014.12.12.2'