Compare commits

..

213 Commits

Author SHA1 Message Date
Philipp Hagemeister
f1f25be6db release 2014.04.30 2014-04-30 02:05:03 +02:00
Philipp Hagemeister
deab8c1960 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-30 02:04:55 +02:00
Philipp Hagemeister
c57f775710 [YoutubeDL] Add simple tests for format_note (Closes #2825) 2014-04-30 02:02:41 +02:00
AGSPhoenix
e75cafe9fb Clean up format list for consistency
This should make the format list output look a bit nicer.
2014-04-30 01:52:05 +02:00
Philipp Hagemeister
33ab8453c4 Merge pull request #2813 from dstftw/test-real-download-improvement
Improve download mechanism when Range HTTP header is ignored
2014-04-30 01:50:33 +02:00
Philipp Hagemeister
ebd3c7b370 [generic] Add support for protocol-independent URLs (Fixes #2810) 2014-04-30 01:46:06 +02:00
Philipp Hagemeister
29645a1d44 Merge remote-tracking branch 'pulpe/moviezinese' 2014-04-30 01:37:05 +02:00
Philipp Hagemeister
22d99a801a [syfy] Add suppor for generic URLs (Fixes #2827) 2014-04-30 01:35:52 +02:00
Jaime Marquínez Ferrándiz
57b8d84cd9 [5min] Raise an error if the 'success' field is False
For example for georestricted videos.
2014-04-29 14:57:38 +02:00
Sergey M․
65e4ad5bfe [rtbf] Minor changes and YouTube videos support 2014-04-29 19:41:58 +07:00
Nicolas Évrard
98b7d476d9 [RTBFVideo] Remove useless print statement 2014-04-28 23:19:56 +02:00
Nicolas Évrard
201e3c99b9 [RTBFVideo] Add new extractor 2014-04-28 20:32:13 +02:00
Sergey M․
8a7a4a9796 [scivee] Skip test for now 2014-04-28 19:52:32 +07:00
Sergey M․
df297c8794 [http] Improve download mechanism when Range HTTP header is ignored 2014-04-27 09:32:01 +07:00
pulpe
3f53a75f02 [moviezine] Add extractor for moviezine.se (fixes #2808) 2014-04-26 18:55:29 +02:00
Sergey M․
7c360e3a04 [scivee] Add support for scivee.tv 2014-04-26 20:22:15 +07:00
Sergey M․
d2176c8011 [nrk] Add support for nrk.no (Closes #2804) 2014-04-25 21:34:44 +07:00
Jaime Marquínez Ferrándiz
aa92f06308 [youtube] Don't call 'unquote_plus' on the video title (fixes #2799)
It's already unquoted after calling 'compat_parse_qs'.
It replaced '+' with spaces, for example in https://www.youtube.com/watch?v=XC0b5YexO-I.
2014-04-25 13:19:03 +02:00
Jaime Marquínez Ferrándiz
e00c9cf599 [youtube] Update test description field 2014-04-25 13:14:15 +02:00
Jaime Marquínez Ferrándiz
ba60a3ebe0 [youtube] Update test description field 2014-04-25 12:57:04 +02:00
Jaime Marquínez Ferrándiz
efb7e11988 [vimeo] Add an extractor for the watch later list (closes #2787) 2014-04-24 21:51:20 +02:00
Sergey M․
a55c8b7aac [9gag] Fix post view regex 2014-04-24 19:52:34 +07:00
Jaime Marquínez Ferrándiz
a980bc4324 [vimeo] Fix logging in python 3.x
The POST data must be a bytes object.
2014-04-24 14:44:27 +02:00
Sergey M․
4b10aadffc [dailymotion] Fix user playlist extraction 2014-04-23 19:42:34 +07:00
Sergey M․
5bec574859 [ted] Update test 2014-04-22 19:49:41 +07:00
Philipp Hagemeister
d11271dd29 [youtube] Include video Id in common error message (Fixes #2786) 2014-04-21 20:34:03 +02:00
Philipp Hagemeister
1d9d26d09b release 2014.04.21.6 2014-04-21 16:18:32 +02:00
Philipp Hagemeister
c0292e8ab7 [generic] Improve jwplayer detection (Fixes #2731) 2014-04-21 16:16:53 +02:00
Philipp Hagemeister
f44e5d8b43 [vuclip] Fix VALID_URL regex 2014-04-21 16:14:21 +02:00
Philipp Hagemeister
6ea74538e3 release 2014.04.21.5 2014-04-21 15:56:23 +02:00
Philipp Hagemeister
24b8924b46 [facebook] Correct login (Fixes #2743) 2014-04-21 15:56:09 +02:00
Philipp Hagemeister
86a3c67112 release 2014.04.21.4 2014-04-21 15:25:16 +02:00
Philipp Hagemeister
8be874370d Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-21 15:24:51 +02:00
Philipp Hagemeister
aec74dd95a [vuclip] Add extractor (Fixes #2735) 2014-04-21 15:24:44 +02:00
Sergey M․
6890574256 [rutube] Add missing whitespace 2014-04-21 19:04:11 +07:00
Sergey M․
d03745c684 [jukebox] Update test md5 2014-04-21 19:00:27 +07:00
Philipp Hagemeister
28746fbd59 [bilibili] Add preliminary support (#2174)
The URL http://www.bilibili.tv/video/av636603/index_2.html does not work yet.
2014-04-21 13:46:41 +02:00
Philipp Hagemeister
0321213c11 [test_subtitles] Allow more subtitles for TED videos 2014-04-21 13:20:14 +02:00
Philipp Hagemeister
3f0aae4244 release 2014.04.21.3 2014-04-21 12:40:09 +02:00
Philipp Hagemeister
48099643cc [generic] Be more relaxed when looking for aparat embeds (Fixes #2784) 2014-04-21 12:37:41 +02:00
Philipp Hagemeister
621f33c9d0 [ted] Extend search for description 2014-04-21 12:37:16 +02:00
Philipp Hagemeister
f07a9f6f43 [ted] Remove superfluous u prefixes 2014-04-21 12:34:32 +02:00
Philipp Hagemeister
e51880fd32 [cnet] Correct JSON capturing 2014-04-21 07:59:29 +02:00
Philipp Hagemeister
88ce273da4 [arte] differentiate JSON outputs 2014-04-21 07:59:16 +02:00
Philipp Hagemeister
b9ba5dfa28 [test helper] Correct only_matching test gathering 2014-04-21 07:56:51 +02:00
Philipp Hagemeister
4086f11929 release 2014.04.21.2 2014-04-21 07:12:12 +02:00
Philipp Hagemeister
478c2c6193 [clubic] Add extractor (Fixes #2773) 2014-04-21 07:12:02 +02:00
Philipp Hagemeister
d2d6481afb [mdr] Remove unused imports 2014-04-21 06:49:21 +02:00
Philipp Hagemeister
43acb120f3 release 2014.04.21.1 2014-04-21 06:28:25 +02:00
Philipp Hagemeister
e8f2025edf [mdr] Add support for modern URLs (Fixes #2775) 2014-04-21 06:25:21 +02:00
Philipp Hagemeister
a4eb9578af [yahoo] Add support for movies (Fixes #2780) 2014-04-21 06:18:04 +02:00
Philipp Hagemeister
fa35cdad02 [condenast|generic] Add support for condenast embeds (Fixes #2783) 2014-04-21 05:47:52 +02:00
Philipp Hagemeister
d1b9c912a4 [utils] Fix _windows_write_string (Fixes #2779)
It turns out that the function did not work for outputs longer than 1024 UCS-2 tokens.
Write non-BMP characters one by one to ensure that we count correctly.
2014-04-21 04:59:46 +02:00
Philipp Hagemeister
edec83a025 [infoq] Add support for HTTP downloads (Fixes #722) 2014-04-21 03:21:34 +02:00
Philipp Hagemeister
c0a7c60815 [infoq] Simplify (#2777) 2014-04-21 02:55:35 +02:00
Philipp Hagemeister
117a7d1944 Merge remote-tracking branch 'kwbr/master' 2014-04-21 02:48:04 +02:00
Philipp Hagemeister
a40e0dd434 release 2014.04.21 2014-04-21 02:34:53 +02:00
Philipp Hagemeister
188b086dd9 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-21 02:34:44 +02:00
Philipp Hagemeister
1f27d2c0e1 [steam] Add support for steamcommunity.com (Fixes #2757) 2014-04-21 02:34:34 +02:00
Kai Weber
7560096db5 [infoq] Simplify playpath calculation 2014-04-20 01:10:30 +02:00
Kai Weber
282cb9c7ba [infoq] Fix extractor 2014-04-20 01:01:37 +02:00
Sergey M․
3a9d6790ad [ivi] Update playlist tests 2014-04-20 03:06:50 +07:00
Philipp Hagemeister
0610a3e0b2 Remove unused imports 2014-04-19 19:57:09 +02:00
Philipp Hagemeister
7f9c31df88 [steam] Simplify 2014-04-19 19:55:53 +02:00
Philipp Hagemeister
3fa6b6e293 [steam] Modernize 2014-04-19 19:51:04 +02:00
Philipp Hagemeister
3c50b99ab4 [extremetube] Modernize 2014-04-19 19:42:51 +02:00
Philipp Hagemeister
52fadd5fb2 [test_all_urls] Add support for distributed URL matching test definition 2014-04-19 19:41:06 +02:00
Philipp Hagemeister
5367fe7f4d [test_all_urls] Simplify 2014-04-19 13:01:15 +02:00
Philipp Hagemeister
427588f6e7 Merge remote-tracking branch 'MikeCol/extremetube-gay' 2014-04-19 12:59:52 +02:00
Philipp Hagemeister
51745be312 release 2014.04.19 2014-04-19 11:55:33 +02:00
Sergey M․
d7f1e7c88f [rutube] Fix extraction 2014-04-19 15:59:12 +07:00
MikeCol
4145a257be Extended regex match to include gay clips 2014-04-19 00:29:42 +02:00
Sergey M․
525dc9809e [noco] Fix test description md5 2014-04-18 21:36:04 +07:00
Sergey M․
1bf3210816 [noco] Add support for noco.tv (Closes #2712) 2014-04-18 21:11:09 +07:00
Sergey M․
e6c6d10d99 [podomatic] Improve video URL extraction (Closes #2763) 2014-04-17 19:59:52 +07:00
Jaime Marquínez Ferrándiz
f270256e06 [tlc] Add an extractor for tlc.com
It uses the same system as discovery.com
2014-04-16 20:29:31 +02:00
Jaime Marquínez Ferrándiz
f401c6f69f [canalplus] Download the video in the test
It doesn't use rtmpdump now.
2014-04-16 15:54:00 +02:00
Sergey M․
b075d25bed [canalplus] Prefer f4m and modernize (Closes #2749) 2014-04-16 20:47:39 +07:00
Jaime Marquínez Ferrándiz
3d1bb6b4dd Add an extractor for tlc.de (fixes #2748) 2014-04-16 15:45:05 +02:00
Philipp Hagemeister
1db2666916 [youtube:playlist] Correct playlist ID output
The ID now starts with PL, so we don't need to output that twice.
2014-04-15 17:55:52 +02:00
Jaime Marquínez Ferrándiz
8f5c0218d8 [fivemin] Get the 'sid' from the embed page (fixes #2745)
It allows to download some videos that failed.
2014-04-15 16:18:37 +02:00
Sergey M․
d7666dff82 [9gag] Fix and improve extraction 2014-04-15 19:49:38 +07:00
Jaime Marquínez Ferrándiz
2d4c98dbd1 [ted] Use the rtmp links if there http downloads are not available. 2014-04-14 15:23:12 +02:00
Sergey M․
fd50bf623c [generic] Modernize tests 2014-04-14 18:56:29 +07:00
Sergey M․
d360a14678 [generic] Update test 2014-04-14 18:51:46 +07:00
Philipp Hagemeister
d0f2ab6969 release 2014.04.13 2014-04-13 03:22:30 +02:00
Philipp Hagemeister
de906ef543 [aol] Add support for playlists (Fixes #2730) 2014-04-13 03:22:24 +02:00
Sergey M․
2fb3deeca1 [tube8] Fix extraction and modernize 2014-04-13 03:56:32 +07:00
Philipp Hagemeister
66398056f1 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-12 17:15:16 +02:00
Jaime Marquínez Ferrándiz
77477fa4c9 Merge branch 'atomicparsley' (closes #2436) 2014-04-12 15:52:42 +02:00
Jaime Marquínez Ferrándiz
a169e18ce1 [atomicparsley] Remove unneeded __init__ method 2014-04-12 15:51:40 +02:00
Jaime Marquínez Ferrándiz
381640e3ac [brightcove] Only use url from meta element if it has the 'playerKey' field (fixes #2738) 2014-04-12 12:53:48 +02:00
Sergey M․
37e3410137 [prosiebensat1] Add one more clip id pattern (Closes #2737) 2014-04-12 02:53:55 +07:00
Jaime Marquínez Ferrándiz
97b5196960 [weibo] Modernize 2014-04-11 16:02:34 +02:00
Sergey M․
6a4f3528c8 [firstpost] Fix extraction 2014-04-11 20:40:42 +07:00
Philipp Hagemeister
b9c76aa1a9 [youtube] Add support for cleanvideosearch.com (Fixes #2734) 2014-04-11 13:53:05 +02:00
Philipp Hagemeister
0d3070d364 release 2014.04.11.2 2014-04-11 09:44:33 +02:00
Philipp Hagemeister
7753cadbfa [comedycentral:shows] Add support for TDS special editions (Fixes #2733) 2014-04-11 09:30:07 +02:00
Philipp Hagemeister
3950450342 [pyvideo] Fix title 2014-04-11 02:20:50 +02:00
Philipp Hagemeister
c82b1fdad6 [slideshare] Fix description 2014-04-11 02:19:15 +02:00
Philipp Hagemeister
b0fb63abe8 [dailymotion:playlist] Fix title 2014-04-11 02:16:46 +02:00
Philipp Hagemeister
3ab34c603e [comedycentral] Fix test md5sum 2014-04-11 02:14:31 +02:00
Philipp Hagemeister
7d6413341a release 2014.04.11.1 2014-04-11 01:29:54 +02:00
Philipp Hagemeister
140012d0f6 release 2014.04.11 2014-04-11 01:28:30 +02:00
Philipp Hagemeister
4be9f8c814 [ninegag] Add support for p/ URLs 2014-04-11 01:25:24 +02:00
Sergey M․
5c802bac37 [byutv] Fix test 2014-04-10 19:37:55 +07:00
Sergey M․
6c30ff756a [mpora] Fix test 2014-04-10 19:10:03 +07:00
Jaime Marquínez Ferrándiz
62749e4708 [morningstar] Also support 'Cover' (#2729) 2014-04-09 20:51:28 +02:00
Jaime Marquínez Ferrándiz
6b7dee4b38 [morningstar] Recognize urls that use 'videoCenter' (fixes #2729) 2014-04-09 20:45:49 +02:00
Sergey M․
ef2041eb4e [br] Add audio extraction and support more URLs (Closes #2728) 2014-04-09 20:19:27 +07:00
Philipp Hagemeister
29e3e682af [comedycentral] Match more URLs
Looks like they only offer clips instead of full episodes now. We'll need to add new parsing code as well.
2014-04-09 11:43:15 +02:00
Philipp Hagemeister
f983c44199 Merge pull request #2725 from foolscap/subtitles-error-fix
Fix subtitle download error reporting (Fixes #2724)
2014-04-09 10:16:06 +02:00
robbie
e4db19511a Fix subtitle download error reporting (Fixes #2724) 2014-04-08 15:59:27 +01:00
Sergey M․
c47d21da80 [ntv] Update test 2014-04-08 19:11:40 +07:00
Philipp Hagemeister
269aecd0c0 [ffmpeg] Do not pass in byets to subprocess (Fixes #2717) 2014-04-07 23:33:05 +02:00
Philipp Hagemeister
aafddb2b0a Merge remote-tracking branch 'anisse/fix-content-encoding-charset' 2014-04-07 23:27:03 +02:00
Philipp Hagemeister
6262ac8ac5 release 2014.04.07.4 2014-04-07 23:23:54 +02:00
Philipp Hagemeister
89938c719e Fix Windows output for non-BMP unicode characters 2014-04-07 23:23:48 +02:00
Anisse Astier
ec0fafbb19 [extractor/common] fallback on utf-8 when charset is not found
fixes #2721
2014-04-07 23:10:16 +02:00
Philipp Hagemeister
a5863bdf33 release 2014.04.07.3 2014-04-07 22:48:45 +02:00
Philipp Hagemeister
b58ddb32ba [utils] Completely rewrite Windows output (Fixes #2672) 2014-04-07 22:48:13 +02:00
Philipp Hagemeister
b9e12a8140 release 2014.04.07.2 2014-04-07 21:41:20 +02:00
Philipp Hagemeister
104aa7388a Use our own encoding when writing strings 2014-04-07 21:40:34 +02:00
Philipp Hagemeister
c3855d28b0 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-07 19:57:51 +02:00
Philipp Hagemeister
734f90bb41 Use --encoding when outputting 2014-04-07 19:57:42 +02:00
Jaime Marquínez Ferrándiz
91a6addeeb Add support for rtve.es/alacarta 2014-04-07 17:30:32 +02:00
Philipp Hagemeister
9afb76c5ad release 2014.04.07.1 2014-04-07 15:28:55 +02:00
Philipp Hagemeister
dfb2cb5cfd [teamcoco] Simplify ID management (Closes #2715) 2014-04-07 15:25:35 +02:00
Philipp Hagemeister
650d688d10 release 2014.04.07 2014-04-07 13:11:37 +02:00
Philipp Hagemeister
0ba77818f3 [ted] Add width and height (Fixes #2716) 2014-04-07 13:11:30 +02:00
Sergey M․
09baa7da7e [rts] Update test 2014-04-07 00:34:23 +07:00
Sergey M․
85e787f51d [cbsnews] Add support for cbsnews.com (Closes #2691) 2014-04-06 06:03:58 +07:00
Philipp Hagemeister
2a9e1e453a Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-05 20:05:47 +02:00
Philipp Hagemeister
ee1e199685 [justin.tv] Modernize (Fixes #2705) 2014-04-05 17:56:36 +02:00
Sergey M․
17c5a00774 [novamov] Simplify 2014-04-05 19:36:22 +07:00
Sergey M․
15c0e8e7b2 [generic] Generalize novamov based embeds 2014-04-05 17:20:05 +07:00
Sergey M․
cca37fba48 [divxstage] Fix typo in IE_NAME 2014-04-05 17:15:43 +07:00
Sergey M․
9d0993ec4a [movshare] Support more domains 2014-04-05 17:00:18 +07:00
Sergey M․
342f33bf9e [divxstage] Support more domains 2014-04-05 16:50:05 +07:00
Sergey M․
7cd3bc5f99 [nowvideo] Support more domains 2014-04-05 16:38:57 +07:00
Sergey M․
931055e6cb [videoweed] Revert _FILE_DELETED_REGEX 2014-04-05 16:32:14 +07:00
Sergey M․
d0e4cf82f1 [movshare] Add _FILE_DELETED_REGEX 2014-04-05 16:31:38 +07:00
Sergey M․
6f88df2c57 [divxstage] Add support for divxstage.eu 2014-04-05 16:29:44 +07:00
Sergey M․
4479bf2762 [videoweed] Simplify 2014-04-05 16:09:28 +07:00
Sergey M․
1ff7c0f7d8 [movshare] Add support for movshare.net 2014-04-05 16:09:03 +07:00
Sergey M․
610e47c87e Credit @sainyamkapoor for videoweed extractor 2014-04-05 15:53:50 +07:00
Sergey M․
50f566076f [generic] Add support for videoweed embeds 2014-04-05 15:49:45 +07:00
Sergey M․
92810ff497 [nowvideo] Improve _VALID_URL 2014-04-05 15:35:21 +07:00
Sergey M․
60ccc59a1c [novamov] Improve _VALID_URL 2014-04-05 15:34:54 +07:00
Sergey M․
91745595d3 [videoweed] Simplify 2014-04-05 15:32:55 +07:00
Sainyam Kapoor
d6e40507d0 [videoweed]Cleanup 2014-04-05 10:53:22 +05:30
Sainyam Kapoor
deed48b472 [Videoweed] Added support for videoweed. 2014-04-05 10:40:03 +05:30
Philipp Hagemeister
e4d41bfca5 Merge pull request #2696 from anovicecodemonkey/support-ustream-embeds
[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)
2014-04-04 23:33:08 +02:00
Philipp Hagemeister
a355b70f27 [cspan] Do not test number of playlist entries
Apparently, CSpan switches between single-file and multiple-file results. Either one is fine as long as we get the full four hours.
2014-04-04 23:16:22 +02:00
Philipp Hagemeister
f8514f6186 [rts] Use visible id in file names
Maybe the internal ID is more precise, but it's totally confusing, and the obvious ID still allows a google search.
2014-04-04 23:13:55 +02:00
Philipp Hagemeister
e09b8fcd9d [ro220] Make test case more flexible
Either one or two spaces is fine here.
2014-04-04 23:08:33 +02:00
Philipp Hagemeister
7d1b527ff9 [motorsport] Fix on Python 3 2014-04-04 23:06:27 +02:00
Philipp Hagemeister
f943c7b622 release 2014.04.04.7 2014-04-04 23:01:45 +02:00
Philipp Hagemeister
676eb3f2dd Fix unicode_escape (Fixes #2695) 2014-04-04 23:00:51 +02:00
Philipp Hagemeister
98b7cf1ace release 2014.04.04.6 2014-04-04 22:48:35 +02:00
Philipp Hagemeister
c465afd736 [teamcoco] Fix regex in 2.6 (#2700)
The re engine does not want to repeat an empty string, for fear that something like

    (.*)*

could be matching the tokens ...

    ""
    "" ""
    "" "" ""

Of course, that's harmless with a question mark, although still somewhat strange.
2014-04-04 22:46:47 +02:00
Philipp Hagemeister
b84d6e7fc4 Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix' 2014-04-04 22:44:49 +02:00
Philipp Hagemeister
2efd5d78c1 release 2014.04.04.5 2014-04-04 22:24:45 +02:00
Philipp Hagemeister
c8edf47b3a [yahoo] Support https and -uploader URLs (Fixes #2701) 2014-04-04 22:23:59 +02:00
Philipp Hagemeister
3b4c26a428 [pornhd] Avoid shadowing variable url 2014-04-04 22:22:30 +02:00
Philipp Hagemeister
1525148114 Remove unused imports 2014-04-04 22:22:11 +02:00
Philipp Hagemeister
9e0c5791c1 release 2014.04.04.4 2014-04-04 22:15:32 +02:00
Philipp Hagemeister
29a1ab2afc Add alternative --prefer-unsecure spelling (Closes #2697) 2014-04-04 22:15:21 +02:00
AGSPhoenix
fa387d2d99 Revert "Workaround for regex engine limitation"
This reverts commit 6d0d573eca.
2014-04-04 15:37:49 -04:00
AGSPhoenix
6d0d573eca Workaround for regex engine limitation 2014-04-04 15:25:28 -04:00
AGSPhoenix
bb799e811b Add a test for the new URL pages
Add a test for the pages with the video_id in the URL.
2014-04-04 13:52:35 -04:00
AGSPhoenix
04ee53eca1 Support TeamCoco URLs with video_id in the title
If the URL has the video_id in it, use that since the current method of
finding the id breaks on those pages.

Fixes 2698.
2014-04-04 13:42:34 -04:00
Jaime Marquínez Ferrándiz
659eb98a53 [breakcom] Fix YouTube videos extraction (fixes #2699) 2014-04-04 19:01:18 +02:00
anovicecodemonkey
ca6aada48e Fix _TEST for Ustream embed URLs 2014-04-05 03:26:29 +10:30
Jaime Marquínez Ferrándiz
43df5a7e71 [keezmovies] Modernize 2014-04-04 18:52:43 +02:00
Jaime Marquínez Ferrándiz
88f1c6de7b [yahoo] Modernize 2014-04-04 18:52:43 +02:00
Sergey M․
65a40ab82b [pornhd] Update test checksum 2014-04-04 22:47:38 +07:00
Sergey M․
4b9cced103 [pornhd] Fix extraction (Closes #2693) 2014-04-04 22:45:39 +07:00
anovicecodemonkey
5c38625259 [UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694) 2014-04-05 00:53:09 +10:30
Sergey M․
6344fa04bb [rts] Add more formats and audio support (Closes #2689) 2014-04-04 20:42:06 +07:00
Jaime Marquínez Ferrándiz
e3ced9ed61 [downloader/common] Use compat_str with the error in try_rename (appeared in #2389)
Otherwise on python 2.x we get `UnicodeDecodeError` because it may contain non ascii characters.
2014-04-04 14:59:11 +02:00
Philipp Hagemeister
5075d598bc release 2014.04.04.2 2014-04-04 02:24:21 +02:00
Philipp Hagemeister
68eb8e90e6 [youtube:playlist] Fix playlists for logged-in users (Fixes #2690) 2014-04-04 02:23:36 +02:00
Philipp Hagemeister
d3a96346c4 release 2014.04.04.3 2014-04-04 02:09:16 +02:00
Philipp Hagemeister
0e518e2fea [cnet] Fall back to "videos" key 2014-04-04 02:09:04 +02:00
Philipp Hagemeister
1e0a235f39 [dailymotion] Fix playlist+user 2014-04-04 02:04:16 +02:00
Philipp Hagemeister
9ad400f75e [generic] Remove test case that has become a 404 2014-04-04 01:47:17 +02:00
Philipp Hagemeister
3537b93d8a [tests] Fix YoutubeDL tests
Since bec1fad, the id, title, and url (also in formats) keys are mandatory. Change the tests to reflect that.
2014-04-04 01:45:49 +02:00
Philipp Hagemeister
56eca2e956 release 2014.04.04.1 2014-04-04 00:25:43 +02:00
Philipp Hagemeister
2ad4d1ba07 [morningstar] Add new extractor (Fixes #2687) 2014-04-04 00:25:35 +02:00
Philipp Hagemeister
4853de808b release 2014.04.04 2014-04-04 00:06:06 +02:00
Philipp Hagemeister
6ff5f12218 [motorsport] Add extractor (Fixes #2688) 2014-04-04 00:05:43 +02:00
Philipp Hagemeister
52a180684f [README] Fix VALID_URL in extractor example 2014-04-03 23:25:23 +02:00
Philipp Hagemeister
b21e25702f Merge pull request #2681 from phihag/readme-dev-instructions
[README] Improve developer instructions
2014-04-03 23:06:15 +02:00
Jaime Marquínez Ferrándiz
983af2600f [wimp] Detect youtube videos (fixes #2686) 2014-04-03 20:44:51 +02:00
Philipp Hagemeister
f34e6a2cd6 [comedycentral:shows] Do no include 6-digit identifier in display ID 2014-04-03 18:39:00 +02:00
Philipp Hagemeister
a9f304031b release 2014.04.03.3 2014-04-03 16:21:54 +02:00
Philipp Hagemeister
9271bc8355 [cnet] Add new extractor (Fixes #2679) 2014-04-03 16:21:21 +02:00
Philipp Hagemeister
d1b3e3dd75 [README] Add md5 to code example 2014-04-03 15:59:04 +02:00
Philipp Hagemeister
968ed2a777 [comedycentral] Add test for #2677 2014-04-03 15:31:04 +02:00
Philipp Hagemeister
24de5d2556 release 2014.04.03.2 2014-04-03 15:28:56 +02:00
Philipp Hagemeister
d26e981df4 Correct check for empty dirname (Fixes #2683) 2014-04-03 15:28:41 +02:00
Jaime Marquínez Ferrándiz
e45d40b171 [youtube:subscriptions] Add space to the description 2014-04-03 15:13:52 +02:00
Sergey M․
4a419b8851 [c56] Modernize and add duration extraction 2014-04-03 19:53:11 +07:00
Philipp Hagemeister
5fbd672c38 [README] Improve developer instructions
Add a longer tutorial that should cover everything needed to start developing IEs.

Fixes #2676
2014-04-03 14:46:24 +02:00
Philipp Hagemeister
bec1fad223 [YouTubeDL] Throw an early error if the info_dict result is invalid 2014-04-03 14:38:16 +02:00
Philipp Hagemeister
177fed41bc [comedycentral:shows] Support guest/ URLs (Fixes #2677) 2014-04-03 14:38:16 +02:00
Jaime Marquínez Ferrándiz
b900e7cba4 [downloader/f4m] Close the final video 2014-04-03 13:35:07 +02:00
Jaime Marquínez Ferrándiz
14cb4979f0 MANIFEST.in: Only list the files from the docs folder that will be included (closes #2623)
Pruning the _build folder produced the message `no previously-included directories found matching 'docs/_build'` when installing from the source distribution.
2014-04-03 13:26:27 +02:00
pulpe
784763c565 we don't need to run ffmpeg more times 2014-03-26 15:22:52 +01:00
pulpe
39c68260c0 fix ffmpeg metadatapp 2014-03-26 15:22:52 +01:00
pulpe
149254d0d5 fix ffmpeg error, if youtube-dl runs more than once with --embed-thumbnail with same video 2014-03-26 15:22:52 +01:00
pulpe
0c14e2fbe3 add post processor 2014-03-26 15:22:51 +01:00
84 changed files with 2764 additions and 768 deletions

View File

@@ -3,5 +3,4 @@ include test/*.py
include test/*.json include test/*.json
include youtube-dl.bash-completion include youtube-dl.bash-completion
include youtube-dl.1 include youtube-dl.1
recursive-include docs * recursive-include docs Makefile conf.py *.rst
prune docs/_build

View File

@@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
default default
--embed-subs embed subtitles in the video (only for mp4 --embed-subs embed subtitles in the video (only for mp4
videos) videos)
--embed-thumbnail embed thumbnail in the audio as cover art
--add-metadata write metadata to the video file --add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs --xattrs write metadata to the video file's xattrs
(using dublin core and xdg standards) (using dublin core and xdg standards)
@@ -371,7 +372,67 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site ### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
'info_dict': {
'id': '42',
'ext': 'mp4',
'title': 'Video title goes here',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
# TODO more code goes here, for example ...
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
# TODO more properties (see youtube_dl/extractor/common.py)
}
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many of those properties as you want.
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
$ git add youtube_dl/extractor/__init__.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
# BUGS # BUGS

View File

@@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL):
old_report_warning(message) old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self) self.report_warning = types.MethodType(report_warning, self)
def gettestcases():
def gettestcases(include_onlymatching=False):
for ie in youtube_dl.extractor.gen_extractors(): for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None) t = getattr(ie, '_TEST', None)
if t: if t:
t['name'] = type(ie).__name__[:-len('IE')] assert not hasattr(ie, '_TESTS'), \
yield t '%s has _TEST and _TESTS' % type(ie).__name__
for t in getattr(ie, '_TESTS', []): tests = [t]
else:
tests = getattr(ie, '_TESTS', [])
for t in tests:
if not include_onlymatching and t.get('only_matching', False):
continue
t['name'] = type(ie).__name__[:-len('IE')] t['name'] = type(ie).__name__[:-len('IE')]
yield t yield t
@@ -128,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict):
missing_keys, missing_keys,
'Missing keys in test definition: %s' % ( 'Missing keys in test definition: %s' % (
', '.join(sorted(missing_keys)))) ', '.join(sorted(missing_keys))))
def assertRegexpMatches(self, text, regexp, msg=None):
if hasattr(self, 'assertRegexpMatches'):
return self.assertRegexpMatches(text, regexp, msg)
else:
m = re.match(regexp, text)
if not m:
note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
if msg is None:
msg = note
else:
msg = note + ', ' + msg
self.assertTrue(m, msg)

View File

@@ -8,7 +8,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor import YoutubeIE
@@ -26,16 +26,27 @@ class YDL(FakeYDL):
self.msgs.append(msg) self.msgs.append(msg)
def _make_result(formats, **kwargs):
res = {
'formats': formats,
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
}
res.update(**kwargs)
return res
class TestFormatSelection(unittest.TestCase): class TestFormatSelection(unittest.TestCase):
def test_prefer_free_formats(self): def test_prefer_free_formats(self):
# Same resolution => download webm # Same resolution => download webm
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{'ext': 'webm', 'height': 460}, {'ext': 'webm', 'height': 460, 'url': 'x'},
{'ext': 'mp4', 'height': 460}, {'ext': 'mp4', 'height': 460, 'url': 'y'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
@@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': 'a'},
{'ext': 'mp4', 'height': 1080}, {'ext': 'mp4', 'height': 1080, 'url': 'b'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = False ydl.params['prefer_free_formats'] = False
formats = [ formats = [
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': '_'},
{'ext': 'mp4', 'height': 720}, {'ext': 'mp4', 'height': 720, 'url': '_'},
{'ext': 'flv', 'height': 720}, {'ext': 'flv', 'height': 720, 'url': '_'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = False ydl.params['prefer_free_formats'] = False
formats = [ formats = [
{'ext': 'flv', 'height': 720}, {'ext': 'flv', 'height': 720, 'url': '_'},
{'ext': 'webm', 'height': 720}, {'ext': 'webm', 'height': 720, 'url': '_'},
] ]
info_dict['formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
@@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase):
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
] ]
info_dict = { info_dict = _make_result(formats)
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
ydl = YDL() ydl = YDL()
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
@@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self): def test_format_selection(self):
formats = [ formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1}, {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': '45', 'ext': 'webm', 'preference': 2}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
{'format_id': '47', 'ext': 'webm', 'preference': 3}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
{'format_id': '2', 'ext': 'flv', 'preference': 4}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': '20/47'}) ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio'}) ydl = YDL({'format': 'bestaudio'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], 'audio-low') self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [ formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestaudio/worstaudio/best'}) ydl = YDL({'format': 'bestaudio/worstaudio/best'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_video(self): def test_format_selection_video(self):
formats = [ formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
] ]
info_dict = {'formats': formats, 'extractor': 'test'} info_dict = _make_result(formats)
ydl = YDL({'format': 'bestvideo'}) ydl = YDL({'format': 'bestvideo'})
ydl.process_ie_result(info_dict.copy()) ydl.process_ie_result(info_dict.copy())
@@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase):
for f1id, f2id in zip(order, order[1:]): for f1id, f2id in zip(order, order[1:]):
f1 = YoutubeIE._formats[f1id].copy() f1 = YoutubeIE._formats[f1id].copy()
f1['format_id'] = f1id f1['format_id'] = f1id
f1['url'] = 'url:' + f1id
f2 = YoutubeIE._formats[f2id].copy() f2 = YoutubeIE._formats[f2id].copy()
f2['format_id'] = f2id f2['format_id'] = f2id
f2['url'] = 'url:' + f2id
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL() ydl = YDL()
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])
@@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id) self.assertEqual(downloaded['format_id'], f1id)
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL() ydl = YDL()
yie = YoutubeIE(ydl) yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats']) yie._sort_formats(info_dict['formats'])
@@ -262,6 +274,12 @@ class TestFormatSelection(unittest.TestCase):
# Replace missing fields with 'NA' # Replace missing fields with 'NA'
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
def test_format_note(self):
    # Checks the note text that YoutubeDL._format_note builds for a format dict.
    ydl = YoutubeDL()
    # A format dict without any recognised field produces an empty note.
    self.assertEqual(ydl._format_note({}), '')
    # With only 'vbr' set, the note consists solely of the bitrate rendered
    # with '%4dk' (space-padded), so nothing may precede the padding.
    # A raw string keeps '\s' a regex escape rather than a string escape.
    assertRegexpMatches(self, ydl._format_note({
        'vbr': 10,
    }), r'^\s*10k$')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
def test_youtube_channel_matching(self): def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@@ -76,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_justin_tv_channelid_matching(self): def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
def test_justintv_videoid_matching(self): def test_justintv_videoid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
def test_justin_tv_chapterid_matching(self): def test_justin_tv_chapterid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
def test_youtube_extract(self): def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
@@ -105,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_no_duplicates(self): def test_no_duplicates(self):
ies = gen_extractors() ies = gen_extractors()
for tc in gettestcases(): for tc in gettestcases(include_onlymatching=True):
url = tc['url'] url = tc['url']
for ie in ies: for ie in ies:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -153,6 +154,28 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch( self.assertMatch(
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
['ComedyCentralShows']) ['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
['ComedyCentralShows'])
self.assertMatch(
'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
['ComedyCentralShows'])
self.assertMatch(
'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
['ComedyCentralShows'])
def test_yahoo_https(self):
    # See https://github.com/rg3/youtube-dl/issues/2701
    # An https:// screen.yahoo.com URL is asserted to match 'Yahoo'.
    https_url = 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html'
    expected_names = ['Yahoo']
    self.assertMatch(https_url, expected_names)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -43,6 +43,7 @@ from youtube_dl.extractor import (
XTubeUserIE, XTubeUserIE,
InstagramUserIE, InstagramUserIE,
CSpanIE, CSpanIE,
AolIE,
) )
@@ -191,7 +192,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel') self.assertEqual(result['id'], 'dezhurnyi_angel')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
self.assertTrue(len(result['entries']) >= 36) self.assertTrue(len(result['entries']) >= 23)
def test_ivi_compilation_season(self): def test_ivi_compilation_season(self):
dl = FakeYDL() dl = FakeYDL()
@@ -200,7 +201,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel/season2') self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20) self.assertTrue(len(result['entries']) >= 7)
def test_imdb_list(self): def test_imdb_list(self):
dl = FakeYDL() dl = FakeYDL()
@@ -324,10 +325,19 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '342759') self.assertEqual(result['id'], '342759')
self.assertEqual( self.assertEqual(
result['title'], 'General Motors Ignition Switch Recall') result['title'], 'General Motors Ignition Switch Recall')
self.assertEqual(len(result['entries']), 9)
whole_duration = sum(e['duration'] for e in result['entries']) whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855) self.assertEqual(whole_duration, 14855)
def test_aol_playlist(self):
    # Run the AOL extractor on a playlist URL and check the playlist metadata.
    playlist_url = 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316'
    extractor = AolIE(FakeYDL())
    result = extractor.extract(playlist_url)
    self.assertIsPlaylist(result)
    self.assertEqual(result['id'], '152147')
    self.assertEqual(result['title'], 'Brace Yourself - Today\'s Weirdest News')
    # Only a lower bound on the number of entries is asserted.
    entry_count = len(result['entries'])
    self.assertTrue(entry_count >= 10)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 28) self.assertTrue(len(subtitles.keys()) >= 28)
def test_list_subtitles(self): def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.expect_warning(u'Automatic Captions not supported by this server')

View File

@@ -38,6 +38,7 @@ from youtube_dl.utils import (
xpath_with_ns, xpath_with_ns,
parse_iso8601, parse_iso8601,
strip_jsonp, strip_jsonp,
uppercase_escape,
) )
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}]) self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escpae(self):
    # (input, expected) pairs: the empty string stays empty, and a literal
    # backslash-U escape sequence is turned into the character it names.
    cases = (
        (u'', u''),
        (u'\\U0001d550', u'𝕐'),
    )
    for escaped, expected in cases:
        self.assertEqual(uppercase_escape(escaped), expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

136
youtube_dl/YoutubeDL.py Normal file → Executable file
View File

@@ -286,6 +286,9 @@ class YoutubeDL(object):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
return self.to_stdout(message, skip_eol, check_quiet=True) return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s via write_string, passing the 'encoding' entry of self.params.

    out is forwarded unchanged; the encoding is None when the 'encoding'
    parameter is not set.
    """
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False): def to_stdout(self, message, skip_eol=False, check_quiet=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if self.params.get('logger'): if self.params.get('logger'):
@@ -295,7 +298,7 @@ class YoutubeDL(object):
terminator = ['\n', ''][skip_eol] terminator = ['\n', ''][skip_eol]
output = message + terminator output = message + terminator
write_string(output, self._screen_file) self._write_string(output, self._screen_file)
def to_stderr(self, message): def to_stderr(self, message):
"""Print message to stderr.""" """Print message to stderr."""
@@ -305,7 +308,7 @@ class YoutubeDL(object):
else: else:
message = self._bidi_workaround(message) message = self._bidi_workaround(message)
output = message + '\n' output = message + '\n'
write_string(output, self._err_file) self._write_string(output, self._err_file)
def to_console_title(self, message): def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
@@ -315,21 +318,21 @@ class YoutubeDL(object):
# already of type unicode() # already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ: elif 'TERM' in os.environ:
write_string('\033]0;%s\007' % message, self._screen_file) self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self): def save_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if 'TERM' in os.environ:
# Save the title on stack # Save the title on stack
write_string('\033[22;0t', self._screen_file) self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self): def restore_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ: if 'TERM' in os.environ:
# Restore the title from stack # Restore the title from stack
write_string('\033[23;0t', self._screen_file) self._write_string('\033[23;0t', self._screen_file)
def __enter__(self): def __enter__(self):
self.save_console_title() self.save_console_title()
@@ -702,6 +705,11 @@ class YoutubeDL(object):
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
if 'id' not in info_dict:
raise ExtractorError('Missing "id" field in extractor result')
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
if 'playlist' not in info_dict: if 'playlist' not in info_dict:
# It isn't part of a playlist # It isn't part of a playlist
info_dict['playlist'] = None info_dict['playlist'] = None
@@ -733,6 +741,9 @@ class YoutubeDL(object):
# We check that all the formats have the format and format_id fields # We check that all the formats have the format and format_id fields
for i, format in enumerate(formats): for i, format in enumerate(formats):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
if format.get('format_id') is None: if format.get('format_id') is None:
format['format_id'] = compat_str(i) format['format_id'] = compat_str(i)
if format.get('format') is None: if format.get('format') is None:
@@ -868,7 +879,7 @@ class YoutubeDL(object):
try: try:
dn = os.path.dirname(encodeFilename(filename)) dn = os.path.dirname(encodeFilename(filename))
if dn != '' and not os.path.exists(dn): if dn and not os.path.exists(dn):
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError) as err: except (OSError, IOError) as err:
self.report_error('unable to create directory ' + compat_str(err)) self.report_error('unable to create directory ' + compat_str(err))
@@ -925,7 +936,7 @@ class YoutubeDL(object):
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub) subfile.write(sub)
except (OSError, IOError): except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + descfn) self.report_error('Cannot write subtitles file ' + sub_filename)
return return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
@@ -1128,57 +1139,57 @@ class YoutubeDL(object):
res = default res = default
return res return res
def list_formats(self, info_dict): def _format_note(self, fdict):
def format_note(fdict): res = ''
res = '' if fdict.get('ext') in ['f4f', 'f4m']:
if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) '
res += '(unsupported) ' if fdict.get('format_note') is not None:
if fdict.get('format_note') is not None: res += fdict['format_note'] + ' '
res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None:
if fdict.get('tbr') is not None: res += '%4dk ' % fdict['tbr']
res += '%4dk ' % fdict['tbr'] if fdict.get('container') is not None:
if fdict.get('container') is not None: if res:
if res: res += ', '
res += ', ' res += '%s container' % fdict['container']
res += '%s container' % fdict['container'] if (fdict.get('vcodec') is not None and
if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'):
fdict.get('vcodec') != 'none'): if res:
if res: res += ', '
res += ', ' res += fdict['vcodec']
res += fdict['vcodec']
if fdict.get('vbr') is not None:
res += '@'
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += 'video@'
if fdict.get('vbr') is not None: if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr'] res += '@'
if fdict.get('acodec') is not None: elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
if res: res += 'video@'
res += ', ' if fdict.get('vbr') is not None:
if fdict['acodec'] == 'none': res += '%4dk' % fdict['vbr']
res += 'video only' if fdict.get('acodec') is not None:
else: if res:
res += '%-5s' % fdict['acodec'] res += ', '
elif fdict.get('abr') is not None: if fdict['acodec'] == 'none':
if res: res += 'video only'
res += ', ' else:
res += 'audio' res += '%-5s' % fdict['acodec']
if fdict.get('abr') is not None: elif fdict.get('abr') is not None:
res += '@%3dk' % fdict['abr'] if res:
if fdict.get('asr') is not None: res += ', '
res += ' (%5dHz)' % fdict['asr'] res += 'audio'
if fdict.get('filesize') is not None: if fdict.get('abr') is not None:
if res: res += '@%3dk' % fdict['abr']
res += ', ' if fdict.get('asr') is not None:
res += format_bytes(fdict['filesize']) res += ' (%5dHz)' % fdict['asr']
return res if fdict.get('filesize') is not None:
if res:
res += ', '
res += format_bytes(fdict['filesize'])
return res
def list_formats(self, info_dict):
def line(format, idlen=20): def line(format, idlen=20):
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'], format['format_id'],
format['ext'], format['ext'],
self.format_resolution(format), self.format_resolution(format),
format_note(format), self._format_note(format),
)) ))
formats = info_dict.get('formats', [info_dict]) formats = info_dict.get('formats', [info_dict])
@@ -1186,8 +1197,8 @@ class YoutubeDL(object):
max(len(f['format_id']) for f in formats)) max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats] formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1: if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
header_line = line({ header_line = line({
'format_id': 'format code', 'ext': 'extension', 'format_id': 'format code', 'ext': 'extension',
@@ -1203,9 +1214,16 @@ class YoutubeDL(object):
if not self.params.get('verbose'): if not self.params.get('verbose'):
return return
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % write_string(
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
write_string('[debug] youtube-dl version ' + __version__ + '\n') locale.getpreferredencoding(),
sys.getfilesystemencoding(),
sys.stdout.encoding,
self.get_encoding()),
encoding=None
)
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
try: try:
sp = subprocess.Popen( sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'], ['git', 'rev-parse', '--short', 'HEAD'],
@@ -1214,20 +1232,20 @@ class YoutubeDL(object):
out, err = sp.communicate() out, err = sp.communicate()
out = out.decode().strip() out = out.decode().strip()
if re.match('[0-9a-f]+', out): if re.match('[0-9a-f]+', out):
write_string('[debug] Git HEAD: ' + out + '\n') self._write_string('[debug] Git HEAD: ' + out + '\n')
except: except:
try: try:
sys.exc_clear() sys.exc_clear()
except: except:
pass pass
write_string('[debug] Python version %s - %s' % self._write_string('[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + '\n') (platform.python_version(), platform_name()) + '\n')
proxy_map = {} proxy_map = {}
for handler in self._opener.handlers: for handler in self._opener.handlers:
if hasattr(handler, 'proxies'): if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies) proxy_map.update(handler.proxies)
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self): def _setup_opener(self):
timeout_val = self.params.get('socket_timeout') timeout_val = self.params.get('socket_timeout')

View File

@@ -52,6 +52,7 @@ __authors__ = (
'Juan C. Olivares', 'Juan C. Olivares',
'Mattias Harrysson', 'Mattias Harrysson',
'phaer', 'phaer',
'Sainyam Kapoor',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@@ -91,6 +92,8 @@ from .extractor import gen_extractors
from .version import __version__ from .version import __version__
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
from .postprocessor import ( from .postprocessor import (
AtomicParsleyPP,
FFmpegAudioFixPP,
FFmpegMetadataPP, FFmpegMetadataPP,
FFmpegVideoConvertor, FFmpegVideoConvertor,
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
@@ -242,7 +245,7 @@ def parseOpts(overrideArguments=None):
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option( general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure', '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option( general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -502,6 +505,8 @@ def parseOpts(overrideArguments=None):
help='do not overwrite post-processed files; the post-processed files are overwritten by default') help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)') help='embed subtitles in the video (only for mp4 videos)')
postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
help='embed thumbnail in the audio as cover art')
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
help='write metadata to the video file') help='write metadata to the video file')
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@@ -807,6 +812,10 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs: if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP()) ydl.add_post_processor(XAttrMetadataPP())
if opts.embedthumbnail:
if not opts.addmetadata:
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
# Update version # Update version
if opts.update_self: if opts.update_self:

View File

@@ -4,9 +4,10 @@ import sys
import time import time
from ..utils import ( from ..utils import (
compat_str,
encodeFilename, encodeFilename,
timeconvert,
format_bytes, format_bytes,
timeconvert,
) )
@@ -173,7 +174,7 @@ class FileDownloader(object):
return return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err: except (IOError, OSError) as err:
self.report_error(u'unable to rename file: %s' % str(err)) self.report_error(u'unable to rename file: %s' % compat_str(err))
def try_utime(self, filename, last_modified_hdr): def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""

View File

@@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
break break
frags_filenames.append(frag_filename) frags_filenames.append(frag_filename)
dest_stream.close()
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)

View File

@@ -14,6 +14,8 @@ from ..utils import (
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
_TEST_FILE_SIZE = 10241
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@@ -28,8 +30,10 @@ class HttpFD(FileDownloader):
basic_request = compat_urllib_request.Request(url, None, headers) basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False): is_test = self.params.get('test', False)
request.add_header('Range', 'bytes=0-10240')
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)): if os.path.isfile(encodeFilename(tmpfilename)):
@@ -100,6 +104,15 @@ class HttpFD(FileDownloader):
return False return False
data_len = data.info().get('Content-length', None) data_len = data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and data_len > self._TEST_FILE_SIZE:
data_len = self._TEST_FILE_SIZE
if data_len is not None: if data_len is not None:
data_len = int(data_len) + resume_len data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None) min_data_len = self.params.get("min_filesize", None)
@@ -118,7 +131,7 @@ class HttpFD(FileDownloader):
while True: while True:
# Download and write # Download and write
before = time.time() before = time.time()
data_block = data.read(block_size) data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
after = time.time() after = time.time()
if len(data_block) == 0: if len(data_block) == 0:
break break
@@ -162,6 +175,9 @@ class HttpFD(FileDownloader):
'speed': speed, 'speed': speed,
}) })
if is_test and byte_counter == data_len:
break
# Apply rate limit # Apply rate limit
self.slow_down(start, byte_counter - resume_len) self.slow_down(start, byte_counter - resume_len)

View File

@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE from .bbccouk import BBCCoUkIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
@@ -32,6 +33,7 @@ from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE
@@ -39,7 +41,9 @@ from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .clubic import ClubicIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnet import CNETIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNBlogsIE, CNNBlogsIE,
@@ -61,6 +65,7 @@ from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE from .ehow import EHowIE
@@ -153,6 +158,10 @@ from .mixcloud import MixcloudIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mooshare import MooshareIE from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motorsport import MotorsportIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVIggyIE, MTVIggyIE,
@@ -175,10 +184,12 @@ from .nfb import NFBIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE from .niconico import NiconicoIE
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .novamov import NovaMovIE from .novamov import NovaMovIE
from .nowness import NownessIE from .nowness import NownessIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .nrk import NRKIE
from .ntv import NTVIE from .ntv import NTVIE
from .oe1 import OE1IE from .oe1 import OE1IE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
@@ -200,8 +211,10 @@ from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,
@@ -210,6 +223,7 @@ from .rutube import (
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .scivee import SciVeeIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sina import SinaIE from .sina import SinaIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
@@ -244,6 +258,7 @@ from .tf1 import TF1IE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
from .toutv import TouTvIE from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
@@ -273,6 +288,7 @@ from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videoweed import VideoWeedIE
from .vimeo import ( from .vimeo import (
VimeoIE, VimeoIE,
VimeoChannelIE, VimeoChannelIE,
@@ -280,11 +296,13 @@ from .vimeo import (
VimeoAlbumIE, VimeoAlbumIE,
VimeoGroupsIE, VimeoGroupsIE,
VimeoReviewIE, VimeoReviewIE,
VimeoWatchLaterIE,
) )
from .vine import VineIE from .vine import VineIE
from .viki import VikiIE from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import WashingtonPostIE
from .wat import WatIE from .wat import WatIE
from .wdr import ( from .wdr import (

View File

@@ -8,7 +8,18 @@ from .fivemin import FiveMinIE
class AolIE(InfoExtractor): class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com' IE_NAME = 'on.aol.com'
_VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)' _VALID_URL = r'''(?x)
(?:
aol-video:|
http://on\.aol\.com/
(?:
video/.*-|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
)
)
(?P<id>[0-9]+)
(?:$|\?)
'''
_TEST = { _TEST = {
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
self.to_screen('Downloading 5min.com video %s' % video_id)
playlist_id = mobj.group('playlist_id')
if playlist_id and not self._downloader.params.get('noplaylist'):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
playlist_html = self._search_regex(
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
'playlist HTML')
entries = [{
'_type': 'url',
'url': 'aol-video:%s' % m.group('id'),
'ie_key': 'Aol',
} for m in re.finditer(
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
playlist_html)]
return {
'_type': 'playlist',
'id': playlist_id,
'display_id': mobj.group('playlist_display_id'),
'title': title,
'entries': entries,
}
return FiveMinIE._build_result(video_id) return FiveMinIE._build_result(video_id)

View File

@@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_webpage(webpage, video_id, lang) return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang): def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') json_url = self._html_search_regex(
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
return self._extract_from_json_url(json_url, video_id, lang) return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang): def _extract_from_json_url(self, json_url, video_id, lang):

View File

@@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
ExtractorError,
int_or_none,
unified_strdate,
)
class BiliBiliIE(InfoExtractor):
    """Extractor for bilibili.tv video pages of the form /video/av<id>/."""

    _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'

    _TEST = {
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '2c301e4dab317596e837c3e7633e7d86',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            'duration': 308,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Title, duration, upload date and thumbnail are read from the meta
        tags inside the page's ``itemprop="video"`` div. The media URLs come
        from two XML documents keyed by the player's ``cid`` parameter: a
        mandatory low-quality one and a best-effort high-quality one.

        Raises ExtractorError when the player parameters carry no ``cid``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_code = self._search_regex(
            r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')

        title = self._html_search_meta(
            'media:title', video_code, 'title', fatal=True)

        duration = None
        duration_str = self._html_search_meta(
            'duration', video_code, 'duration')
        if duration_str is not None:
            # Expected shape is T[<h>H]<m>M<s>S.
            duration_mobj = re.match(
                r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
                duration_str)
            # Duration is optional metadata: a value in an unexpected format
            # must not abort the whole extraction, so leave it as None.
            if duration_mobj is not None:
                duration = (
                    int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
                    int(duration_mobj.group('minutes')) * 60 +
                    int(duration_mobj.group('seconds')))

        upload_date = unified_strdate(self._html_search_meta(
            'uploadDate', video_code, fatal=False))
        thumbnail = self._html_search_meta(
            'thumbnailUrl', video_code, 'thumbnail', fatal=False)

        player_params = compat_parse_qs(self._html_search_regex(
            r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
            webpage, 'player params'))

        if 'cid' not in player_params:
            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
        cid = player_params['cid'][0]

        # The low-quality variant is mandatory: failures here are fatal.
        lq_doc = self._download_xml(
            'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
            video_id,
            note='Downloading LQ video info'
        )
        lq_durl = lq_doc.find('.//durl')
        formats = [{
            'format_id': 'lq',
            'quality': 1,
            'url': lq_durl.find('./url').text,
            'filesize': int_or_none(
                lq_durl.find('./size'), get_attr='text'),
        }]

        # The high-quality variant is requested with fatal=False, so treat
        # it as best effort all the way through: a document that downloads
        # but lacks a <durl>/<url> element is skipped instead of crashing.
        hq_doc = self._download_xml(
            'http://interface.bilibili.cn/playurl?cid=%s' % cid,
            video_id,
            note='Downloading HQ video info',
            fatal=False,
        )
        if hq_doc is not False:
            hq_durl = hq_doc.find('.//durl')
            hq_url = None if hq_durl is None else hq_durl.find('./url')
            if hq_url is not None:
                formats.append({
                    'format_id': 'hq',
                    'quality': 2,
                    'ext': 'flv',
                    'url': hq_url.text,
                    'filesize': int_or_none(
                        hq_durl.find('./size'), get_attr='text'),
                })

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }

View File

@@ -4,39 +4,72 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
int_or_none,
)
class BRIE(InfoExtractor): class BRIE(InfoExtractor):
IE_DESC = "Bayerischer Rundfunk Mediathek" IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$" _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
_BASE_URL = "http://www.br.de" _BASE_URL = 'http://www.br.de'
_TESTS = [ _TESTS = [
{ {
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html", 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
"md5": "c4f83cf0f023ba5875aba0bf46860df2", 'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
"info_dict": { 'info_dict': {
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532", 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
"ext": "mp4", 'ext': 'mp4',
"title": "Feiern und Verzichten", 'title': 'Feiern und Verzichten',
"description": "Anselm Grün: Feiern und Verzichten", 'description': 'Anselm Grün: Feiern und Verzichten',
"uploader": "BR/Birgit Baier", 'uploader': 'BR/Birgit Baier',
"upload_date": "20140301" 'upload_date': '20140301',
} }
}, },
{ {
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html", 'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe", 'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
"info_dict": { 'info_dict': {
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812", 'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
"ext": "mp4", 'ext': 'mp4',
"title": "Über den Pass", 'title': 'Über den Pass',
"description": "Die Eroberung der Alpen: Über den Pass", 'description': 'Die Eroberung der Alpen: Über den Pass',
"uploader": None,
"upload_date": None
} }
} },
{
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
'info_dict': {
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
'ext': 'aac',
'title': '"Keine neuen Schulden im nächsten Jahr"',
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
}
},
{
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
'info_dict': {
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
}
},
{
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
'md5': '23bca295f1650d698f94fc570977dae3',
'info_dict': {
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
}
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
display_id = mobj.group('id') display_id = mobj.group('id')
page = self._download_webpage(url, display_id) page = self._download_webpage(url, display_id)
xml_url = self._search_regex( xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL") r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
xml = self._download_xml(self._BASE_URL + xml_url, None) xml = self._download_xml(self._BASE_URL + xml_url, None)
videos = [] medias = []
for xml_video in xml.findall("video"):
video = {
"id": xml_video.get("externalId"),
"title": xml_video.find("title").text,
"formats": self._extract_formats(xml_video.find("assets")),
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
"webpage_url": xml_video.find("permalink").text
}
if xml_video.find("author").text:
video["uploader"] = xml_video.find("author").text
if xml_video.find("broadcastDate").text:
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
videos.append(video)
if len(videos) > 1: for xml_media in xml.findall('video') + xml.findall('audio'):
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
'webpage_url': xml_media.find('permalink').text
}
if xml_media.find('author').text:
media['uploader'] = xml_media.find('author').text
if xml_media.find('broadcastDate').text:
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
medias.append(media)
if len(medias) > 1:
self._downloader.report_warning( self._downloader.report_warning(
'found multiple videos; please ' 'found multiple medias; please '
'report this with the video URL to http://yt-dl.org/bug') 'report this with the video URL to http://yt-dl.org/bug')
if not videos: if not medias:
raise ExtractorError('No video entries found') raise ExtractorError('No media entries found')
return videos[0] return medias[0]
def _extract_formats(self, assets): def _extract_formats(self, assets):
def text_or_none(asset, tag):
elem = asset.find(tag)
return None if elem is None else elem.text
formats = [{ formats = [{
"url": asset.find("downloadUrl").text, 'url': text_or_none(asset, 'downloadUrl'),
"ext": asset.find("mediaType").text, 'ext': text_or_none(asset, 'mediaType'),
"format_id": asset.get("type"), 'format_id': asset.get('type'),
"width": int(asset.find("frameWidth").text), 'width': int_or_none(text_or_none(asset, 'frameWidth')),
"height": int(asset.find("frameHeight").text), 'height': int_or_none(text_or_none(asset, 'frameHeight')),
"tbr": int(asset.find("bitrateVideo").text), 'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
"abr": int(asset.find("bitrateAudio").text), 'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
"vcodec": asset.find("codecVideo").text, 'vcodec': text_or_none(asset, 'codecVideo'),
"container": asset.find("mediaType").text, 'acodec': text_or_none(asset, 'codecAudio'),
"filesize": int(asset.find("size").text), 'container': text_or_none(asset, 'mediaType'),
} for asset in assets.findall("asset") 'filesize': int_or_none(text_or_none(asset, 'size')),
if asset.find("downloadUrl") is not None] } for asset in assets.findall('asset')
if asset.find('downloadUrl') is not None]
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
def _extract_thumbnails(self, variants): def _extract_thumbnails(self, variants):
thumbnails = [{ thumbnails = [{
"url": self._BASE_URL + variant.find("url").text, 'url': self._BASE_URL + variant.find('url').text,
"width": int(variant.find("width").text), 'width': int_or_none(variant.find('width').text),
"height": int(variant.find("height").text), 'height': int_or_none(variant.find('height').text),
} for variant in variants.findall("variant")] } for variant in variants.findall('variant')]
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True) thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails return thumbnails

View File

@@ -27,9 +27,10 @@ class BreakIE(InfoExtractor):
webpage, 'info json', flags=re.DOTALL) webpage, 'info json', flags=re.DOTALL)
info = json.loads(info_json) info = json.loads(info_json)
video_url = info['videoUri'] video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) youtube_id = info.get('youtubeId')
if m_youtube is not None: if youtube_id:
return self.url_result(m_youtube.group(1), 'Youtube') return self.url_result(youtube_id, 'Youtube')
final_url = video_url + '?' + info['AuthToken'] final_url = video_url + '?' + info['AuthToken']
return { return {
'id': video_id, 'id': video_id,

View File

@@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m: if url_m:
return [unescapeHTML(url_m.group(1))] url = unescapeHTML(url_m.group(1))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url:
return [url]
matches = re.findall( matches = re.findall(
r'''(?sx)<object r'''(?sx)<object

View File

@@ -4,9 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError
ExtractorError,
)
class BYUtvIE(InfoExtractor): class BYUtvIE(InfoExtractor):
@@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'granite-flats-talking', 'id': 'granite-flats-talking',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', 'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
'title': 'Talking', 'title': 'Talking',
'thumbnail': 're:^https?://.*promo.*' 'thumbnail': 're:^https?://.*promo.*'
}, },

View File

@@ -2,39 +2,46 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
class C56IE(InfoExtractor): class C56IE(InfoExtractor):
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
IE_NAME = '56.com' IE_NAME = '56.com'
_TEST = { _TEST = {
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
'file': '93440716.flv',
'md5': 'e59995ac63d0457783ea05f93f12a866', 'md5': 'e59995ac63d0457783ea05f93f12a866',
'info_dict': { 'info_dict': {
'id': '93440716',
'ext': 'flv',
'title': '网事知多少 第32期车怒', 'title': '网事知多少 第32期车怒',
'duration': 283.813,
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid') text_id = mobj.group('textid')
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
text_id, 'Downloading video info') page = self._download_json(
info = json.loads(info_page)['info'] 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
formats = [{
'format_id': f['type'], info = page['info']
'filesize': int(f['filesize']),
'url': f['url'] formats = [
} for f in info['rfiles']] {
'format_id': f['type'],
'filesize': int(f['filesize']),
'url': f['url']
} for f in info['rfiles']
]
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': info['vid'], 'id': info['vid'],
'title': info['Subject'], 'title': info['Subject'],
'duration': int(info['duration']) / 1000.0,
'formats': formats, 'formats': formats,
'thumbnail': info.get('bimg') or info.get('img'), 'thumbnail': info.get('bimg') or info.get('img'),
} }

View File

@@ -1,4 +1,6 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -8,46 +10,56 @@ from ..utils import unified_strdate
class CanalplusIE(InfoExtractor): class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr' IE_NAME = 'canalplus.fr'
_TEST = { _TEST = {
u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', 'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
u'file': u'922470.flv', 'md5': '60c29434a416a83c15dae2587d47027d',
u'info_dict': { 'info_dict': {
u'title': u'Zapping - 26/08/13', 'id': '922470',
u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', 'ext': 'flv',
u'upload_date': u'20130826', 'title': 'Zapping - 26/08/13',
}, 'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
u'params': { 'upload_date': '20130826',
u'skip_download': True,
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.groupdict().get('id') video_id = mobj.group('id')
if video_id is None: if video_id is None:
webpage = self._download_webpage(url, mobj.group('path')) webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id') video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id info_url = self._VIDEO_INFO_TEMPLATE % video_id
doc = self._download_xml(info_url,video_id, doc = self._download_xml(info_url, video_id, 'Downloading video XML')
u'Downloading video info')
self.report_extraction(video_id)
video_info = [video for video in doc if video.find('ID').text == video_id][0] video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS')
media = video_info.find('MEDIA') media = video_info.find('MEDIA')
formats = [media.find('VIDEOS/%s' % format) infos = video_info.find('INFOS')
for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
video_url = [format.text for format in formats if format is not None][-1]
return {'id': video_id, preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
infos.find('TITRAGE/SOUS_TITRE').text), formats = [
'url': video_url, {
'ext': 'flv', 'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), 'format_id': fmt.tag,
'thumbnail': media.find('IMAGES/GRAND').text, 'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
'description': infos.find('DESCRIPTION').text, 'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
'view_count': int(infos.find('NB_VUES').text), } for fmt in media.find('VIDEOS') if fmt.text
} ]
self._sort_formats(formats)
return {
'id': video_id,
'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
infos.find('TITRAGE/SOUS_TITRE').text),
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
'thumbnail': media.find('IMAGES/GRAND').text,
'description': infos.find('DESCRIPTION').text,
'view_count': int(infos.find('NB_VUES').text),
'like_count': int(infos.find('NB_LIKES').text),
'comment_count': int(infos.find('NB_COMMENTS').text),
'formats': formats,
}

View File

@@ -0,0 +1,87 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class CBSNewsIE(InfoExtractor):
    """Extractor for cbsnews.com article and video pages."""

    IE_DESC = 'CBS News'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'

    _TESTS = [
        {
            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
            'info_dict': {
                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
                'ext': 'flv',
                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
                'duration': 791,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
                'ext': 'flv',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
                'duration': 205,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the CBS News page at *url*.

        The page embeds a JSON blob in a ``data-video-info`` or
        ``data-video-player-options`` attribute; title, duration, thumbnail
        and the per-format media URIs are all read from it.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        info_json = self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            webpage, 'video JSON info')
        video_info = json.loads(info_json)
        # The payload is either wrapped in an 'item' key or is the item itself.
        item = video_info.get('item', video_info)

        formats = []
        # Formats are appended in this fixed order; no sorting happens afterwards.
        for format_id in ('RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop'):
            uri = item.get('media%sURI' % format_id)
            if not uri:
                continue

            entry = {
                'url': uri,
                'format_id': format_id,
            }
            if uri.startswith('rtmp'):
                # The part after the last '<break>' marker is used as play path.
                entry['app'] = 'ondemand?auth=cbs'
                entry['play_path'] = 'mp4:' + uri.split('<break>')[-1]
                entry['player_url'] = 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf'
                entry['page_url'] = 'http://www.cbsnews.com'
                entry['ext'] = 'flv'
            elif uri.endswith('.m3u8'):
                entry['ext'] = 'mp4'
            formats.append(entry)

        return {
            'id': video_id,
            'title': item.get('articleTitle') or item.get('hed'),
            'thumbnail': item.get('mediaImage') or item.get('thumbnail'),
            'duration': item.get('duration'),
            'formats': formats,
        }

View File

@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
qualities,
)
class ClubicIE(InfoExtractor):
    """Extractor for clubic.com video pages."""

    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
        'md5': '1592b694ba586036efac1776b0b43cd3',
        'info_dict': {
            'id': '448474',
            'ext': 'mp4',
            'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
            'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
            'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the Clubic video page at *url*.

        The article page itself is not fetched; the numeric id from the URL
        is used to download the m6web player page, whose inline
        ``M6.Player.config`` JSON carries the metadata and stream sources.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        player_page = self._download_webpage(
            'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id,
            video_id)

        config = json.loads(self._search_regex(
            r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
            'configuration'))

        video_info = config['videoInfo']
        rank_quality = qualities(['sd', 'hq'])

        formats = []
        for source in config['sources']:
            stream_quality = source['streamQuality']
            formats.append({
                'format_id': stream_quality,
                'url': source['src'],
                'quality': rank_quality(stream_quality),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'description': clean_html(video_info.get('description')),
            'thumbnail': config.get('poster'),
        }

View File

@@ -0,0 +1,75 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class CNETIE(InfoExtractor):
    """Extractor for cnet.com video pages (``/videos/<slug>/``)."""

    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'

    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the CNET video page at *url*.

        The page carries a JSON blob in the ``data-cnet-video-options``
        attribute of the player div. The video record is taken from its
        ``video`` key, falling back to the first entry of ``videos``.

        Raises ExtractorError when neither location yields a video record.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)

        # Use .get() so that a missing 'video' key falls through to the
        # 'videos' list, and a missing or empty list ends in the descriptive
        # ExtractorError below rather than a bare KeyError/IndexError.
        vdata = data.get('video')
        if not vdata:
            videos = data.get('videos') or []
            vdata = videos[0] if videos else None
        if not vdata:
            raise ExtractorError('Cannot find video data')

        video_id = vdata['id']
        title = vdata['headline']
        description = vdata.get('dek')
        # 'image' may be present but null; treat that like a missing key.
        thumbnail = (vdata.get('image') or {}).get('path')

        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('email')
        else:
            uploader = None
            uploader_id = None

        formats = [{
            # The bitrate part of the id is empty when no bitrate is given.
            'format_id': '%s-%s-%s' % (
                f['type'], f['format'],
                int_or_none(f.get('bitrate'), 1000, default='')),
            'url': f['uri'],
            'tbr': int_or_none(f.get('bitrate'), 1000),
        } for f in vdata['files']['data']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }

View File

@@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': '4167875aae411f903b751a21f357f1ee', 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': { 'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'ext': 'mp4', 'ext': 'mp4',
@@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)? |https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
(full-episodes/(?P<episode>.*)| ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip> (?P<clip>
(?:videos/[^/]+/(?P<videotitle>[^/?#]+)) (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)| )|

View File

@@ -251,7 +251,10 @@ class InfoExtractor(object):
with open(filename, 'wb') as outf: with open(filename, 'wb') as outf:
outf.write(webpage_bytes) outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace') try:
content = webpage_bytes.decode(encoding, 'replace')
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]): u'Websense' in content[:512]):
@@ -276,9 +279,12 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id, def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML', note=u'Downloading XML', errnote=u'Unable to download XML',
transform_source=None): transform_source=None, fatal=True):
"""Return the xml as an xml.etree.ElementTree.Element""" """Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote) xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal)
if xml_string is False:
return xml_string
if transform_source: if transform_source:
xml_string = transform_source(xml_string) xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))

View File

@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
'glamour': 'Glamour', 'glamour': 'Glamour',
'wmagazine': 'W Magazine', 'wmagazine': 'W Magazine',
'vanityfair': 'Vanity Fair', 'vanityfair': 'Vanity Fair',
'cnevids': 'Condé Nast',
} }
_VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
_TEST = { _TEST = {
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
'file': '5171b343c2b4c00dd0c1ccb3.mp4',
'md5': '1921f713ed48aabd715691f774c451f7', 'md5': '1921f713ed48aabd715691f774c451f7',
'info_dict': { 'info_dict': {
'id': '5171b343c2b4c00dd0c1ccb3',
'ext': 'mp4',
'title': '3D Printed Speakers Lit With LED', 'title': '3D Printed Speakers Lit With LED',
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
} }
@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title) return self.playlist_result(entries, playlist_title=title)
def _extract_video(self, webpage): def _extract_video(self, webpage, url_type):
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>', if url_type != 'embed':
r'<div class="video-post-content">(.+?)</div>', description = self._html_search_regex(
], [
webpage, 'description', r'<div class="cne-video-description">(.+?)</div>',
fatal=False, flags=re.DOTALL) r'<div class="video-post-content">(.+?)</div>',
],
webpage, 'description', fatal=False, flags=re.DOTALL)
else:
description = None
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
'player params', flags=re.DOTALL) 'player params', flags=re.DOTALL)
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
site = mobj.group('site') site = mobj.group('site')
url_type = mobj.group('type') url_type = mobj.group('type')
id = mobj.group('id') item_id = mobj.group('id')
self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site]) self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, item_id)
if url_type == 'series': if url_type == 'series':
return self._extract_series(url, webpage) return self._extract_series(url, webpage)
else: else:
return self._extract_video(webpage) return self._extract_video(webpage, url_type)

View File

@@ -8,13 +8,11 @@ from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_request, compat_urllib_request,
compat_str, compat_str,
get_element_by_attribute,
get_element_by_id,
orderedSet, orderedSet,
str_to_int, str_to_int,
int_or_none, int_or_none,
ExtractorError, ExtractorError,
unescapeHTML,
) )
class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionBaseInfoExtractor(InfoExtractor):
@@ -180,7 +178,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist' IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
def _extract_entries(self, id): def _extract_entries(self, id):
@@ -190,10 +188,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request, webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage) video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in orderedSet(video_ids)] for video_id in orderedSet(video_ids)]
@@ -203,26 +200,26 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
return {'_type': 'playlist', return {
'id': playlist_id, '_type': 'playlist',
'title': get_element_by_id(u'playlist_name', webpage), 'id': playlist_id,
'entries': self._extract_entries(playlist_id), 'title': self._og_search_title(webpage),
} 'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE): class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user' IE_NAME = u'dailymotion:user'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
user = mobj.group('user') user = mobj.group('user')
webpage = self._download_webpage(url, user) webpage = self._download_webpage(url, user)
full_user = self._html_search_regex( full_user = unescapeHTML(self._html_search_regex(
r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user), r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
webpage, u'user', flags=re.DOTALL) webpage, u'user', flags=re.DOTALL))
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class DivxStageIE(NovaMovIE):
    """DivxStage extractor, defined by overriding NovaMovIE class attributes.

    NOTE(review): how NovaMovIE consumes ``_HOST`` and the ``*_REGEX``
    attributes is not visible here -- see novamov.py.
    """

    IE_NAME = 'divxstage'
    IE_DESC = 'DivxStage'

    # Raw string: '\.' is not a valid escape sequence in a normal string
    # literal, and newer Python versions warn about it. The resulting value
    # is unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'divxstage\.(?:eu|net|ch|co|at|ag)'}

    _HOST = 'www.divxstage.eu'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'

    _TEST = {
        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
        'md5': '63969f6eb26533a1968c4d325be63e72',
        'info_dict': {
            'id': '57f238e2e5e01',
            'ext': 'flv',
            'title': 'youtubedl test video',
            'description': 'This is a test video for youtubedl.',
        }
    }

View File

@@ -1,4 +1,5 @@
import os from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -8,18 +9,23 @@ from ..utils import (
compat_urllib_parse, compat_urllib_parse,
) )
class ExtremeTubeIE(InfoExtractor): class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = { _TESTS = [{
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
u'file': u'652431.mp4', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', 'info_dict': {
u'info_dict': { 'id': '652431',
u"title": u"Music Video 14 british euro brit european cumshots swallow", 'ext': 'mp4',
u"uploader": u"unknown", 'title': 'Music Video 14 british euro brit european cumshots swallow',
u"age_limit": 18, 'uploader': 'unknown',
'age_limit': 18,
} }
} }, {
'url': 'http://www.extremetube.com/gay/video/abcde-1234',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') video_title = self._html_search_regex(
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url')) uploader = self._html_search_regex(
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
fatal=False)
video_url = compat_urllib_parse.unquote(self._html_search_regex(
r'video_url=(.+?)&amp;', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2] format = path.split('/')[5].split('_')[:2]
format = "-".join(format) format = "-".join(format)
@@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor):
'title': video_title, 'title': video_title,
'uploader': uploader, 'uploader': uploader,
'url': video_url, 'url': video_url,
'ext': extension,
'format': format, 'format': format,
'format_id': format, 'format_id': format,
'age_limit': 18, 'age_limit': 18,

View File

@@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):
check_form = { check_form = {
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'), 'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
'name_action_selected': 'dont_save', 'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
} }
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')

View File

@@ -6,7 +6,6 @@ from .common import InfoExtractor
class FirstpostIE(InfoExtractor): class FirstpostIE(InfoExtractor):
IE_NAME = 'Firstpost.com'
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
@@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
'id': '1025403', 'id': '1025403',
'ext': 'mp4', 'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today', 'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
} }
} }
@@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) data = self._download_xml(
video_url = self._html_search_regex( 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
r'<div.*?name="div_video".*?flashvars="([^"]+)">', 'Downloading video XML')
webpage, 'video URL')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
title = item.find('./title').text
formats = [
{
'url': details.find('./file').text,
'format_id': details.find('./label').text.strip(),
'width': int(details.find('./width').text.strip()),
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'title': title,
'title': self._og_search_title(webpage), 'thumbnail': thumbnail,
'description': self._og_search_description(webpage), 'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
} }

View File

@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str, compat_str,
compat_urllib_parse,
ExtractorError,
) )
@@ -16,16 +18,28 @@ class FiveMinIE(InfoExtractor):
(?P<id>\d+) (?P<id>\d+)
''' '''
_TEST = { _TESTS = [
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/ {
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791', # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
'md5': '4f7b0b79bf1a470e5004f7112385941d', 'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
'info_dict': { 'md5': '4f7b0b79bf1a470e5004f7112385941d',
'id': '518013791', 'info_dict': {
'ext': 'mp4', 'id': '518013791',
'title': 'iPad Mini with Retina Display Review', 'ext': 'mp4',
'title': 'iPad Mini with Retina Display Review',
},
}, },
} {
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
'url': '5min:518086247',
'md5': 'e539a9dd682c288ef5a498898009f69e',
'info_dict': {
'id': '518086247',
'ext': 'mp4',
'title': 'How to Make a Next-Level Fruit Salad',
},
},
]
@classmethod @classmethod
def _build_result(cls, video_id): def _build_result(cls, video_id):
@@ -34,10 +48,28 @@ class FiveMinIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
info = self._download_json( embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&' embed_page = self._download_webpage(embed_url, video_id,
'playlist=%s&url=https' % video_id, 'Downloading embed page')
video_id)['binding'][0] sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
query = compat_urllib_parse.urlencode({
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
})
response = self._download_json(
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
video_id)
if not response['success']:
err_msg = response['errorMessage']
if err_msg == 'ErrorVideoUserNotGeo':
msg = 'Video not available from your location'
else:
msg = 'Aol said: %s' % err_msg
raise ExtractorError(msg, expected=True, video_id=video_id)
info = response['binding'][0]
second_id = compat_str(int(video_id[:-2]) + 1) second_id = compat_str(int(video_id[:-2]) + 1)
formats = [] formats = []

View File

@@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
'file': '13601338388002.mp4', 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
'info_dict': { 'info_dict': {
'id': '13601338388002',
'ext': 'mp4',
'uploader': 'www.hodiho.fr', 'uploader': 'www.hodiho.fr',
'title': 'R\u00e9gis plante sa Jeep', 'title': 'R\u00e9gis plante sa Jeep',
} }
@@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
{ {
'add_ie': ['Bandcamp'], 'add_ie': ['Bandcamp'],
'url': 'http://bronyrock.com/track/the-pony-mash', 'url': 'http://bronyrock.com/track/the-pony-mash',
'file': '3235767654.mp3',
'info_dict': { 'info_dict': {
'id': '3235767654',
'ext': 'mp3',
'title': 'The Pony Mash', 'title': 'The Pony Mash',
'uploader': 'M_Pallante', 'uploader': 'M_Pallante',
}, },
@@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
{ {
# https://github.com/rg3/youtube-dl/issues/2253 # https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3', 'url': 'http://bcove.me/i6nfkrc3',
'file': '3101154703001.mp4',
'md5': '0ba9446db037002366bab3b3eb30c88c', 'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': { 'info_dict': {
'id': '3101154703001',
'ext': 'mp4',
'title': 'Still no power', 'title': 'Still no power',
'uploader': 'thestar.com', 'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
@@ -114,20 +117,6 @@ class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get 'title': '2cc213299525360.mov', # that's what we get
}, },
}, },
# second style of embedded ooyala videos
{
'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
'info_dict': {
'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
'ext': 'mp4',
'title': 'Behind-the-scenes: Financial Review Sunday ',
'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
# google redirect # google redirect
{ {
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -198,6 +187,17 @@ class GenericIE(InfoExtractor):
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
} }
}, },
# Embedded Ustream video
{
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
'md5': '27b99cdb639c9b12a79bca876a073417',
'info_dict': {
'id': '45734260',
'ext': 'flv',
'uploader': 'AU SPA: The NSA and Privacy',
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
}
},
# nowvideo embed hidden behind percent encoding # nowvideo embed hidden behind percent encoding
{ {
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -239,6 +239,28 @@ class GenericIE(InfoExtractor):
'uploader_id': 'rbctv_2012_4', 'uploader_id': 'rbctv_2012_4',
}, },
}, },
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
'md5': 'ba0dfe966fa007657bd1443ee672db0f',
'info_dict': {
'id': '53501be369702d3275860000',
'ext': 'mp4',
'title': 'Hondas New Asimo Robot Is More Human Than Ever',
}
},
# Dailymotion embed
{
'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
'md5': '441aeeb82eb72c422c7f14ec533999cd',
'info_dict': {
'id': 'k2mm4bCdJ6CQ2i7c8o2',
'ext': 'mp4',
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
'uploader': 'Spi0n',
},
'add_ie': ['Dailymotion'],
}
] ]
def report_download_webpage(self, video_id): def report_download_webpage(self, video_id):
@@ -323,6 +345,15 @@ class GenericIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
if url.startswith('//'):
return {
'_type': 'url',
'url': (
'http:'
if self._downloader.params.get('prefer_insecure', False)
else 'https:') + url,
}
parsed_url = compat_urlparse.urlparse(url) parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme: if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search') default_search = self._downloader.params.get('default_search')
@@ -459,7 +490,7 @@ class GenericIE(InfoExtractor):
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches: if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') urlrs = [self.url_result(unescapeHTML(tuppl[1]))
for tuppl in matches] for tuppl in matches]
return self.playlist_result( return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title) urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -485,6 +516,22 @@ class GenericIE(InfoExtractor):
if mobj: if mobj:
return self.url_result(mobj.group(1), 'BlipTV') return self.url_result(mobj.group(1), 'BlipTV')
# Look for embedded condenast player
matches = re.findall(
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
webpage)
if matches:
return {
'_type': 'playlist',
'entries': [{
'_type': 'url',
'ie_key': 'CondeNast',
'url': ma,
} for ma in matches],
'title': video_title,
'id': video_id,
}
# Look for Bandcamp pages with custom domain # Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None: if mobj is not None:
@@ -505,7 +552,7 @@ class GenericIE(InfoExtractor):
return OoyalaIE._build_url_result(mobj.group('ec')) return OoyalaIE._build_url_result(mobj.group('ec'))
# Look for Aparat videos # Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage) mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group(1), 'Aparat') return self.url_result(mobj.group(1), 'Aparat')
@@ -514,17 +561,18 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora') return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded NovaMov player # Look for embedded NovaMov-based player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) r'''(?x)<iframe[^>]+?src=(["\'])
(?P<url>http://(?:(?:embed|www)\.)?
(?:novamov\.com|
nowvideo\.(?:ch|sx|eu|at|ag|co)|
videoweed\.(?:es|com)|
movshare\.(?:net|sx|ag)|
divxstage\.(?:eu|net|ch|co|at|ag))
/embed\.php.+?)\1''', webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'NovaMov') return self.url_result(mobj.group('url'))
# Look for embedded NowVideo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'NowVideo')
# Look for embedded Facebook player # Look for embedded Facebook player
mobj = re.search( mobj = re.search(
@@ -570,6 +618,12 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'TED') return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Ustream')
# Look for embedded arte.tv player # Look for embedded arte.tv player
mobj = re.search( mobj = re.search(
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"', r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
@@ -586,7 +640,13 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: if mobj is None:
# Look for gorilla-vid style embedding # Look for gorilla-vid style embedding
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage) mobj = re.search(r'''(?sx)
(?:
jw_plugins|
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
if mobj is None: if mobj is None:
# Broaden the search a little bit # Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)

View File

@@ -106,7 +106,7 @@ class OneUPIE(IGNIE):
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
_TEST = { _TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976', 'url': 'http://gamevideos.1up.com/video/id/34976',
'md5': '68a54ce4ebc772e4b71e3123d413163d', 'md5': '68a54ce4ebc772e4b71e3123d413163d',
'info_dict': { 'info_dict': {
@@ -115,10 +115,7 @@ class OneUPIE(IGNIE):
'title': 'Sniper Elite V2 - Trailer', 'title': 'Sniper Elite V2 - Trailer',
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', 'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
} }
} }]
# Override IGN tests
_TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@@ -11,16 +11,15 @@ from ..utils import (
class InfoQIE(InfoExtractor): class InfoQIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$' _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
_TEST = { _TEST = {
"name": "InfoQ", 'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", 'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
"file": "12-jan-pythonthings.mp4", 'info_dict': {
"info_dict": { 'id': '12-jan-pythonthings',
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", 'ext': 'mp4',
"title": "A Few of My Favorite [Python] Things", 'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
}, 'title': 'A Few of My Favorite [Python] Things',
"params": {
"skip_download": True,
}, },
} }
@@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
video_description = self._html_search_meta('description', webpage, 'description')
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
# Extract video URL # Extract video URL
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id') encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id playpath = 'mp4:' + real_id
# Extract title video_filename = playpath.split('/')[-1]
video_title = self._search_regex(r'contentTitle = "(.*?)";',
webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.') video_id, extension = video_filename.split('.')
http_base = self._search_regex(
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
'HTTP base URL')
formats = [{
'format_id': 'rtmp',
'url': video_url,
'ext': extension,
'play_path': playpath,
}, {
'format_id': 'http',
'url': http_base + real_id,
}]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'url': video_url,
'title': video_title, 'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
'description': video_description, 'description': video_description,
'formats': formats,
} }

View File

@@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html' _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', 'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'md5': '5dc6477e74b1e37042ac5acedd8413e5', 'md5': '1574e9b4d6438446d5b7dbcdf2786276',
'info_dict': { 'info_dict': {
'id': 'r303r', 'id': 'r303r',
'ext': 'flv', 'ext': 'flv',

View File

@@ -1,9 +1,12 @@
from __future__ import unicode_literals
import json import json
import os import os
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
ExtractorError, ExtractorError,
formatSeconds, formatSeconds,
) )
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
/?(?:\#.*)?$ /?(?:\#.*)?$
""" """
_JUSTIN_PAGE_LIMIT = 100 _JUSTIN_PAGE_LIMIT = 100
IE_NAME = u'justin.tv' IE_NAME = 'justin.tv'
IE_DESC = 'justin.tv and twitch.tv'
_TEST = { _TEST = {
u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360', 'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
u'file': u'296128360.flv', 'md5': 'ecaa8a790c22a40770901460af191c9a',
u'md5': u'ecaa8a790c22a40770901460af191c9a', 'info_dict': {
u'info_dict': { 'id': '296128360',
u"upload_date": u"20110927", 'ext': 'flv',
u"uploader_id": 25114803, 'upload_date': '20110927',
u"uploader": u"thegamedevhub", 'uploader_id': 25114803,
u"title": u"Beginner Series - Scripting With Python Pt.1" 'uploader': 'thegamedevhub',
'title': 'Beginner Series - Scripting With Python Pt.1'
} }
} }
def report_download_page(self, channel, offset):
"""Report attempt to download a single page of videos."""
self.to_screen(u'%s: Downloading video information from %d to %d' %
(channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
# Return count of items, list of *valid* items # Return count of items, list of *valid* items
def _parse_page(self, url, video_id): def _parse_page(self, url, video_id):
info_json = self._download_webpage(url, video_id, info_json = self._download_webpage(url, video_id,
u'Downloading video info JSON', 'Downloading video info JSON',
u'unable to download video info JSON') 'unable to download video info JSON')
response = json.loads(info_json) response = json.loads(info_json)
if type(response) != list: if type(response) != list:
error_text = response.get('error', 'unknown error') error_text = response.get('error', 'unknown error')
raise ExtractorError(u'Justin.tv API: %s' % error_text) raise ExtractorError('Justin.tv API: %s' % error_text)
info = [] info = []
for clip in response: for clip in response:
video_url = clip['video_file_url'] video_url = clip['video_file_url']
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
video_id = clip['id'] video_id = clip['id']
video_title = clip.get('title', video_id) video_title = clip.get('title', video_id)
info.append({ info.append({
'id': video_id, 'id': compat_str(video_id),
'url': video_url, 'url': video_url,
'title': video_title, 'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id), 'uploader': clip.get('channel_name', video_uploader_id),
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)
api_base = 'http://api.justin.tv' api_base = 'http://api.justin.tv'
paged = False paged = False
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
webpage = self._download_webpage(url, chapter_id) webpage = self._download_webpage(url, chapter_id)
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
if not m: if not m:
raise ExtractorError(u'Cannot find archive of a chapter') raise ExtractorError('Cannot find archive of a chapter')
archive_id = m.group(1) archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
doc = self._download_xml(api, chapter_id, doc = self._download_xml(
note=u'Downloading chapter information', api, chapter_id,
errnote=u'Chapter information download failed') note='Downloading chapter information',
errnote='Chapter information download failed')
for a in doc.findall('.//archive'): for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text: if archive_id == a.find('./id').text:
break break
else: else:
raise ExtractorError(u'Could not find chapter in chapter information') raise ExtractorError('Could not find chapter in chapter information')
video_url = a.find('./video_file_url').text video_url = a.find('./video_file_url').text
video_ext = video_url.rpartition('.')[2] or u'flv' video_ext = video_url.rpartition('.')[2] or 'flv'
chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id, chapter_info = self._download_json(
note='Downloading chapter metadata', chapter_api_url, 'c' + chapter_id,
errnote='Download of chapter metadata failed') note='Downloading chapter metadata',
chapter_info = json.loads(chapter_info_json) errnote='Download of chapter metadata failed')
bracket_start = int(doc.find('.//bracket_start').text) bracket_start = int(doc.find('.//bracket_start').text)
bracket_end = int(doc.find('.//bracket_end').text) bracket_end = int(doc.find('.//bracket_end').text)
# TODO determine start (and probably fix up file) # TODO determine start (and probably fix up file)
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
#video_url += u'?start=' + TODO:start_timestamp #video_url += '?start=' + TODO:start_timestamp
# bracket_start is 13290, but we want 51670615 # bracket_start is 13290, but we want 51670615
self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. ' self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
info = { info = {
'id': u'c' + chapter_id, 'id': 'c' + chapter_id,
'url': video_url, 'url': video_url,
'ext': video_ext, 'ext': video_ext,
'title': chapter_info['title'], 'title': chapter_info['title'],
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
'uploader': chapter_info['channel']['display_name'], 'uploader': chapter_info['channel']['display_name'],
'uploader_id': chapter_info['channel']['name'], 'uploader_id': chapter_info['channel']['name'],
} }
return [info] return info
else: else:
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
api = api_base + '/broadcast/by_archive/%s.json' % video_id api = api_base + '/broadcast/by_archive/%s.json' % video_id
self.report_extraction(video_id) entries = []
info = []
offset = 0 offset = 0
limit = self._JUSTIN_PAGE_LIMIT limit = self._JUSTIN_PAGE_LIMIT
while True: while True:
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
self.report_download_page(video_id, offset) self.report_download_page(video_id, offset)
page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
page_count, page_info = self._parse_page(page_url, video_id) page_count, page_info = self._parse_page(page_url, video_id)
info.extend(page_info) entries.extend(page_info)
if not paged or page_count != limit: if not paged or page_count != limit:
break break
offset += limit offset += limit
return info return {
'_type': 'playlist',
'id': video_id,
'entries': entries,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import os import os
import re import re
@@ -11,22 +13,22 @@ from ..aes import (
aes_decrypt_text aes_decrypt_text
) )
class KeezMoviesIE(InfoExtractor): class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
_TEST = { _TEST = {
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
u'file': u'1214711.mp4', 'file': '1214711.mp4',
u'md5': u'6e297b7e789329923fcf83abb67c9289', 'md5': '6e297b7e789329923fcf83abb67c9289',
u'info_dict': { 'info_dict': {
u"title": u"Petite Asian Lady Mai Playing In Bathtub", 'title': 'Petite Asian Lady Mai Playing In Bathtub',
u"age_limit": 18, 'age_limit': 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
@@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor):
embedded_url = mobj.group(1) embedded_url = mobj.group(1)
return self.url_result(embedded_url) return self.url_result(embedded_url)
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title') video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url')) video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
if webpage.find('encrypted=true')!=-1: if 'encrypted=true' in webpage:
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password') password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:] extension = os.path.splitext(path)[1][1:]

View File

@@ -1,15 +1,18 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class MDRIE(InfoExtractor): class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*' _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
# No tests, MDR regularily deletes its videos # No tests, MDR regularily deletes its videos
_TEST = {
'url': 'http://www.mdr.de/fakt/video189002.html',
'only_matching': True,
}
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
@@ -19,9 +22,9 @@ class MDRIE(InfoExtractor):
# determine title and media streams from webpage # determine title and media streams from webpage
html = self._download_webpage(url, video_id) html = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title') title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
xmlurl = self._search_regex( xmlurl = self._search_regex(
r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL') r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
doc = self._download_xml(domain + xmlurl, video_id) doc = self._download_xml(domain + xmlurl, video_id)
formats = [] formats = []
@@ -41,7 +44,7 @@ class MDRIE(InfoExtractor):
if vbr_el is None: if vbr_el is None:
format.update({ format.update({
'vcodec': 'none', 'vcodec': 'none',
'format_id': u'%s-%d' % (media_type, abr), 'format_id': '%s-%d' % (media_type, abr),
}) })
else: else:
vbr = int(vbr_el.text) // 1000 vbr = int(vbr_el.text) // 1000
@@ -49,12 +52,9 @@ class MDRIE(InfoExtractor):
'vbr': vbr, 'vbr': vbr,
'width': int(a.find('frameWidth').text), 'width': int(a.find('frameWidth').text),
'height': int(a.find('frameHeight').text), 'height': int(a.find('frameHeight').text),
'format_id': u'%s-%d' % (media_type, vbr), 'format_id': '%s-%d' % (media_type, vbr),
}) })
formats.append(format) formats.append(format)
if not formats:
raise ExtractorError(u'Could not find any valid formats')
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MorningstarIE(InfoExtractor):
IE_DESC = 'morningstar.com'
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
'info_dict': {
'id': '615869',
'ext': 'mp4',
'title': 'Get Ahead of the Curve on 2013 Taxes',
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
video_url = self._html_search_regex(
r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
webpage, 'video URL')
thumbnail = self._html_search_regex(
r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
webpage, 'thumbnail', fatal=False)
description = self._html_search_regex(
r'<div id="mstarDeck".*?>(.*?)</div>',
webpage, 'description', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'description': description,
}

View File

@@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_str,
int_or_none,
)
class MotorsportIE(InfoExtractor):
IE_DESC = 'motorsport.com'
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
'info_dict': {
'id': '7063',
'ext': 'mp4',
'title': 'Red Bull Racing: 2014 Rules Explained',
'duration': 207,
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations which are arguably the most complex the sport has ever seen.',
'uploader': 'rainiere',
'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
flashvars_code = self._html_search_regex(
r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
flashvars = compat_parse_qs(flashvars_code)
params = json.loads(flashvars['parameters'][0])
e = compat_str(int(time.time()) + 24 * 60 * 60)
base_video_url = params['location'] + '?e=' + e
s = 'h3hg713fh32'
h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
video_url = base_video_url + '&h=' + h
uploader = self._html_search_regex(
r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
'uploader', fatal=False)
return {
'id': params['video_id'],
'display_id': display_id,
'title': params['title'],
'url': video_url,
'description': params.get('description'),
'thumbnail': params.get('main_thumb'),
'duration': int_or_none(params.get('duration')),
'uploader': uploader,
}

View File

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MoviezineIE(InfoExtractor):
_VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'
_TEST = {
'url': 'http://www.moviezine.se/video/205866',
'info_dict': {
'id': '205866',
'ext': 'mp4',
'title': 'Oculus - Trailer 1',
'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
'thumbnail': 're:http://.*\.jpg',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
formats =[{
'format_id': 'sd',
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
'quality': 0,
'ext': 'mp4',
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
'formats': formats,
'description': self._og_search_description(webpage),
}

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class MovShareIE(NovaMovIE):
IE_NAME = 'movshare'
IE_DESC = 'MovShare'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
_HOST = 'www.movshare.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.movshare.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}

View File

@@ -4,9 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import int_or_none
int_or_none,
)
class MporaIE(InfoExtractor): class MporaIE(InfoExtractor):
@@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
'info_dict': { 'info_dict': {
'title': 'Katy Curd - Winter in the Forest', 'title': 'Katy Curd - Winter in the Forest',
'duration': 416, 'duration': 416,
'uploader': 'petenewman', 'uploader': 'Peter Newman Media',
}, },
} }

View File

@@ -1,15 +1,22 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import str_to_int
class NineGagIE(InfoExtractor): class NineGagIE(InfoExtractor):
IE_NAME = '9gag' IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)' _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
(?:
v/(?P<numid>[0-9]+)|
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
)
'''
_TEST = { _TESTS = [{
"url": "http://9gag.tv/v/1912", "url": "http://9gag.tv/v/1912",
"info_dict": { "info_dict": {
"id": "1912", "id": "1912",
@@ -20,34 +27,42 @@ class NineGagIE(InfoExtractor):
"thumbnail": "re:^https?://", "thumbnail": "re:^https?://",
}, },
'add_ie': ['Youtube'] 'add_ie': ['Youtube']
} },
{
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': {
'id': 'KklwM',
'ext': 'mp4',
'display_id': 'alternate-banned-opening-scene-of-gravity',
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('numid') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, display_id)
youtube_id = self._html_search_regex( post_view = json.loads(self._html_search_regex(
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"', r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
webpage, 'video ID')
description = self._html_search_regex( youtube_id = post_view['videoExternalId']
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage, title = post_view['title']
'description', fatal=False) description = post_view['description']
view_count_str = self._html_search_regex( view_count = str_to_int(post_view['externalView'])
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count', thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
fatal=False)
view_count = (
None if view_count_str is None
else int(view_count_str.replace(',', '')))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': youtube_id, 'url': youtube_id,
'ie_key': 'Youtube', 'ie_key': 'Youtube',
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'display_id': display_id,
'title': title,
'description': description, 'description': description,
'view_count': view_count, 'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': thumbnail,
} }

View File

@@ -0,0 +1,105 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
compat_str,
)
class NocoIE(InfoExtractor):
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
_TEST = {
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
'md5': '0a993f0058ddbcd902630b2047ef710e',
'info_dict': {
'id': '11538',
'ext': 'mp4',
'title': 'Ami Ami Idol - Hello! France',
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
'upload_date': '20140412',
'uploader': 'Nolife',
'uploader_id': 'NOL',
'duration': 2851.2,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
medias = self._download_json(
'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
formats = []
for fmt in medias['fr']['video_list']['default']['quality_list']:
format_id = fmt['quality_key']
file = self._download_json(
'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
video_id, 'Downloading %s video JSON' % format_id)
file_url = file['file']
if not file_url:
continue
if file_url == 'forbidden':
raise ExtractorError(
'%s returned error: %s - %s' % (
self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
expected=True)
formats.append({
'url': file_url,
'format_id': format_id,
'width': fmt['res_width'],
'height': fmt['res_lines'],
'abr': fmt['audiobitrate'],
'vbr': fmt['videobitrate'],
'filesize': fmt['filesize'],
'format_note': fmt['quality_name'],
'preference': fmt['priority'],
})
self._sort_formats(formats)
show = self._download_json(
'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
upload_date = unified_strdate(show['indexed'])
uploader = show['partner_name']
uploader_id = show['partner_key']
duration = show['duration_ms'] / 1000.0
thumbnail = show['screenshot']
episode = show.get('show_TT') or show.get('show_OT')
family = show.get('family_TT') or show.get('family_OT')
episode_number = show.get('episode_number')
title = ''
if family:
title += family
if episode_number:
title += ' #' + compat_str(episode_number)
if episode:
title += ' - ' + episode
description = show.get('show_resume') or show.get('family_resume')
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'uploader': uploader,
'uploader_id': uploader_id,
'duration': duration,
'formats': formats,
}

View File

@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov' IE_NAME = 'novamov'
IE_DESC = 'NovaMov' IE_DESC = 'NovaMov'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'} _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
_HOST = 'www.novamov.com' _HOST = 'www.novamov.com'
@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('id')
page = self._download_webpage( page = self._download_webpage(
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') 'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, page) is not None: if re.search(self._FILE_DELETED_REGEX, page) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
api_response = self._download_webpage( api_response = self._download_webpage(

View File

@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo' IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo' IE_DESC = 'NowVideo'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
_HOST = 'www.nowvideo.ch' _HOST = 'www.nowvideo.ch'

View File

@@ -0,0 +1,67 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NRKIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
_TESTS = [
{
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
'md5': '12618eef328c9a35c1b47d5583d9c30d',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
}
},
{
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
'md5': '390b2ce15c0d6aa376ef5059ac9f865e',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
data = self._download_json(
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
images = data.get('images')
if images:
thumbnails = images['webImages']
thumbnails.sort(key=lambda image: image['pixelWidth'])
thumbnail = thumbnails[-1]['imageUrl']
else:
thumbnail = None
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': data['title'],
'description': data['description'],
'thumbnail': thumbnail,
}

View File

@@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
{ {
'url': 'http://www.ntv.ru/kino/Koma_film', 'url': 'http://www.ntv.ru/kino/Koma_film',
'info_dict': { 'info_dict': {
'id': '750783', 'id': '758100',
'ext': 'flv', 'ext': 'flv',
'title': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ', 'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома» — 4 апреля вечером на НТВ', 'description': 'Остросюжетный фильм «Кома»',
'duration': 28, 'duration': 5592,
}, },
'params': { 'params': {
# rtmp download # rtmp download

View File

@@ -6,22 +6,36 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
class PodomaticIE(InfoExtractor): class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic' IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
_TEST = { _TESTS = [
"url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", {
"file": "2009-01-02T16_03_35-08_00.mp3", 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
"md5": "84bb855fcf3429e6bf72460e1eed782d", 'md5': '84bb855fcf3429e6bf72460e1eed782d',
"info_dict": { 'info_dict': {
"uploader": "Science Teaching Tips", 'id': '2009-01-02T16_03_35-08_00',
"uploader_id": "scienceteachingtips", 'ext': 'mp3',
"title": "64. When the Moon Hits Your Eye", 'uploader': 'Science Teaching Tips',
"duration": 446, 'uploader_id': 'scienceteachingtips',
} 'title': '64. When the Moon Hits Your Eye',
} 'duration': 446,
}
},
{
'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
'md5': 'd2cf443931b6148e27638650e2638297',
'info_dict': {
'id': '2013-11-15T16_31_21-08_00',
'ext': 'mp3',
'uploader': 'Ostbahnhof / Techno Mix',
'uploader_id': 'ostbahnhof',
'title': 'Einunddreizig',
'duration': 3799,
}
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor):
'?permalink=true&rtmp=0') % '?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id)) (mobj.group('proto'), channel, video_id))
data_json = self._download_webpage( data_json = self._download_webpage(
json_url, video_id, note=u'Downloading video info') json_url, video_id, 'Downloading video info')
data = json.loads(data_json) data = json.loads(data_json)
video_url = data['downloadLink'] video_url = data['downloadLink']
if not video_url:
video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
uploader = data['podcast'] uploader = data['podcast']
title = data['title'] title = data['title']
thumbnail = data['imageLocation'] thumbnail = data['imageLocation']

View File

@@ -1,44 +1,81 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urllib_parse from ..utils import int_or_none
class PornHdIE(InfoExtractor): class PornHdIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)' _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'file': '1962.flv', 'md5': '956b8ca569f7f4d8ec563e2c41598441',
'md5': '35272469887dca97abd30abecc6cdf75',
'info_dict': { 'info_dict': {
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video", 'id': '1962',
"age_limit": 18, 'ext': 'mp4',
'title': 'Sierra loves doing laundry',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'age_limit': 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.group('video_id')
video_title = mobj.group('video_title')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
next_url = self._html_search_regex( title = self._og_search_title(webpage)
r'&hd=(http.+?)&', webpage, 'video URL') TITLE_SUFFIX = ' porn HD Video | PornHD.com '
next_url = compat_urllib_parse.unquote(next_url) if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
video_url = self._download_webpage( description = self._html_search_regex(
next_url, video_id, note='Retrieving video URL', r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
errnote='Could not retrieve video URL') view_count = int_or_none(self._html_search_regex(
age_limit = 18 r'(\d+) views </span>', webpage, 'view count', fatal=False))
formats = [
{
'url': format_url,
'ext': format.lower(),
'format_id': '%s-%s' % (format.lower(), quality.lower()),
'quality': 1 if quality.lower() == 'high' else 0,
} for format, quality, format_url in re.findall(
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
]
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
if mobj:
flashvars = json.loads(mobj.group('flashvars'))
formats.extend([
{
'url': flashvars['hashlink'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-low',
'quality': 0,
},
{
'url': flashvars['hd'].replace('?noProxy=1', ''),
'ext': 'flv',
'format_id': 'flv-high',
'quality': 1,
}
])
thumbnail = flashvars['urlWallpaper']
else:
thumbnail = self._og_search_thumbnail(webpage)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'title': title,
'ext': 'flv', 'description': description,
'title': video_title, 'thumbnail': thumbnail,
'age_limit': age_limit, 'view_count': view_count,
'formats': formats,
'age_limit': 18,
} }

View File

@@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
_CLIPID_REGEXES = [ _CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"', r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"', r'clipid: "(\d+)"',
r'clipId=(\d+)',
] ]
_TITLE_REGEXES = [ _TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',

View File

@@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
return self.url_result(m_youtube.group(1), 'Youtube') return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex( title = self._html_search_regex(
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL) r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
webpage, 'title', flags=re.DOTALL)
video_url = self._search_regex( video_url = self._search_regex(
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
webpage, 'video url', flags=re.DOTALL) webpage, 'video url', flags=re.DOTALL)

View File

@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
'md5': '03af18b73a07b4088753930db7a34add', 'md5': '03af18b73a07b4088753930db7a34add',
'info_dict': { 'info_dict': {
"title": "Luati-le Banii sez 4 ep 1", "title": "Luati-le Banii sez 4 ep 1",
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.", "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
} }
} }

View File

@@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class RTBFIE(InfoExtractor):
_VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)'
_TEST = {
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
'md5': '799f334ddf2c0a582ba80c44655be570',
'info_dict': {
'id': '1921274',
'ext': 'mp4',
'title': 'Les Diables au coeur (épisode 2)',
'description': 'Football - Diables Rouges',
'duration': 3099,
'timestamp': 1398456336,
'upload_date': '20140425',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex(
r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url')
if data['provider'].lower() == 'youtube':
return self.url_result(video_url, 'Youtube')
return {
'id': video_id,
'url': video_url,
'title': data['title'],
'description': data.get('description') or data.get('subtitle'),
'thumbnail': data['thumbnail']['large'],
'duration': data.get('duration') or data.get('realDuration'),
'timestamp': data['created'],
'view_count': data['viewCount'],
}

View File

@@ -9,46 +9,136 @@ from ..utils import (
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
unescapeHTML, unescapeHTML,
compat_str,
) )
class RTSIE(InfoExtractor): class RTSIE(InfoExtractor):
IE_DESC = 'RTS.ch' IE_DESC = 'RTS.ch'
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html' _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
_TEST = { _TESTS = [
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', {
'md5': '753b877968ad8afaeddccc374d4256a5', 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
'info_dict': { 'md5': '753b877968ad8afaeddccc374d4256a5',
'id': '3449373', 'info_dict': {
'ext': 'mp4', 'id': '3449373',
'duration': 1488, 'ext': 'mp4',
'title': 'Les Enfants Terribles', 'duration': 1488,
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', 'title': 'Les Enfants Terribles',
'uploader': 'Divers', 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
'upload_date': '19680921', 'uploader': 'Divers',
'timestamp': -40280400, 'upload_date': '19680921',
'thumbnail': 're:^https?://.*\.image' 'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image'
},
}, },
} {
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
'info_dict': {
'id': '5624067',
'ext': 'mp4',
'duration': 3720,
'title': 'Les yeux dans les cieux - Mon homard au Canada',
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
'info_dict': {
'id': '5745975',
'ext': 'mp4',
'duration': 48,
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
'description': 'Hockey - Playoff',
'uploader': 'Hockey',
'upload_date': '20140403',
'timestamp': 1396556882,
'thumbnail': 're:^https?://.*\.image'
},
'skip': 'Blocked outside Switzerland',
},
{
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
'md5': '9bb06503773c07ce83d3cbd793cebb91',
'info_dict': {
'id': '5745356',
'ext': 'mp4',
'duration': 33,
'title': 'Londres cachée par un épais smog',
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
'uploader': 'Le Journal en continu',
'upload_date': '20140403',
'timestamp': 1396537322,
'thumbnail': 're:^https?://.*\.image'
},
},
{
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
'info_dict': {
'id': '5706148',
'ext': 'mp3',
'duration': 123,
'title': '"Urban Hippie", de Damien Krisl',
'description': 'Des Hippies super glam.',
'upload_date': '20140403',
'timestamp': 1396551600,
},
},
]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
video_id = m.group('id') video_id = m.group('id')
all_info = self._download_json( def download_json(internal_id):
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id) return self._download_json(
info = all_info['video']['JSONinfo'] 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
video_id)
all_info = download_json(video_id)
# video_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, video_id)
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
'internal video id')
all_info = download_json(internal_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
upload_timestamp = parse_iso8601(info.get('broadcast_date')) upload_timestamp = parse_iso8601(info.get('broadcast_date'))
duration = parse_duration(info.get('duration')) duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
duration = parse_duration(duration)
view_count = info.get('plays')
thumbnail = unescapeHTML(info.get('preview_image_url')) thumbnail = unescapeHTML(info.get('preview_image_url'))
def extract_bitrate(url):
return int_or_none(self._search_regex(
r'-([0-9]+)k\.', url, 'bitrate', default=None))
formats = [{ formats = [{
'format_id': fid, 'format_id': fid,
'url': furl, 'url': furl,
'tbr': int_or_none(self._search_regex( 'tbr': extract_bitrate(furl),
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
} for fid, furl in info['streams'].items()] } for fid, furl in info['streams'].items()]
if 'media' in info:
formats.extend([{
'format_id': '%s-%sk' % (media['ext'], media['rate']),
'url': 'http://download-video.rts.ch/%s' % media['url'],
'tbr': media['rate'] or extract_bitrate(media['url']),
} for media in info['media'] if media.get('rate')])
self._sort_formats(formats) self._sort_formats(formats)
return { return {
@@ -57,6 +147,7 @@ class RTSIE(InfoExtractor):
'title': info['title'], 'title': info['title'],
'description': info.get('intro'), 'description': info.get('intro'),
'duration': duration, 'duration': duration,
'view_count': view_count,
'uploader': info.get('programName'), 'uploader': info.get('programName'),
'timestamp': upload_timestamp, 'timestamp': upload_timestamp,
'thumbnail': thumbnail, 'thumbnail': thumbnail,

View File

@@ -0,0 +1,84 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..utils import (
struct_unpack,
)
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
'info_dict': {
'id': '2491869',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
},
}
def _decrypt_url(self, png):
encrypted_data = base64.b64decode(png)
text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index-4:]
length = struct_unpack('!I', text_chunk[:4])[0]
# Use bytearray to get integers when iterating in both python 2.x and 3.x
data = bytearray(text_chunk[8:8+length])
data = [chr(b) for b in data if b != 0]
hash_index = data.index('#')
alphabet_data = data[:hash_index]
url_data = data[hash_index+1:]
alphabet = []
e = 0
d = 0
for l in alphabet_data:
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data:
if f == 0:
l = int(letter)*10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info = self._download_json(
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
video_id)['page']['items'][0]
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = self._decrypt_url(png)
return {
'id': video_id,
'title': info['title'],
'url': video_url,
'thumbnail': info['image'],
}

View File

@@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor):
'http://rutube.ru/api/video/%s/?format=json' % video_id, 'http://rutube.ru/api/video/%s/?format=json' % video_id,
video_id, 'Downloading video JSON') video_id, 'Downloading video JSON')
trackinfo = self._download_json(
'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
video_id, 'Downloading trackinfo JSON')
# Some videos don't have the author field # Some videos don't have the author field
author = trackinfo.get('author') or {} author = video.get('author') or {}
m3u8_url = trackinfo['video_balancer'].get('m3u8')
options = self._download_json(
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8')
if m3u8_url is None: if m3u8_url is None:
raise ExtractorError('Couldn\'t find m3u8 manifest url') raise ExtractorError('Couldn\'t find m3u8 manifest url')

View File

@@ -0,0 +1,60 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class SciVeeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'
_TEST = {
'url': 'http://www.scivee.tv/node/62352',
#'md5': 'b16699b74c9e6a120f6772a44960304f',
'info_dict': {
'id': '62352',
'ext': 'mp4',
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
'description': 'md5:81f1710638e11a481358fab1b11059d7',
},
'params': {
# Range HTTP header is ignored
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
# annotations XML is malformed
annotations = self._download_webpage(
'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations')
title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title')
description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False)
filesize = int_or_none(self._html_search_regex(
r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False))
formats = [
{
'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
'ext': 'mp3',
'format_id': 'audio',
},
{
'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
'ext': 'mp4',
'format_id': 'video',
'filesize': filesize,
},
]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
'formats': formats,
}

View File

@@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension'] ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex( description = self._html_search_regex(
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description') r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
'description', fatal=False)
return { return {
'_type': 'video', '_type': 'video',

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -8,78 +10,114 @@ from ..utils import (
class SteamIE(InfoExtractor): class SteamIE(InfoExtractor):
_VALID_URL = r"""http://store\.steampowered\.com/ _VALID_URL = r"""(?x)
(agecheck/)? https?://store\.steampowered\.com/
(?P<urltype>video|app)/ #If the page is only for videos or for a game (agecheck/)?
(?P<gameID>\d+)/? (?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID (?P<gameID>\d+)/?
""" (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
|
https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
"""
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
_TEST = { _TESTS = [{
u"url": u"http://store.steampowered.com/video/105600/", "url": "http://store.steampowered.com/video/105600/",
u"playlist": [ "playlist": [
{ {
u"file": u"81300.flv", "md5": "f870007cee7065d7c76b88f0a45ecc07",
u"md5": u"f870007cee7065d7c76b88f0a45ecc07", "info_dict": {
u"info_dict": { 'id': '81300',
u"title": u"Terraria 1.1 Trailer", 'ext': 'flv',
u'playlist_index': 1, "title": "Terraria 1.1 Trailer",
'playlist_index': 1,
} }
}, },
{ {
u"file": u"80859.flv", "md5": "61aaf31a5c5c3041afb58fb83cbb5751",
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751", "info_dict": {
u"info_dict": { 'id': '80859',
u"title": u"Terraria Trailer", 'ext': 'flv',
u'playlist_index': 2, "title": "Terraria Trailer",
'playlist_index': 2,
} }
} }
] ],
} 'params': {
'playlistend': 2,
}
@classmethod }, {
def suitable(cls, url): 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
"""Receives a URL and returns True if suitable for this IE.""" 'info_dict': {
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None 'id': 'WB5DvDOOvAY',
'ext': 'mp4',
'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer',
'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE) m = re.match(self._VALID_URL, url)
gameID = m.group('gameID') fileID = m.group('fileID')
if fileID:
videourl = self._VIDEO_PAGE_TEMPLATE % gameID videourl = url
webpage = self._download_webpage(videourl, gameID) playlist_id = fileID
else:
gameID = m.group('gameID')
playlist_id = gameID
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
webpage = self._download_webpage(videourl, playlist_id)
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
videourl = self._AGECHECK_TEMPLATE % gameID videourl = self._AGECHECK_TEMPLATE % playlist_id
self.report_age_confirmation() self.report_age_confirmation()
webpage = self._download_webpage(videourl, gameID) webpage = self._download_webpage(videourl, playlist_id)
self.report_extraction(gameID) if fileID:
game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', playlist_title = self._html_search_regex(
webpage, 'game title') r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
mweb = re.finditer(r'''(?x)
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
''', webpage)
videos = [{
'_type': 'url',
'url': vid.group('youtube_id'),
'ie_key': 'Youtube',
} for vid in mweb]
else:
playlist_title = self._html_search_regex(
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," mweb = re.finditer(r'''(?x)
mweb = re.finditer(urlRE, webpage) 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
namesRE = r'<span class="title">(?P<videoName>.+?)</span>' FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
titles = re.finditer(namesRE, webpage) (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' ''', webpage)
thumbs = re.finditer(thumbsRE, webpage) titles = re.finditer(
videos = [] r'<span class="title">(?P<videoName>.+?)</span>', webpage)
for vid,vtitle,thumb in zip(mweb,titles,thumbs): thumbs = re.finditer(
video_id = vid.group('videoID') r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
title = vtitle.group('videoName') videos = []
video_url = vid.group('videoURL')
video_thumb = thumb.group('thumbnail') for vid, vtitle, thumb in zip(mweb, titles, thumbs):
if not video_url: video_id = vid.group('videoID')
raise ExtractorError(u'Cannot find video url for %s' % video_id) title = vtitle.group('videoName')
info = { video_url = vid.group('videoURL')
'id':video_id, video_thumb = thumb.group('thumbnail')
'url':video_url, if not video_url:
'ext': 'flv', raise ExtractorError('Cannot find video url for %s' % video_id)
'title': unescapeHTML(title), videos.append({
'thumbnail': video_thumb 'id': video_id,
} 'url': video_url,
videos.append(info) 'ext': 'flv',
return [self.playlist_result(videos, gameID, game_title)] 'title': unescapeHTML(title),
'thumbnail': video_thumb
})
if not videos:
raise ExtractorError('Could not find any videos')
return self.playlist_result(videos, playlist_id, playlist_title)

View File

@@ -6,9 +6,9 @@ from .common import InfoExtractor
class SyfyIE(InfoExtractor): class SyfyIE(InfoExtractor):
_VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)' _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
_TEST = { _TESTS = [{
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', 'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
'md5': 'e07de1d52c7278adbb9b9b1c93a66849', 'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
'info_dict': { 'info_dict': {
@@ -18,10 +18,30 @@ class SyfyIE(InfoExtractor):
'description': 'Listen to what insights George Lucas give his daughter Amanda.', 'description': 'Listen to what insights George Lucas give his daughter Amanda.',
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
} }, {
'url': 'http://www.syfy.com/wilwheaton',
'md5': '94dfa54ee3ccb63295b276da08c415f6',
'info_dict': {
'id': '4yoffOOXC767',
'ext': 'flv',
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
},
'add_ie': ['ThePlatform'],
'skip': 'Blocked outside the US',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_name = mobj.group('video_name')
if video_name:
generic_webpage = self._download_webpage(url, video_name)
video_id = self._search_regex(
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
generic_webpage, 'video ID')
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
video_name, video_name, video_id)
else:
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
return self.url_result(self._og_search_video_url(webpage)) return self.url_result(self._og_search_video_url(webpage))

View File

@@ -3,14 +3,21 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class TeamcocoIE(InfoExtractor): class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)' _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
_TEST = { _TESTS = [
{
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': {
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
},
{
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4', 'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@@ -19,22 +26,23 @@ class TeamcocoIE(InfoExtractor):
"title": "Louis C.K. Interview Pt. 1 11/3/11" "title": "Louis C.K. Interview Pt. 1 11/3/11"
} }
} }
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
video_id = self._html_search_regex( display_id = mobj.group('display_id')
r'<article class="video" data-id="(\d+?)"', webpage = self._download_webpage(url, display_id)
webpage, 'video id')
self.report_extraction(video_id) video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
r'<article class="video" data-id="(\d+?)"',
webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(data_url, video_id, 'Downloading data webpage') data = self._download_xml(
data_url, display_id, 'Downloading data webpage')
qualities = ['500k', '480p', '1000k', '720p', '1080p'] qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = [] formats = []
@@ -69,6 +77,7 @@ class TeamcocoIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id,
'formats': formats, 'formats': formats,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
'consciousness, but that half the time our brains are ' 'consciousness, but that half the time our brains are '
'actively fooling us.'), 'actively fooling us.'),
'uploader': 'Dan Dennett', 'uploader': 'Dan Dennett',
'width': 854,
} }
}, { }, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', 'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -48,12 +49,22 @@ class TEDIE(SubtitlesInfoExtractor):
'thumbnail': 're:^https?://.+\.jpg', 'thumbnail': 're:^https?://.+\.jpg',
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.', 'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
} }
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '49144e345a899b8cb34d315f3b9cfeeb',
'info_dict': {
'id': '1972',
'ext': 'mp4',
'title': 'Be passionate. Be courageous. Be your best.',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:5174aed4d0f16021b704120360f72b92',
},
}] }]
_FORMATS_PREFERENCE = { _NATIVE_FORMATS = {
'low': 1, 'low': {'preference': 1, 'width': 320, 'height': 180},
'medium': 2, 'medium': {'preference': 2, 'width': 512, 'height': 288},
'high': 3, 'high': {'preference': 3, 'width': 854, 'height': 480},
} }
def _extract_info(self, webpage): def _extract_info(self, webpage):
@@ -83,7 +94,7 @@ class TEDIE(SubtitlesInfoExtractor):
playlist_info = info['playlist'] playlist_info = info['playlist']
playlist_entries = [ playlist_entries = [
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
for talk in info['talks'] for talk in info['talks']
] ]
return self.playlist_result( return self.playlist_result(
@@ -98,12 +109,26 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0] talk_info = self._extract_info(webpage)['talks'][0]
formats = [{ formats = [{
'ext': 'mp4',
'url': format_url, 'url': format_url,
'format_id': format_id, 'format_id': format_id,
'format': format_id, 'format': format_id,
'preference': self._FORMATS_PREFERENCE.get(format_id, -1), } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
} for (format_id, format_url) in talk_info['nativeDownloads'].items()] if formats:
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
else:
# Use rtmp downloads
formats = [{
'format_id': f['name'],
'url': talk_info['streamer'],
'play_path': f['file'],
'ext': 'flv',
'width': f['width'],
'height': f['height'],
'tbr': f['bitrate'],
} for f in talk_info['resources']['rtmp']]
self._sort_formats(formats) self._sort_formats(formats)
video_id = compat_str(talk_info['id']) video_id = compat_str(talk_info['id'])
@@ -135,7 +160,7 @@ class TEDIE(SubtitlesInfoExtractor):
sub_lang_list[l] = url sub_lang_list[l] = url
return sub_lang_list return sub_lang_list
else: else:
self._downloader.report_warning(u'video doesn\'t have subtitles') self._downloader.report_warning('video doesn\'t have subtitles')
return {} return {}
def _watch_info(self, url, name): def _watch_info(self, url, name):
@@ -150,7 +175,10 @@ class TEDIE(SubtitlesInfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title') r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>', [
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
],
webpage, 'description', fatal=False) webpage, 'description', fatal=False)
return { return {

View File

@@ -52,7 +52,7 @@ class ThePlatformIE(InfoExtractor):
head = meta.find(_x('smil:head')) head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body')) body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq/smil:video')) f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is not None: if f4m_node is not None:
f4m_url = f4m_node.attrib['src'] f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url: if 'manifest.f4m?' not in f4m_url:

View File

@@ -0,0 +1,60 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from .discovery import DiscoveryIE
class TlcIE(DiscoveryIE):
    """Extractor for video pages on tlc.com.

    This class defines no methods of its own: it only supplies the extractor
    name, the URL pattern and a test case on top of DiscoveryIE.
    """

    IE_NAME = 'tlc.com'
    # Two path segments, then "videos/<slug>", optionally followed by a
    # literal ".htm" suffix. The slug is captured as the ``id`` group.
    # The dot of the suffix is escaped so that it only matches a real dot;
    # the suffix group stays capturing to keep group numbering unchanged.
    _VALID_URL = r'http://www\.tlc\.com/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(\.htm)?'
    _TEST = {
        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
        'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
        'info_dict': {
            'id': '853232',
            'ext': 'mp4',
            'title': 'Cake Boss: Too Big to Fly',
            'description': 'Buddy has taken on a high flying task.',
            'duration': 119,
        },
    }
class TlcDeIE(InfoExtractor):
    """Extractor for video pages on tlc.de.

    The video page embeds an iframe; the iframe document is handed to
    BrightcoveIE to locate the Brightcove player URL, and the actual
    extraction is delegated to that extractor.
    """

    IE_NAME = 'tlc.de'
    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
    _TEST = {
        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
        'info_dict': {
            'id': '3235167922001',
            'ext': 'mp4',
            'title': 'Breaking Amish: Die Welt da draußen',
            'uploader': 'Discovery Networks - Germany',
            'description': 'Vier Amische und eine Mennonitin wagen in New York'
                ' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
                ' ihrem spannenden Weg.',
        },
    }

    def _real_extract(self, url):
        """Return a url-type result pointing at the embedded Brightcove player.

        The ``title`` group of the URL is only used as the display id for
        the two page downloads.
        """
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')

        webpage = self._download_webpage(url, title)
        # Raw string: the pattern contains regex escapes that are not valid
        # string escape sequences.
        iframe_url = self._search_regex(
            r'<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
            'iframe url')
        # Otherwise we don't get the correct 'BrightcoveExperience' element,
        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
        iframe_url = iframe_url.replace('.htm?', '.php?')
        iframe = self._download_webpage(iframe_url, title)

        return {
            '_type': 'url',
            'url': BrightcoveIE._extract_brightcove_url(iframe),
            # url-type results name the target extractor under 'ie_key'
            # (the same key the other extractors use for delegation).
            'ie_key': BrightcoveIE.ie_key(),
        }

View File

@@ -1,63 +1,83 @@
import os from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
int_or_none,
str_to_int,
) )
from ..aes import ( from ..aes import aes_decrypt_text
aes_decrypt_text
)
class Tube8IE(InfoExtractor): class Tube8IE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$' _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/', 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4', 'file': '229795.mp4',
u'md5': u'e9e0b0c86734e5e3766e653509475db0', 'md5': 'e9e0b0c86734e5e3766e653509475db0',
u'info_dict': { 'info_dict': {
u"description": u"hot teen Kasia grinding", 'description': 'hot teen Kasia grinding',
u"uploader": u"unknown", 'uploader': 'unknown',
u"title": u"Kasia music video", 'title': 'Kasia music video',
u"age_limit": 18, 'age_limit': 18,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title') flashvars = json.loads(self._html_search_regex(
video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False) r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
if thumbnail:
thumbnail = thumbnail.replace('\\/', '/')
video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url') video_url = flashvars['video_url']
if webpage.find('"encrypted":true')!=-1: if flashvars.get('encrypted') is True:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password') video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:] format_id = '-'.join(path.split('/')[4].split('_')[:2])
format = path.split('/')[4].split('_')[:2]
format = "-".join(format) thumbnail = flashvars.get('image_url')
title = self._html_search_regex(
r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
webpage, 'uploader', fatal=False)
like_count = int_or_none(self._html_search_regex(
r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
dislike_count = int_or_none(self._html_search_regex(
r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
view_count = self._html_search_regex(
r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
comment_count = self._html_search_regex(
r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
if comment_count:
comment_count = str_to_int(comment_count)
return { return {
'id': video_id, 'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'url': video_url, 'url': video_url,
'ext': extension, 'title': title,
'format': format, 'description': description,
'format_id': format, 'thumbnail': thumbnail,
'uploader': uploader,
'format_id': format_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18, 'age_limit': 18,
} }

View File

@@ -11,7 +11,7 @@ from ..utils import (
class UstreamIE(InfoExtractor): class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
IE_NAME = 'ustream' IE_NAME = 'ustream'
_TEST = { _TEST = {
'url': 'http://www.ustream.tv/recorded/20274954', 'url': 'http://www.ustream.tv/recorded/20274954',
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
if m.group('type') == 'embed':
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
return self.url_result(desktop_url, 'Ustream')
video_id = m.group('videoID') video_id = m.group('videoID')
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id video_url = 'http://tcdn.ustream.tv/video/%s' % video_id

View File

@@ -0,0 +1,26 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class VideoWeedIE(NovaMovIE):
    """VideoWeed extractor.

    Defines no methods of its own; it only overrides the host name, URL
    pattern and page regexes that NovaMovIE reads.
    """

    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'

    # The host pattern is a raw string so that the regex escape for the dot
    # is not parsed as an (invalid) string escape sequence.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}

    _HOST = 'www.videoweed.es'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'

    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion dissapeared image magic illusion',
            'description': ''
        },
    }

View File

@@ -17,10 +17,38 @@ from ..utils import (
RegexNotFoundError, RegexNotFoundError,
std_headers, std_headers,
unsmuggle_url, unsmuggle_url,
urlencode_postdata,
) )
class VimeoIE(SubtitlesInfoExtractor): class VimeoBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'vimeo'
_LOGIN_REQUIRED = False
def _login(self):
(username, password) = self._get_login_info()
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = urlencode_postdata({
'email': username,
'password': password,
'action': 'login',
'service': 'vimeo',
'token': token,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(login_request, None, False, 'Wrong login info')
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
@@ -33,7 +61,6 @@ class VimeoIE(SubtitlesInfoExtractor):
(?:videos?/)? (?:videos?/)?
(?P<id>[0-9]+) (?P<id>[0-9]+)
/?(?:[?&].*)?(?:[#].*)?$''' /?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = 'vimeo' IE_NAME = 'vimeo'
_TESTS = [ _TESTS = [
{ {
@@ -111,25 +138,6 @@ class VimeoIE(SubtitlesInfoExtractor):
else: else:
return super(VimeoIE, cls).suitable(url) return super(VimeoIE, cls).suitable(url)
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({'email': username,
'password': password,
'action': 'login',
'service': 'vimeo',
'token': token,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage): def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)
if password is None: if password is None:
@@ -438,3 +446,25 @@ class VimeoReviewIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
player_url = 'https://player.vimeo.com/player/' + video_id player_url = 'https://player.vimeo.com/player/' + video_id
return self.url_result(player_url, 'Vimeo', video_id) return self.url_result(player_url, 'Vimeo', video_id)
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
_VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
def _real_initialize(self):
self._login()
def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum)
request = compat_urllib_request.Request(url)
# Set the header to get a partial html page with the ids,
# the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest')
return request
def _real_extract(self, url):
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')

View File

@@ -0,0 +1,66 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
parse_duration,
qualities,
)
class VuClipIE(InfoExtractor):
    """Extractor for vuclip.com watch pages (``/w?...cid=<id>``)."""

    _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
        'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
        'info_dict': {
            'id': '843902317',
            'ext': '3gp',
            'title': 'Movie Trailer: Noah',
            'duration': 139,
        }
    }

    def _real_extract(self, url):
        """Download the watch page and build the info dict for the video.

        Returns a dict with ``id``, ``formats``, ``title`` and ``duration``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # If the page carries a "No..." button whose onClick redirects
        # elsewhere, follow that redirect (relative to the requested host)
        # and work on the page behind it instead.
        skip_ad = re.search(
            r'''value="No.*?" onClick="location.href='([^"']+)'"''', page)
        if skip_ad is not None:
            parsed = compat_urllib_parse_urlparse(url)
            origin = parsed.scheme + '://' + parsed.netloc
            page = self._download_webpage(
                origin + skip_ad.group(1), video_id,
                note='Download post-ad page')

        # The download links live between the "social" div and the next <hr>.
        links_code = self._search_regex(
            r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', page,
            'links')

        title = self._html_search_regex(
            r'<title>(.*?)-\s*Vuclip</title>', page, 'title').strip()

        # One format per link; the link text ('Reg' / 'Hi') ranks quality.
        rank = qualities(['Reg', 'Hi'])
        formats = [{
            'format_id': compat_urllib_parse_urlparse(href).scheme + '-' + label,
            'url': href,
            'quality': rank(label),
        } for href, label in re.findall(
            r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code)]
        self._sort_formats(formats)

        # Duration is optional: a missing match is not fatal.
        duration_text = self._search_regex(
            r'\(([0-9:]+)\)</span></h1>', page, 'duration', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': parse_duration(duration_text),
        }

View File

@@ -1,10 +1,11 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
class WeiboIE(InfoExtractor): class WeiboIE(InfoExtractor):
""" """
The videos in Weibo come from different sites, this IE just finds the link The videos in Weibo come from different sites, this IE just finds the link
@@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = { _TEST = {
u'add_ie': ['Sina'], 'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', 'info_dict': {
u'file': u'98322879.flv', 'id': '98322879',
u'info_dict': { 'ext': 'flv',
u'title': u'魔声耳机最新广告“All Eyes On Us”', 'title': '魔声耳机最新广告“All Eyes On Us”',
}, },
u'note': u'Sina video', 'params': {
u'params': { 'skip_download': True,
u'skip_download': True,
}, },
'add_ie': ['Sina'],
} }
# Additional example videos from different sites # Additional example videos from different sites
@@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
video_id = mobj.group('id') video_id = mobj.group('id')
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
info_page = self._download_webpage(info_url, video_id) info = self._download_json(info_url, video_id)
info = json.loads(info_page)
videos_urls = map(lambda v: v['play_page_url'], info['result']['data']) videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
#Prefer sina video since they have thumbnails # Prefer sina video since they have thumbnails
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u) videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
player_url = videos_urls[-1] player_url = videos_urls[-1]
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url) m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
player_url)
if m_sina is not None: if m_sina is not None:
self.to_screen('Sina video detected') self.to_screen('Sina video detected')
sina_id = m_sina.group(1) sina_id = m_sina.group(1)
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
return self.url_result(player_url) return self.url_result(player_url)

View File

@@ -3,11 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
class WimpIE(InfoExtractor): class WimpIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/' _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_TEST = { _TESTS = [{
'url': 'http://www.wimp.com/maruexhausted/', 'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1', 'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': { 'info_dict': {
@@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
'title': 'Maru is exhausted.', 'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8', 'description': 'md5:57e099e857c0a4ea312542b684a869b8',
} }
} }, {
# youtube video
'url': 'http://www.wimp.com/clowncar/',
'info_dict': {
'id': 'cG4CEr2aiSg',
'ext': 'mp4',
'title': 'Basset hound clown car...incredible!',
'description': 'md5:8d228485e0719898c017203f900b3a35',
'uploader': 'Gretchen Hoey',
'uploader_id': 'gretchenandjeff1',
'upload_date': '20140303',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex( video_url = self._search_regex(
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL') r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
'_type': 'url',
'url': video_url,
'ie_key': YoutubeIE.ie_key(),
}
return { return {
'id': video_id, 'id': video_id,

View File

@@ -14,27 +14,39 @@ from ..utils import (
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen' IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
_TESTS = [ _TESTS = [
{ {
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
'file': '214727115.mp4',
'md5': '4962b075c08be8690a922ee026d05e69', 'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': { 'info_dict': {
'id': '214727115',
'ext': 'mp4',
'title': 'Julian Smith & Travis Legg Watch Julian Smith', 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
'description': 'Julian and Travis watch Julian Smith', 'description': 'Julian and Travis watch Julian Smith',
}, },
}, },
{ {
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
'file': '103000935.mp4',
'md5': 'd6e6fc6e1313c608f316ddad7b82b306', 'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
'info_dict': { 'info_dict': {
'id': '103000935',
'ext': 'mp4',
'title': 'Codefellas - The Cougar Lies with Spanish Moss', 'title': 'Codefellas - The Cougar Lies with Spanish Moss',
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', 'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
}, },
}, },
{
'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
'md5': '410b7104aa9893b765bc22787a22f3d9',
'info_dict': {
'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
'ext': 'mp4',
'title': 'The World Loves Spider-Man',
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
}
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -42,13 +54,20 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'mediaItems: ({.*?})$', items_json = self._search_regex(
webpage, 'items', flags=re.MULTILINE) r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
items = json.loads(items_json) default=None)
info = items['mediaItems']['query']['results']['mediaObj'][0] if items_json is None:
# The 'meta' field is not always in the video webpage, we request it long_id = self._search_regex(
# from another page r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
long_id = info['id'] webpage, 'content ID')
video_id = long_id
else:
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
return self._get_info(long_id, video_id) return self._get_info(long_id, video_id)
def _get_info(self, long_id, video_id): def _get_info(self, long_id, video_id):
@@ -60,10 +79,9 @@ class YahooIE(InfoExtractor):
'env': 'prod', 'env': 'prod',
'format': 'json', 'format': 'json',
}) })
query_result_json = self._download_webpage( query_result = self._download_json(
'http://video.query.yahoo.com/v1/public/yql?' + data, 'http://video.query.yahoo.com/v1/public/yql?' + data,
video_id, 'Downloading video info') video_id, 'Downloading video info')
query_result = json.loads(query_result_json)
info = query_result['query']['results']['mediaObj'][0] info = query_result['query']['results']['mediaObj'][0]
meta = info['meta'] meta = info['meta']
@@ -86,7 +104,6 @@ class YahooIE(InfoExtractor):
else: else:
format_url = compat_urlparse.urljoin(host, path) format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url format_info['url'] = format_url
formats.append(format_info) formats.append(format_info)
self._sort_formats(formats) self._sort_formats(formats)
@@ -104,7 +121,7 @@ class YahooNewsIE(YahooIE):
IE_NAME = 'yahoo:news' IE_NAME = 'yahoo:news'
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html' _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
_TEST = { _TESTS = [{
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '67010fdf3a08d290e060a4dd96baa07b', 'md5': '67010fdf3a08d290e060a4dd96baa07b',
'info_dict': { 'info_dict': {
@@ -113,10 +130,7 @@ class YahooNewsIE(YahooIE):
'title': 'China Moses Is Crazy About the Blues', 'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
}, },
} }]
# Overwrite YahooIE properties we don't want
_TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -134,27 +148,25 @@ class YahooSearchIE(SearchInfoExtractor):
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
"""Get a specified number of results for a query""" """Get a specified number of results for a query"""
entries = []
res = {
'_type': 'playlist',
'id': query,
'entries': []
}
for pagenum in itertools.count(0): for pagenum in itertools.count(0):
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
webpage = self._download_webpage(result_url, query, info = self._download_json(result_url, query,
note='Downloading results page '+str(pagenum+1)) note='Downloading results page '+str(pagenum+1))
info = json.loads(webpage)
m = info['m'] m = info['m']
results = info['results'] results = info['results']
for (i, r) in enumerate(results): for (i, r) in enumerate(results):
if (pagenum * 30) +i >= n: if (pagenum * 30) + i >= n:
break break
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r) mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
e = self.url_result('http://' + mobj.group('url'), 'Yahoo') e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
res['entries'].append(e) entries.append(e)
if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)): if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
break break
return res return {
'_type': 'playlist',
'id': query,
'entries': entries,
}

View File

@@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
) )
)) ))
|youtu\.be/ # just youtu.be/xxxx |youtu\.be/ # just youtu.be/xxxx
|https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
) )
)? # all until now is optional -> you can pass the naked ID )? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
@@ -209,23 +210,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# Dash webm # Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40}, '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40}, '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40}, '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio # Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50}, '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50}, '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
# RTMP (unnamed) # RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'}, '_rtmp': {'protocol': 'rtmp'},
@@ -251,7 +252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"info_dict": { u"info_dict": {
u"upload_date": u"20120506", u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:5b292926389560516e384ac437c0ec07", u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
u"uploader": u"Icona Pop", u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop" u"uploader_id": u"IconaPop"
} }
@@ -303,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u'id': u'IB3lcPjvWLA', u'id': u'IB3lcPjvWLA',
u'ext': u'm4a', u'ext': u'm4a',
u'title': u'Afrojack - The Spark ft. Spree Wilson', u'title': u'Afrojack - The Spark ft. Spree Wilson',
u'description': u'md5:3199ed45ee8836572865580804d7ac0f', u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
u'uploader': u'AfrojackVEVO', u'uploader': u'AfrojackVEVO',
u'uploader_id': u'AfrojackVEVO', u'uploader_id': u'AfrojackVEVO',
u'upload_date': u'20131011', u'upload_date': u'20131011',
@@ -1081,9 +1082,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
break break
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) raise ExtractorError(
u'YouTube said: %s' % video_info['reason'][0],
expected=True, video_id=video_id)
else: else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason') raise ExtractorError(
u'"token" parameter not in video info for unknown reason',
video_id=video_id)
if 'view_count' in video_info: if 'view_count' in video_info:
view_count = int(video_info['view_count'][0]) view_count = int(video_info['view_count'][0])
@@ -1112,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# title # title
if 'title' in video_info: if 'title' in video_info:
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) video_title = video_info['title'][0]
else: else:
self._downloader.report_warning(u'Unable to extract video title') self._downloader.report_warning(u'Unable to extract video title')
video_title = u'_' video_title = u'_'
@@ -1418,7 +1423,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result(video_id, 'Youtube', video_id=video_id) return self.url_result(video_id, 'Youtube', video_id=video_id)
else: else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if playlist_id.startswith('RD'): if playlist_id.startswith('RD'):
# Mixes require a custom extraction process # Mixes require a custom extraction process
@@ -1453,7 +1458,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
more_widget_html = more['load_more_widget_html'] more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex( playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title') r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
page, u'title')
url_results = self._ids_to_results(ids) url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title) return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1753,7 +1759,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)' IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions' _FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions' _PLAYLIST_TITLE = u'Youtube Subscriptions'

View File

@@ -1,5 +1,7 @@
from .atomicparsley import AtomicParsleyPP
from .ffmpeg import ( from .ffmpeg import (
FFmpegAudioFixPP,
FFmpegMergerPP, FFmpegMergerPP,
FFmpegMetadataPP, FFmpegMetadataPP,
FFmpegVideoConvertor, FFmpegVideoConvertor,
@@ -9,6 +11,8 @@ from .ffmpeg import (
from .xattrpp import XAttrMetadataPP from .xattrpp import XAttrMetadataPP
__all__ = [ __all__ = [
'AtomicParsleyPP',
'FFmpegAudioFixPP',
'FFmpegMergerPP', 'FFmpegMergerPP',
'FFmpegMetadataPP', 'FFmpegMetadataPP',
'FFmpegVideoConvertor', 'FFmpegVideoConvertor',

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import subprocess
from .common import PostProcessor
from ..utils import (
check_executable,
compat_urlretrieve,
encodeFilename,
PostProcessingError,
prepend_extension,
shell_quote
)
class AtomicParsleyPPError(PostProcessingError):
    """Raised when embedding a thumbnail with AtomicParsley cannot be completed."""
class AtomicParsleyPP(PostProcessor):
    """Post-processor that adds the thumbnail as artwork using AtomicParsley."""

    def run(self, info):
        """Embed the thumbnail referenced by ``info['thumbnail']`` into the
        file at ``info['filepath']``.

        The thumbnail is downloaded next to the media file, AtomicParsley
        writes the result to a temporary file, and that file then replaces
        the original.

        Returns the tuple ``(True, info)`` on success.

        Raises AtomicParsleyPPError if the AtomicParsley executable is not
        available, if ``info`` carries no thumbnail, if AtomicParsley exits
        with a non-zero status, or if it did not write an output file.
        """
        if not check_executable('AtomicParsley', ['-v']):
            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        temp_thumbnail = prepend_extension(filename, 'thumb')

        if not info.get('thumbnail'):
            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')

        try:
            compat_urlretrieve(info['thumbnail'], temp_thumbnail)

            cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]

            self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)

            if self._downloader.params.get('verbose', False):
                self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))

            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout, stderr = p.communicate()
        finally:
            # The thumbnail is only an intermediate artifact: remove it on
            # every exit path so a failed run does not leave it behind.
            if os.path.exists(encodeFilename(temp_thumbnail)):
                os.remove(encodeFilename(temp_thumbnail))

        if p.returncode != 0:
            msg = stderr.decode('utf-8', 'replace').strip()
            raise AtomicParsleyPPError(msg)

        # Never delete the original before we know a replacement exists;
        # otherwise a run that wrote nothing would destroy the download.
        if not os.path.exists(encodeFilename(temp_filename)):
            raise AtomicParsleyPPError(
                'AtomicParsley did not write an output file; "%s" was left unchanged.' % filename)

        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, info

View File

@@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
bcmd = [self._downloader.encode(c) for c in cmd] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace') stderr = stderr.decode('utf-8', 'replace')
@@ -465,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
filename = info['filepath'] filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp') temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy'] if info['ext'] == u'm4a':
options = ['-vn', '-acodec', 'copy']
else:
options = ['-c', 'copy']
for (name, value) in metadata.items(): for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)]) options.extend(['-metadata', '%s=%s' % (name, value)])
@@ -484,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info return True, info
class FFmpegAudioFixPP(FFmpegPostProcessor):
    """Post-processor that rewrites an audio file through ffmpeg."""

    def run(self, info):
        """Re-run the file at ``info['filepath']`` through ffmpeg and put the
        result in place of the original.

        Returns the tuple ``(True, info)``.
        """
        source_path = info['filepath']
        fixed_path = prepend_extension(source_path, 'temp')

        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % source_path)
        # Options are handed to ffmpeg verbatim (presumably: drop any video
        # stream and copy the audio stream without re-encoding).
        self.run_ffmpeg(source_path, fixed_path, ['-vn', '-acodec', 'copy'])

        # Swap the freshly written file in for the original.
        os.remove(encodeFilename(source_path))
        os.rename(encodeFilename(fixed_path), encodeFilename(source_path))

        return True, info

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import calendar import calendar
import codecs
import contextlib import contextlib
import ctypes import ctypes
import datetime import datetime
@@ -593,13 +594,15 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
class ExtractorError(Exception): class ExtractorError(Exception):
"""Error during info extraction.""" """Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None): def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
""" tb, if given, is the original traceback (so that it can be printed out). """ tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl. If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
""" """
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True expected = True
if video_id is not None:
msg = video_id + ': ' + msg
if not expected: if not expected:
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
super(ExtractorError, self).__init__(msg) super(ExtractorError, self).__init__(msg)
@@ -607,6 +610,7 @@ class ExtractorError(Exception):
self.traceback = tb self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception self.exc_info = sys.exc_info() # preserve original exception
self.cause = cause self.cause = cause
self.video_id = video_id
def format_traceback(self): def format_traceback(self):
if self.traceback is None: if self.traceback is None:
@@ -909,25 +913,93 @@ def platform_name():
return res return res
def write_string(s, out=None): def _windows_write_string(s, out):
""" Returns True if the string was written using special methods,
False if it has yet to be written out."""
# Adapted from http://stackoverflow.com/a/3259271/35070
import ctypes
import ctypes.wintypes
WIN_OUTPUT_IDS = {
1: -11,
2: -12,
}
fileno = out.fileno()
if fileno not in WIN_OUTPUT_IDS:
return False
GetStdHandle = ctypes.WINFUNCTYPE(
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
("GetStdHandle", ctypes.windll.kernel32))
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
WriteConsoleW = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
written = ctypes.wintypes.DWORD(0)
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
FILE_TYPE_CHAR = 0x0002
FILE_TYPE_REMOTE = 0x8000
GetConsoleMode = ctypes.WINFUNCTYPE(
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
ctypes.POINTER(ctypes.wintypes.DWORD))(
("GetConsoleMode", ctypes.windll.kernel32))
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
def next_nonbmp_pos(s):
try:
return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
except StopIteration:
return len(s)
while s:
count = min(next_nonbmp_pos(s), 1024)
ret = WriteConsoleW(
h, s, count if count else 2, ctypes.byref(written), None)
if ret == 0:
raise OSError('Failed to write string')
if not count: # We just wrote a non-BMP character
assert written.value == 2
s = s[1:]
else:
assert written.value > 0
s = s[written.value:]
return True
def write_string(s, out=None, encoding=None):
if out is None: if out is None:
out = sys.stderr out = sys.stderr
assert type(s) == compat_str assert type(s) == compat_str
if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
if _windows_write_string(s, out):
return
if ('b' in getattr(out, 'mode', '') or if ('b' in getattr(out, 'mode', '') or
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
s = s.encode(preferredencoding(), 'ignore') byt = s.encode(encoding or preferredencoding(), 'ignore')
try: out.write(byt)
elif hasattr(out, 'buffer'):
enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
byt = s.encode(enc, 'ignore')
out.buffer.write(byt)
else:
out.write(s) out.write(s)
except UnicodeEncodeError:
# In Windows shells, this can fail even when the codec is just charmap!?
# See https://wiki.python.org/moin/PrintFails#Issue
if sys.platform == 'win32' and hasattr(out, 'encoding'):
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
out.write(s)
else:
raise
out.flush() out.flush()
@@ -1176,12 +1248,15 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD" return "HEAD"
def int_or_none(v, scale=1): def int_or_none(v, scale=1, default=None, get_attr=None):
return v if v is None else (int(v) // scale) if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
return default if v is None else (int(v) // scale)
def float_or_none(v, scale=1): def float_or_none(v, scale=1, default=None):
return v if v is None else (float(v) / scale) return default if v is None else (float(v) / scale)
def parse_duration(s): def parse_duration(s):
@@ -1263,9 +1338,11 @@ class PagedList(object):
def uppercase_escape(s): def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub( return re.sub(
r'\\U[0-9a-fA-F]{8}', r'\\U[0-9a-fA-F]{8}',
lambda m: m.group(0).decode('unicode-escape'), s) lambda m: unicode_escape(m.group(0))[0],
s)
try: try:
struct.pack(u'!I', 0) struct.pack(u'!I', 0)
@@ -1335,3 +1412,14 @@ US_RATINGS = {
def strip_jsonp(code): def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
def qualities(quality_ids):
    """Build a ranking function for the ordered sequence *quality_ids*.

    The returned callable maps a quality id to its position in
    *quality_ids*, or to -1 when the id is not listed.
    """
    def rank(qid):
        # Ids that are not listed get -1, below every listed position.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return rank

View File

@@ -1,2 +1,2 @@
__version__ = '2014.04.03.1' __version__ = '2014.04.30'