Compare commits

...

295 Commits

Author SHA1 Message Date
Philipp Hagemeister
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
Philipp Hagemeister
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
Philipp Hagemeister
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
Philipp Hagemeister
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
Philipp Hagemeister
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
Philipp Hagemeister
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
Philipp Hagemeister
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
Philipp Hagemeister
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
Philipp Hagemeister
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
Philipp Hagemeister
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
Philipp Hagemeister
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
Jaime Marquínez Ferrándiz
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
Philipp Hagemeister
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
Takuya Tsuchida
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
Itay Brandes
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access it's log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
Philipp Hagemeister
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
Philipp Hagemeister
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
Philipp Hagemeister
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
Philipp Hagemeister
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
Jaime Marquínez Ferrándiz
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
Jaime Marquínez Ferrándiz
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
Jaime Marquínez Ferrándiz
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
Philipp Hagemeister
50123be421 release 2013.11.22.1 2013-11-22 20:23:55 +01:00
Philipp Hagemeister
3f8ced5144 Merge remote-tracking branch 'jaimeMF/yt-playlists' 2013-11-22 20:11:54 +01:00
Philipp Hagemeister
00ea0f11eb Print full title in --get-title output (#1806) 2013-11-22 20:00:35 +01:00
Philipp Hagemeister
0b63aed8df [update] do not assign to unused variables 2013-11-22 19:15:36 +01:00
Philipp Hagemeister
15c3adbb16 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 19:08:33 +01:00
Philipp Hagemeister
f143a42fe6 [bandcamp] Skip album test 2013-11-22 19:08:25 +01:00
Jaime Marquínez Ferrándiz
241650c7ff [vimeo] Fix the extraction of vimeo pro and player.vimeo.com videos 2013-11-22 18:20:31 +01:00
Philipp Hagemeister
bfe7439a20 release 2013.11.22 2013-11-22 17:46:26 +01:00
Philipp Hagemeister
cffa6aa107 [bandcamp] Support trackinfo-style songs (Fixes #1270) 2013-11-22 17:44:55 +01:00
Philipp Hagemeister
02e4ebbbad [streamcloud] Add IE (Fixes #1801) 2013-11-22 17:19:22 +01:00
Philipp Hagemeister
ab009f59ef [toutv] Fix a typo 2013-11-22 17:18:03 +01:00
Jaime Marquínez Ferrándiz
0980426559 [bandcamp] add support for albums (reported in #1270) 2013-11-22 16:05:14 +01:00
Philipp Hagemeister
b1c9c66936 Remove unnecessary slash in setup.py (Fixes #1778) 2013-11-21 23:26:28 +01:00
Jaime Marquínez Ferrándiz
a6a173c2fd utils.shell_quote: Convert the args to unicode strings
The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.
2013-11-21 14:09:28 +01:00
Philipp Hagemeister
2bb683c201 release 2013.11.21 2013-11-21 13:59:33 +01:00
Jaime Marquínez Ferrándiz
64bb5187f5 [soundcloud] Retrieve the file url using the client_id for the iPhone (fixes #1798)
The desktop's client_id always give the rtmp url, but with the iPhone one it returns the http url if it's available.
2013-11-21 13:16:19 +01:00
Philipp Hagemeister
9e4f50a8ae [sztv] skip test, site is undergoing mid-term maintenance 2013-11-20 09:59:03 +01:00
Philipp Hagemeister
0190eecc00 [nhl] Make NHLVideocenter IE_DESC fit with other descriptions 2013-11-20 09:45:29 +01:00
Philipp Hagemeister
ca872a4c0b [spankwire] Fix description search 2013-11-20 09:23:53 +01:00
Philipp Hagemeister
f2e87ef4fa [anitube] Skip test (on travis) 2013-11-20 07:46:44 +01:00
Philipp Hagemeister
0ad97bbc05 [spankwire] fix check for description 2013-11-20 07:45:32 +01:00
Philipp Hagemeister
c4864091a1 [videopremium] Support new crazy redirect scheme 2013-11-20 07:43:21 +01:00
Philipp Hagemeister
9a98a466b3 [toutv] really skip test 2013-11-20 07:37:22 +01:00
Philipp Hagemeister
f99e0f1ed6 Adapt age restriction tests to new .info.json filenames 2013-11-20 07:37:07 +01:00
Philipp Hagemeister
d323bcb152 release 2013.11.20 2013-11-20 07:25:17 +01:00
Philipp Hagemeister
da6a795fdb [escapist] Fix title search 2013-11-20 07:23:23 +01:00
Philipp Hagemeister
c5edcde21f [escapist] upper-case URL 2013-11-20 06:56:59 +01:00
Philipp Hagemeister
15ff3c831e [escapist] Fix syntax error 2013-11-20 06:55:07 +01:00
Philipp Hagemeister
100959a6d9 [escapist] Add support for HD format (Closes #1755) 2013-11-20 06:52:08 +01:00
Philipp Hagemeister
0a120f74b2 Credit @diffycat for anitube 2013-11-20 06:36:00 +01:00
Philipp Hagemeister
8f05351984 [anitube] Minor fixes (#1776) 2013-11-20 06:35:02 +01:00
Philipp Hagemeister
4eb92208a3 Adapt test to changed .info.json name 2013-11-20 06:34:48 +01:00
Philipp Hagemeister
71791f414c Merge remote-tracking branch 'diffycat/master' 2013-11-20 06:28:13 +01:00
Philipp Hagemeister
f3682997d7 Clean up unused imports and other minor mistakes 2013-11-20 06:27:48 +01:00
Philipp Hagemeister
cc13cc0251 [teamcoco] Correct error 2013-11-20 06:25:33 +01:00
Philipp Hagemeister
86bd5f2ca9 Merge remote-tracking branch 'dz0ny/patch-1' 2013-11-20 06:21:05 +01:00
Philipp Hagemeister
8694c60000 import json for --dump-json 2013-11-20 06:18:24 +01:00
Mohamedh Fazal
9d1538182f Add an option to dump json information 2013-11-20 06:14:57 +01:00
Philipp Hagemeister
5904088811 Add support for tou.tv (Fixes #1792) 2013-11-20 06:13:19 +01:00
Jaime Marquínez Ferrándiz
69545c2aff [d8] inherit from CanalplusIE
it reuses the same extraction process
2013-11-19 20:44:20 +01:00
Jaime Marquínez Ferrándiz
495da337ae Merge pull request #1758 from migbac/master
Add support for d8.tv
2013-11-19 20:43:14 +01:00
Philipp Hagemeister
34b3afc7be release 2013.11.19 2013-11-19 12:41:01 +01:00
Philipp Hagemeister
00373a4c5d Merge pull request #1790 from rg3/console-title
Correctly write and restore the console title on the stack (fixes #1782)
2013-11-18 07:50:10 -08:00
Philipp Hagemeister
cb7dfeeac4 [youtube] only allow domain name to be upper-case (#1786) 2013-11-18 16:42:35 +01:00
Jaime Marquínez Ferrándiz
efd6c574a2 Correctly write and restore the console title on the stack (fixes #1782) 2013-11-18 16:35:41 +01:00
Philipp Hagemeister
4113e6ab56 [auengine] Do not return unnecessary ext 2013-11-18 14:36:01 +01:00
Philipp Hagemeister
9a942a4671 release 2013.11.18.1 2013-11-18 13:56:53 +01:00
Philipp Hagemeister
9906d397a0 [auengine] Simplify 2013-11-18 13:56:45 +01:00
Philipp Hagemeister
ae8f787141 Remove iPhone from user agent. This breaks a lot of extractors
In the future, it might be worth investigating whether we get better content when we claime to be an iPhone.
2013-11-18 13:52:26 +01:00
Philipp Hagemeister
a81b4d5c8f release 2013.11.18 2013-11-18 13:30:43 +01:00
Philipp Hagemeister
887c6acdf2 Support multiple embedded YouTube URLs (Fixes #1787) 2013-11-18 13:28:26 +01:00
Philipp Hagemeister
83aa529330 Support protocol-independent URLs (#1787) 2013-11-18 13:18:17 +01:00
Philipp Hagemeister
96b31b6533 Add iPhone to UA (#1746) 2013-11-18 13:05:58 +01:00
Philipp Hagemeister
fccd377198 Suppor embed-only videos (Fixes #1746) 2013-11-18 13:05:18 +01:00
Philipp Hagemeister
73c566695f release 2013.11.17 2013-11-17 22:14:13 +01:00
Philipp Hagemeister
63b7b7224a [MTVIE] Try with RTMP URL if download fails
This fixes youtube-dl http://www.southpark.de/clips/155251/cartman-vs-the-dog-whisperer
2013-11-17 22:11:40 +01:00
Philipp Hagemeister
ce80c8b8ee Merge pull request #1784 from rzhxeo/southpark
Add support for southpark.de
2013-11-17 12:15:13 -08:00
Philipp Hagemeister
749febf4d1 Allow --console-title when --quiet is given (Fixes #1783) 2013-11-17 21:12:50 +01:00
Philipp Hagemeister
bdde425cbe Save and restore console title (Fixes #1782) 2013-11-17 21:10:11 +01:00
rzhxeo
746f491f82 Add support for southpark.de 2013-11-17 17:54:47 +01:00
rzhxeo
1672647ade [SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 17:43:58 +01:00
rzhxeo
90b6bbc38c [SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 17:42:24 +01:00
Philipp Hagemeister
ce02ed60f2 Remove * imports 2013-11-17 16:47:52 +01:00
Philipp Hagemeister
1e5b9a95fd Move console_title to YoutubeDL 2013-11-17 11:39:52 +01:00
Philipp Hagemeister
1d699755e0 [youtube] Add view_count (Fixes #1781) 2013-11-17 11:06:16 +01:00
Philipp Hagemeister
ddf49c6344 [arte] remove two typos 2013-11-17 11:05:49 +01:00
Anton Larionov
ba3881dffd Add support for anitube.se (#1417) 2013-11-16 18:26:34 +04:00
Philipp Hagemeister
d1c252048b [redtube] Do not test md5, seems to vary 2013-11-16 10:30:09 +01:00
Philipp Hagemeister
eab2724138 [gamekings] Do not test md5 sum, precise file changes regularly 2013-11-16 02:32:23 +01:00
Philipp Hagemeister
21ea3e06c9 [gamekings] remove unnecessary import 2013-11-16 02:31:02 +01:00
Philipp Hagemeister
52d703d3d1 [tvp] Skip tests 2013-11-16 02:09:30 +01:00
Philipp Hagemeister
ce152341a1 [bambuser] Do not test for MD5, seems to be flaky 2013-11-16 01:59:28 +01:00
Philipp Hagemeister
f058e34011 [dailymotion] Fix playlists 2013-11-16 01:56:23 +01:00
Philipp Hagemeister
b5349e8721 Fix indentation of (best) and (worst) in --list-formats 2013-11-16 01:39:45 +01:00
Philipp Hagemeister
7150858d49 [spiegel] Implement format selection 2013-11-16 01:33:12 +01:00
Philipp Hagemeister
91c7271aab Add automatic generation of format note based on bitrate and codecs 2013-11-16 01:08:43 +01:00
Philipp Hagemeister
aa13b2dffd release 2013.11.15.1 2013-11-15 14:35:00 +01:00
Philipp Hagemeister
fc2ef392be [ted] Fix playlists (Fixes #1770) 2013-11-15 14:33:51 +01:00
Philipp Hagemeister
463a908705 [ted] simplify 2013-11-15 14:06:38 +01:00
Jaime Marquínez Ferrándiz
d24ffe1cfa [rtlnow] Remove the test for nitro
The videos expire.
2013-11-15 12:57:59 +01:00
Jaime Marquínez Ferrándiz
78fb87b283 Don't accept '>' inside the content attribute in OpenGraph regexes 2013-11-15 12:54:13 +01:00
Jaime Marquínez Ferrándiz
ab2d524780 Improve the OpenGraph regex
* Do not accept '>' between the property and content attributes.
* Recognize the properties if the content attribute is before the property attribute using two regexes (fixes the extraction of the description for SlideshareIE).
2013-11-15 12:24:54 +01:00
Jaime Marquínez Ferrándiz
85d61685f1 [tvp] Update the title and the description of the test video 2013-11-15 12:10:22 +01:00
Jaime Marquínez Ferrándiz
b9643eed7c [youtube:channel] Fix the extraction of autogenerated channels
The ajax pages are empty, now it looks directly in the channel's /videos page
2013-11-15 11:51:45 +01:00
Jaime Marquínez Ferrándiz
feee2ecfa9 Pass the 'download' argument to 'process_video_result' (fixes #1769) 2013-11-15 11:04:26 +01:00
Philipp Hagemeister
a25a5cfeec release 2013.11.15 2013-11-15 01:47:15 +01:00
Philipp Hagemeister
0e145dd541 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-15 01:46:50 +01:00
Philipp Hagemeister
9f9be844fc [youtube] Fix protocol-independent URLs (Fixes #1768) 2013-11-15 01:45:39 +01:00
Jaime Marquínez Ferrándiz
e3b9ab5e18 [soundlcoud] Set the correct extension for the tracks (fixes #1766)
Some tracks are not in mp3 format, they can be wav files.
2013-11-14 19:45:39 +01:00
Jaime Marquínez Ferrándiz
c66d2baa9c [livestream] Add an extractor for the original version of livestream (closes #1764)
The two versions use different systems.
2013-11-14 13:16:32 +01:00
Janez Troha
08bc37cdd0 Update test_write_info_json.py 2013-11-13 18:55:49 +01:00
Janez Troha
9771cceb2c Fix filename extension leaking to json filename
Makes writeinfojson behaving exactly as writethumbnail in case where filename contains mediafile extension.

Case:

video.mp4 converted to music.mp3 would yield music.mp4.info.json instead music.mp3.info.json or music.info.json
2013-11-13 18:34:03 +01:00
Jaime Marquínez Ferrándiz
ca715127a2 Don't assume the 'subtitlesformat' is set in the params dict (fixes #1750) 2013-11-13 17:14:10 +01:00
Jaime Marquínez Ferrándiz
ea7a7af1d4 [gamekings] Fix the test video checksum 2013-11-13 17:13:06 +01:00
Jaime Marquínez Ferrándiz
880e1c529d [youtube:playlist] Login into youtube if requested (fixes #1757)
Allows to download private playlists
2013-11-13 16:39:11 +01:00
Jaime Marquínez Ferrándiz
dcbb45803f [youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
2013-11-13 16:26:50 +01:00
Philipp Hagemeister
80b9bbce86 release 2013.11.13 2013-11-13 11:09:04 +01:00
Philipp Hagemeister
d37936386f Credit @saper for tvp IE (#1730) 2013-11-13 11:08:07 +01:00
Philipp Hagemeister
c3a3028f9f [tvp] Minor improvements (#1730) 2013-11-13 11:06:53 +01:00
Philipp Hagemeister
6c5ad80cdc Merge remote-tracking branch 'saper/tvp' 2013-11-13 11:03:49 +01:00
Philipp Hagemeister
b5bdc2699a Credit @jelly for gamekings extractor (#1759) 2013-11-13 10:52:22 +01:00
Philipp Hagemeister
384b98cd8f [gamekings] Minor fixes (#1759) 2013-11-13 10:51:00 +01:00
Jelle van der Waa
eb9b5bffef Add extractor for gamekings.tv 2013-11-13 10:38:47 +01:00
migbac
0bd59f3723 Add support for d8.tv 2013-11-12 23:32:03 +01:00
Jaime Marquínez Ferrándiz
8b8cbd8f6d [vine] Fix uploader extraction 2013-11-12 20:50:52 +01:00
Jaime Marquínez Ferrándiz
72b18c5d34 FFmpegMetadataPP: don't enclose the values with " (fixes #1756) 2013-11-12 20:38:13 +01:00
Philipp Hagemeister
eb0a839866 [common] Simplify og_search_property 2013-11-12 10:36:23 +01:00
Philipp Hagemeister
1777d5a952 release 2013.11.11 2013-11-11 18:28:17 +01:00
Philipp Hagemeister
d4b7da84c3 Clarify -c. Do not pass it in if you don't know what you're doing
Suggested in #1743
2013-11-11 14:21:14 +01:00
Jaime Marquínez Ferrándiz
801dbbdffd Use avconv for downloading with m3u8 manifests if it's available (fixes #1735) 2013-11-10 16:47:03 +01:00
Jaime Marquínez Ferrándiz
0ed05a1d2d Use the 'rtmp_live' field for the live parameter of rtmpdump 2013-11-10 12:45:17 +01:00
Jaime Marquínez Ferrándiz
1008bebade Merge remote-tracking branch 'rzhxeo/rtmpdump_live' 2013-11-10 12:38:40 +01:00
Jaime Marquínez Ferrándiz
ae84f879d7 Merge all the subtitles test into a single file
They reuse a base class
2013-11-10 12:28:21 +01:00
Jaime Marquínez Ferrándiz
be6dfd1b49 [ted] Return a single info_dict for talks urls
It failed with the --list-subs option
2013-11-10 12:09:12 +01:00
Jaime Marquínez Ferrándiz
231516b6c9 Merge pull request #1705 from iemejia/master
[ted] support for subtitles
2013-11-10 11:54:18 +01:00
Jaime Marquínez Ferrándiz
fb53d58dcf Merge pull request #1726 from saper/escaped
Fix AssertionError when og property not found
2013-11-10 02:51:52 -08:00
Jaime Marquínez Ferrándiz
2a9e9b210b Fix the documentation of '--autonumber-size' (#1743)
it's '--auto-number' not '--autonumber'
2013-11-09 19:21:30 +01:00
Jaime Marquínez Ferrándiz
897d6cc43a Improve format listing for long format ids
Now arte.tv videos have quite long ids.
2013-11-09 19:07:34 +01:00
Jaime Marquínez Ferrándiz
f470c6c812 [arte] Improve the format sorting
Also use the bitrate.
Prefer normal version and sourds/mal version over original version with subtitles.
2013-11-09 19:05:19 +01:00
Jaime Marquínez Ferrándiz
566d4e0425 [arte] Make sure the format_id is unique (closes #1739)
Include the bitrate and use the height instead of the quality field.
2013-11-09 19:01:23 +01:00
Jaime Marquínez Ferrándiz
81be02d2f9 [cnn] Accept www.cnn.com urls (fixes #1740) 2013-11-09 18:16:32 +01:00
Jaime Marquínez Ferrándiz
c2b6a482d5 [brightcove] the format function requires to specify the index in python2.6 2013-11-09 18:10:11 +01:00
Jaime Marquínez Ferrándiz
12c167c881 [soundcloud] Allow to download tracks marked as not 'streamable'
They use the rtmp protocol but if the are marked as 'downloadable' it can use the direct download link.
2013-11-09 18:08:03 +01:00
Jaime Marquínez Ferrándiz
20aafee7fa [kankan] Fix the video url
It now requires two additional parameters, one is a timestamp we get from the getCdnresource_flv page and the other is a key we have to build.
2013-11-09 16:51:11 +01:00
Jaime Marquínez Ferrándiz
be07375b66 Don't recode the video with m3u8 downloads (fixes #1741) 2013-11-09 16:40:00 +01:00
Jaime Marquínez Ferrándiz
dd5bcdc4c9 [brightcove] Set the 'Referer' header if the url has the 'linkBaseUrl' parameter (fixes #1553) 2013-11-07 21:06:48 +01:00
Philipp Hagemeister
6161d17579 release 2013.11.07 2013-11-07 11:06:34 +01:00
Jaime Marquínez Ferrándiz
4ac5306ae7 Fix the report progress when file_size is unknown (#1731)
The report_progress function will accept eta and percent with None value and will set the message to 'Unknow ETA' or 'Unknown %'.
Otherwise the values must be numbers.
2013-11-07 08:03:35 +01:00
Jaime Marquínez Ferrándiz
b1a80ec1a9 [xnxx] Accept urls that start with 'www' (fixes #1734) 2013-11-06 23:45:01 +01:00
Philipp Hagemeister
672fe94dcb release 2013.11.06.1 2013-11-06 22:11:46 +01:00
Jaime Marquínez Ferrándiz
51040b72ed [brightcove] Support redirected urls from bcove.me (fixes #1732)
'bctid' needs to be changed to '@videoPlayer', and 'bckey' to 'playerKey'.
2013-11-06 22:03:00 +01:00
Jaime Marquínez Ferrándiz
4f045eef8f [youtube:channel] Fix the extraction
The page don't include the 'load more' button anymore, now we directly get the 'c4_browse_ajax' pages.
2013-11-06 21:42:33 +01:00
Jaime Marquínez Ferrándiz
5d7b253ea0 Add an extractor for eitb.tv (fixes #1608)
The BrighcoveExperience object doesn't contain the video id, the extractor adds it and passes the url to BrightcoveIE.
2013-11-06 20:06:14 +01:00
Jaime Marquínez Ferrándiz
b0759f0c19 [brightcove] Extract all the available formats 2013-11-06 19:05:41 +01:00
Jaime Marquínez Ferrándiz
065472936a Add an extractor for space.com (fixes #1718)
It uses Brightcove, but requires some special process for getting a url with the playerKey field in some videos
2013-11-06 17:37:39 +01:00
Jaime Marquínez Ferrándiz
fc4a0c2aec [brightcove] Change the 'videoId' or 'videoID' field to '@videoPlayer' (fixes #1697)
It seems to be needed when using the htmlFederated page
2013-11-06 17:31:47 +01:00
Jaime Marquínez Ferrándiz
eeb165e674 [brightcove] Add the extraction of the url from generic 2013-11-06 16:58:03 +01:00
Jaime Marquínez Ferrándiz
9ee2b5f6f2 tests: don't run the test if any of the extractors listed in the 'add_ie' field is marked as not working 2013-11-06 16:43:26 +01:00
Philipp Hagemeister
da54be877a release 2013.11.06 2013-11-06 14:02:52 +01:00
Philipp Hagemeister
50a886b7ab Fix reporting when file size is unkown (Fixes #1731) 2013-11-06 14:02:33 +01:00
Philipp Hagemeister
76e67c2cb6 Clean up imports 2013-11-06 14:01:43 +01:00
Marcin Cieślak
5137ebac0b [tvp] Telewizja Polska: new extractor for tvp.pl, fixes #1719
Thanks-To: mplonski

https://github.com/mplonski/linux/blob/master/tvp-dl.py
2013-11-05 23:47:40 +01:00
Marcin Cieślak
a8eeb0597b Fix AssertionError when og property not found
On tvp.pl some webpages contain OpenGraph
metadata and some don't.

If og property is not found, _og_search_description
fails with

WARNING: unable to extract OpenGraph description; please report this issue on http://yt-dl.org/bug
Traceback (most recent call last):
  File "/usr/home/saper/bin/youtube-dl", line 18, in <module>
    youtube_dl.main()
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 766, in main
    _real_main(argv)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 719, in _real_main
    retcode = ydl.download(all_urls)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 715, in download
    videos = self.extract_info(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 348, in extract_info
    ie_result = ie.extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 125, in extract
    return self._real_extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/tvp.py", line 56, in _real_extract
    info['description'] = self._og_search_description(webpage)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 331, in _og_search_description
    return self._og_search_property('description', html, fatal=False, **kargs)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 325, in _og_search_property
    return unescapeHTML(escaped)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/utils.py", line 494, in unescapeHTML
    assert type(s) == type(u'')
AssertionError

The patch allows me to use:

  try:
    info['description'] = self._og_search_description(webpage)
    info['thumbnail'] = self._og_search_thumbnail(webpage)
  except RegexNotFoundError:
    pass
2013-11-05 23:19:29 +01:00
Ismaël Mejía
4ed3e51080 [ted] fixed error in case of no subtitles present
I created a test, but I leave it commented since TED videos get
new subtitles frequently.
2013-11-05 12:00:13 +01:00
Jaime Marquínez Ferrándiz
7f34001d57 Merge pull request #1724 from rzhxeo/generic_youtube
[GenericIE] Also detect youtube if src url of iframe is embedded in ' instead of "
2013-11-04 23:00:46 -08:00
rzhxeo
2dcf7d8f99 [GenericIE] Also detect youtube if src url of iframe is embedded in ' instaed of " 2013-11-05 02:08:02 +01:00
Jaime Marquínez Ferrándiz
19b0668251 [canal2c] Accept more urls (fixes #1723)
The url only needs to have the 'idVideo' field in the query, in any position.
We have to set the 'void=oui' in the webpage url, so that we get the file name.
2013-11-04 22:26:19 +01:00
Jaime Marquínez Ferrándiz
e7e6b54d8a [teamcoco] Parse the xml file and extract all the formats 2013-11-03 17:48:12 +01:00
Jaime Marquínez Ferrándiz
2a1a8ffe41 Merge pull request #1693 from alexvh/teamcoco_fix
[teamcoco] Fix video url extraction for some videos
2013-11-03 17:19:51 +01:00
Philipp Hagemeister
08fb86c49b [youtube] Add description for YoutubeSearchDateIE (#1710) 2013-11-03 15:59:10 +01:00
Philipp Hagemeister
3633d77c0f Merge remote-tracking branch 'CBGoodBuddy/ytsearchtime' 2013-11-03 15:56:55 +01:00
Philipp Hagemeister
165e179764 release 2013.11.03 2013-11-03 15:50:36 +01:00
Philipp Hagemeister
12ebdd1506 [viddler] Support non-digit IDs (Fixes #1714) 2013-11-03 15:49:59 +01:00
Jaime Marquínez Ferrándiz
1baf9a5938 Merge pull request #1698 from rzhxeo/cinemassacre
[CinemassacreIE] Support more embed urls
2013-11-03 05:17:12 -08:00
Jaime Marquínez Ferrándiz
a56f9de156 Style fixes for extractors: remove spaces around (,),{ and } 2013-11-03 14:06:47 +01:00
Jaime Marquínez Ferrándiz
fa5d47af4b Merge pull request #1679 from rzhxeo/mofosex
Add support for http://www.mofosex.com
2013-11-03 05:04:14 -08:00
Jaime Marquínez Ferrándiz
d607038753 Merge pull request #1677 from rzhxeo/xtube
Add support for http://www.xtube.com
2013-11-03 03:28:02 -08:00
Jaime Marquínez Ferrándiz
9ac6a01aaf Merge pull request #1676 from rzhxeo/extremetube
Add support for http://www.extremetube.com
2013-11-03 03:25:46 -08:00
Jaime Marquínez Ferrándiz
be97abc247 Set the 'extractor_key' field in the info_dict
It's the string returned by the class method 'ie_key', which allows to retrieve the extractor with 'get_info_extractor'
2013-11-03 12:14:44 +01:00
Jaime Marquínez Ferrándiz
9103bbc5cd Add the 'webpage_url' field to info_dict
The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
2013-11-03 12:11:13 +01:00
Jaime Marquínez Ferrándiz
b6c45014ae Set the extra_info inside YoutubeDL.process_ie_result and set only if the keys are missing 2013-11-03 11:57:04 +01:00
Craig Markwardt
a3dd924871 Add YoutubeSearchDateIE extractor to youtube.py & __init__.py, which searches by publication date. 2013-11-02 22:40:48 -04:00
rzhxeo
137bbb3e37 [XTubeIE] Add description to TEST 2013-11-02 22:45:48 +01:00
rzhxeo
86ad94bb2e [ExtremeTubeIE] Set age_limit to 18 and fix uploader extraction 2013-11-02 22:33:49 +01:00
Jaime Marquínez Ferrándiz
3e56add7c9 Merge pull request #1678 from rzhxeo/keezmovies
[KeezMoviesIE] Detect URLs with numbers in the SEO part correct
2013-11-02 14:15:52 -07:00
Jaime Marquínez Ferrándiz
f52f01b5d2 [brightcove] Don't set the extension
If the video only has the 'FLVFullLengthURL' key, it can still be an mp4 file.
2013-11-02 21:20:46 +01:00
Jaime Marquínez Ferrándiz
98d7efb537 [exfm] skip tests
The site is down too often.
2013-11-02 20:51:09 +01:00
Jaime Marquínez Ferrándiz
cf51923545 [youtube] Remove vevo test
The video is no longer available and it seems that vevo video don't use encrypted signatures anymore.
2013-11-02 20:46:26 +01:00
Ismaël Mejía
38fcd4597a Merge remote-tracking branch 'iemejia/master' 2013-11-02 19:56:06 +01:00
Jaime Marquínez Ferrándiz
165e3bb67a [bambuser] Add an extractor for channels (closes #1702) 2013-11-02 19:50:57 +01:00
Ismaël Mejía
38db46794f Merge branch 'ted_subtitles' 2013-11-02 19:50:45 +01:00
Ismaël Mejía
a9a3876d55 [ted] Added support for subtitle download 2013-11-02 19:48:39 +01:00
Ismaël Mejía
1f343eaabb [subtitles] refactor to support websites with subtitle information the
webpage.

I added the parameter webpage, so now it's similar to the way automatic
captions are handled. This is an improvement needed for websites like
TED.
2013-11-02 19:29:25 +01:00
Jaime Marquínez Ferrándiz
72a5b4f702 Add an extractor for bambuser.com (#1702) 2013-11-02 19:01:01 +01:00
rzhxeo
0a43ddf320 [CinemassacreIE] Add live paramter to extracted info as a workaround 2013-11-02 18:08:35 +01:00
rzhxeo
31366066bd Add support for live parameter to rtmpdump 2013-11-02 18:08:16 +01:00
Philipp Hagemeister
aa2484e390 release 2013.11.02 2013-11-02 11:21:36 +01:00
Philipp Hagemeister
8eddf3e91d [youtube] Encode subtitle track name in request (Fixes #1700) 2013-11-02 11:21:05 +01:00
Jaime Marquínez Ferrándiz
60d142aa8d Add an extractor for vk.com (closes #1635) 2013-11-01 22:34:18 +01:00
Jaime Marquínez Ferrándiz
66cf3ac342 [metacafe] Fix support for age-restricted videos (fixes #1696)
The 'Content-Type' header must be set for disabling the family filter.
The 'flashversion' cookie  is only needed for AnyClip videos.
Added tests for standard metacafe videos and for age-restricted videos.
Also set the 'age_limit' field.
2013-11-01 11:56:15 +01:00
rzhxeo
ab4e151347 [CinemassacreIE] Support more embed urls 2013-11-01 01:24:23 +01:00
Alex Van't Hof
ac2547f5ff [teamcoco] Fix video url extraction for some videos
Video url extraction failed for some videos,
e.g. http://teamcoco.com/video/old-time-baseball

The url extracted was also occasionally suboptimal quality,
e.g. http://teamcoco.com/video/louis-ck-interview-george-w-bush
2013-10-31 15:41:14 -04:00
Jaime Marquínez Ferrándiz
5f1ea943ab [livestream] fix the extraction of events
It now uses a json dictionary from the webpage.
2013-10-31 08:07:26 +01:00
Jaime Marquínez Ferrándiz
0ef7ad5cd4 Fix the test for dailymotion subtitles
The extractor returns a single info_dict now.
2013-10-31 07:55:03 +01:00
Philipp Hagemeister
9f1109a564 [dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 00:20:49 +01:00
Philipp Hagemeister
33b1d9595d release 2013.10.30 2013-10-30 01:17:20 +01:00
Philipp Hagemeister
7193498811 Use index in formt string (Fixes vevo test on Python 2.6) 2013-10-30 01:17:00 +01:00
Philipp Hagemeister
72321ead7b [vevo] Readd support for SMIL (Fixes #1683) 2013-10-30 01:14:17 +01:00
Philipp Hagemeister
b5d0d817bc Remove superfluous space 2013-10-30 01:09:44 +01:00
Philipp Hagemeister
94badb2599 Fix output indenting for --list-formats 2013-10-30 01:09:26 +01:00
Filippo Valsorda
b9a836515f Update the Vimeo test vector md5
confirmed that this is indeed the first 10241 (we went off by one with
byte range 0-10240) of the full, playing mp4, so they probably
reencoded or something
2013-10-29 16:44:35 -04:00
Jaime Marquínez Ferrándiz
21c924f406 [arte] Download the 'Originalversion' version if it's the only one available (fixes #1682) 2013-10-29 20:58:49 +01:00
Philipp Hagemeister
e54fd4b23b [vevo] Add more format details 2013-10-29 15:10:09 +01:00
Philipp Hagemeister
57dd9a8f2f Nicer --list-formats output 2013-10-29 15:09:45 +01:00
Philipp Hagemeister
912cbf5d4e [vevo] Fix timestamp handling
( / 1000 is implicit float division )
2013-10-29 14:00:23 +01:00
Philipp Hagemeister
43d7895ea0 release 2013.10.29 2013-10-29 06:48:39 +01:00
Philipp Hagemeister
f7ff55aa78 Merge remote-tracking branch 'origin/master' 2013-10-29 06:48:18 +01:00
Philipp Hagemeister
795f28f871 [youtube] Fix login (Fixes #1681) 2013-10-29 06:45:54 +01:00
Filippo Valsorda
f6cc16f5d8 [tests] a HTTP 503 is a transient issue 2013-10-28 19:07:16 -04:00
Jaime Marquínez Ferrándiz
321a01f971 [mtv] Remove the templates from the mediagen url 2013-10-28 23:37:01 +01:00
Philipp Hagemeister
646e17a53d Fix YouTubeDL test 2013-10-28 23:18:13 +01:00
Filippo Valsorda
dd508b7c4f [tests] don't fail on network errors
This is suboptimal, but at least this way we will need to look at the logs
only to check for network errors that happen too often, instead of
parsing a ton of lines each time to see if there is some true test failing
2013-10-28 18:03:26 -04:00
Jaime Marquínez Ferrándiz
2563bcc85c Add an extractor for MySpace (closes #1666) 2013-10-28 22:02:17 +01:00
Jaime Marquínez Ferrándiz
702665c085 tests: build the filename from the info_dict if the 'file' key is missing
It will need to have the 'id' and 'ext' keys to work.
2013-10-28 22:01:37 +01:00
rzhxeo
dcc2a706ef Add support for http://www.xtube.com 2013-10-28 19:23:48 +01:00
rzhxeo
2bc67c35ac [KeezMoviesIE] Detect URLs with numbers in the SEO part correct 2013-10-28 18:22:55 +01:00
rzhxeo
77ae65877e Add support for http://www.mofosex.com 2013-10-28 18:18:58 +01:00
rzhxeo
32a35e4418 Add support for http://www.extremetube.com 2013-10-28 17:35:01 +01:00
Jaime Marquínez Ferrándiz
369a759acc setup.py: Make sure the setuptools_available variable is set
Otherwise it would crash if it can't import setuptools.
2013-10-28 16:54:48 +01:00
Philipp Hagemeister
79b3f61228 Merge pull request #1675 from rzhxeo/fix
Check if description and thumbnail are None to prevent crash
2013-10-28 08:35:40 -07:00
rzhxeo
216d71d001 Check if description and thumbnail are None to prevent crash 2013-10-28 16:28:35 +01:00
Philipp Hagemeister
78a3a9f89e Make "requested format not available" expected (#1655) 2013-10-28 11:41:59 +01:00
Philipp Hagemeister
a7685f3bf4 mixcloud does not do any format selection 2013-10-28 11:41:32 +01:00
Philipp Hagemeister
f088ea5486 release 2013.10.28 2013-10-28 11:34:21 +01:00
Philipp Hagemeister
1003d108d5 [vimeo] Support hash in URL (Fixes #1669) 2013-10-28 11:32:22 +01:00
Philipp Hagemeister
8abeeb9449 Nicer --list-formats output 2013-10-28 11:31:12 +01:00
Philipp Hagemeister
c1002e96e9 Let extractors omit ext in formats 2013-10-28 11:28:02 +01:00
Philipp Hagemeister
77d0a82fef [addanime] Use new formats system 2013-10-28 11:24:47 +01:00
Philipp Hagemeister
ebc14f251c Merge remote-tracking branch 'origin/master' 2013-10-28 10:44:13 +01:00
Philipp Hagemeister
d41e6efc85 New debug option --write-pages 2013-10-28 10:44:02 +01:00
Filippo Valsorda
8ffa13e03e [Instagram] get the non-https link, as they are serving Akamai cert from a instagram.com domain 2013-10-28 02:34:29 -04:00
Filippo Valsorda
db477d3a37 Merge pull request #1620 from jaimeMF/console_script
Use the console_scripts entry point if setuptools is available
2013-10-27 23:08:59 -07:00
Filippo Valsorda
750e9833b8 Add the missing age_limit tags; added a devscript to do a superficial check for porn sites without the age_limit tag in the test 2013-10-28 01:50:17 -04:00
Filippo Valsorda
82f0ac657c Merge pull request #1657 by @rzhxeo
[YouPornIE] Extract all encrypted links and remove doubles at the end
2013-10-28 01:45:52 -04:00
Filippo Valsorda
eb6a2277a2 Merge pull request #1659 by @rzhxeo
Add support for http://www.tube8.com
2013-10-28 01:38:28 -04:00
Filippo Valsorda
f8778fb0fa Merge pull request #1663 by @rzhxeo
Add support for http://www.spankwire.com
2013-10-28 01:35:11 -04:00
Filippo Valsorda
e2f9de207c Merge pull request #1664 by @rzhxeo
Add support for http://www.keezmovies.com
2013-10-28 01:25:46 -04:00
Filippo Valsorda
a93cc0d943 Merge pull request #1661 by @rzhxeo
Add support for http://www.pornhub.com
2013-10-28 00:50:39 -04:00
Filippo Valsorda
7d8c2e07f2 [Exfm] replace the failing Soundcloud test vector (broken also in browser) 2013-10-28 00:33:43 -04:00
Filippo Valsorda
efb4c36b18 Merge pull request #1660 from pyed/master
[addanime] try to download HQ before normal
2013-10-27 21:14:19 -07:00
Jaime Marquínez Ferrándiz
29526d0d2b Merge pull request #1656 from rzhxeo/xhamster
[XHamsterIE] Extract SD and HD video
2013-10-27 10:12:59 -07:00
Abdulelah Alfntokh
198e370f23 [addanime] better regex. 2013-10-27 19:48:02 +03:00
Jaime Marquínez Ferrándiz
c19f7764a5 [generic] Detect bandcamp pages that use custom domains (closes #1662)
They embed the original url in the 'og:url' property.
2013-10-27 14:40:25 +01:00
Jaime Marquínez Ferrándiz
bc63d9d329 [rtlnow] Change the test for rtlnitronow 2013-10-27 14:26:19 +01:00
Jaime Marquínez Ferrándiz
aa929c37d5 [generic] Fix test video's checksum 2013-10-27 14:21:37 +01:00
Jaime Marquínez Ferrándiz
af4d506eb3 [faz] Use a regex for getting the description
The page cannot be parsed in python2.6 with the html parser.
2013-10-27 14:18:55 +01:00
rzhxeo
5da0549581 [KeezMoviesIE] Correct return value for embedded videos 2013-10-27 12:48:09 +01:00
Jaime Marquínez Ferrándiz
749a4fd2fd [facebook] Don't recommend to report the issue if the video is private. 2013-10-27 12:13:55 +01:00
Jaime Marquínez Ferrándiz
6f71ef580c [facebook] Report a more meaningful message if the video cannot be accessed (closes #1658) 2013-10-27 12:09:46 +01:00
Jaime Marquínez Ferrándiz
67874aeffa [facebook] Fix the login process (fixes #1244) 2013-10-27 12:07:58 +01:00
Abdulelah Alfntokh
3e6a330d38 [addanime] fix md5sum 2013-10-27 13:51:26 +03:00
Abdulelah Alfntokh
aee5e18c8f [addanime] catch 'RegexNotFoundError' 2013-10-27 13:36:43 +03:00
rzhxeo
5b11143d05 Add support for http://www.keezmovies.com 2013-10-27 10:10:28 +01:00
rzhxeo
7b2212e954 Add support for http://www.spankwire.com 2013-10-27 01:59:26 +02:00
rzhxeo
71865091ab [Tube8IE] Fix regex for uploader extraction 2013-10-27 01:08:03 +02:00
rzhxeo
125cfd78e8 Add support for http://www.pornhub.com 2013-10-27 01:04:22 +02:00
rzhxeo
8cb57d9b91 [Tube8IE] Escape dot in regex 2013-10-27 00:21:27 +02:00
pyed
14e10b2b6e [addanime] try to download HQ before normal 2013-10-27 01:19:38 +03:00
rzhxeo
6e76104d66 [YouPornIE] Make webpage download more robust 2013-10-26 23:33:32 +02:00
rzhxeo
1d45a23b74 Add support for http://www.tube8.com 2013-10-26 23:27:30 +02:00
rzhxeo
7df286540f [YouPornIE] Extract all encrypted links and remove doubles at the end 2013-10-26 21:57:10 +02:00
rzhxeo
5d0c97541a [XHamsterIE] Extract SD and HD video 2013-10-26 20:38:54 +02:00
Philipp Hagemeister
49a25557b0 [8tracks] Use track count instead of looking at at_last_track property
This fixes the error:

$ youtube-dl http://8tracks.com/vladmc/counting-stars
[8tracks] counting-stars: Downloading webpage
[8tracks] counting-stars: Downloading song information 1/4
[8tracks] counting-stars: Downloading song information 2/4
[8tracks] counting-stars: Downloading song information 3/4
[8tracks] counting-stars: Downloading song information 4/4
[8tracks] counting-stars: Downloading song information 5/4
Traceback (most recent call last):
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/phihag/projects/youtube-dl/youtube_dl/__main__.py", line 18, in <module>
    youtube_dl.main()
  File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 761, in main
    _real_main(argv)
  File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 714, in _real_main
    retcode = ydl.download(all_urls)
  File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 701, in download
    videos = self.extract_info(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 342, in extract_info
    ie_result = ie.extract(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/common.py", line 121, in extract
    return self._real_extract(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/eighttracks.py", line 111, in _real_extract
    'id': track_data['id'],
KeyError: 'id'
2013-10-25 23:46:19 +02:00
Jaime Marquínez Ferrándiz
b5936c0059 Document the %(format_id)s field for the output template 2013-10-25 17:18:06 +02:00
Jaime Marquínez Ferrándiz
600cc1a4f0 [youtube] Set the format_id field to the itag of the format (closes #1624) 2013-10-25 17:17:46 +02:00
Jaime Marquínez Ferrándiz
ea32fbacc8 Fix the extensions of two tests with youtube videos
The best quality is now a mp4 video.
2013-10-25 16:55:37 +02:00
Jaime Marquínez Ferrándiz
00fe14fc75 [youtube] Also use the 'adaptative_fmts' field from the /get_video_info page (fixes #1649)
The 'adaptative_fmts' field from the video page is not added to the 'url_encoded_fmt_stream_map'
2013-10-25 16:52:58 +02:00
Jaime Marquínez Ferrándiz
fcc28edb2f [cinemassacre] Simplify
* Remove some rtmp parameters that are not needed.
* Remove the md5 checksums, the video is not downloaded.
* Remove the code used before the current format system.
2013-10-23 20:21:41 +02:00
Jaime Marquínez Ferrándiz
fac6be2dd5 Merge pull request #1632 from rzhxeo/cinemassacre
[Cinemassacre] Download video that is shown in flash player
2013-10-23 20:15:39 +02:00
Philipp Hagemeister
1cf64ee468 release 2013.10.23.2 2013-10-23 18:38:09 +02:00
Jaime Marquínez Ferrándiz
cdec0190c4 [dailymotion] Extract all the available formats (closes #1028) 2013-10-23 17:33:38 +02:00
Jaime Marquínez Ferrándiz
2450bcb28b [nowvideo] Fix key extraction
Extract it from the embed page
2013-10-23 17:00:33 +02:00
Jaime Marquínez Ferrándiz
3126050c0f Hide the video password on verbose mode 2013-10-23 16:32:17 +02:00
Jaime Marquínez Ferrándiz
93b22c7828 [vimeo] fix the extraction for videos protected with password
Added a test video.
2013-10-23 16:31:53 +02:00
Philipp Hagemeister
0a89b2852e release 2013.10.23.1 2013-10-23 15:12:33 +02:00
Jaime Marquínez Ferrándiz
55b3e45bba [vimeo] Fix pro videos and player.vimeo.com urls
The old process can still be used for those videos.
Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info.
2013-10-23 14:38:03 +02:00
Philipp Hagemeister
365bcf6d97 Merge remote-tracking branch 'origin/master' 2013-10-23 11:40:46 +02:00
Philipp Hagemeister
71907db3ba [vimeo] Fix normal videos (Fixes #1642)
Vimeo Pro Videos are still broken
2013-10-23 11:38:53 +02:00
Philipp Hagemeister
6803655ced Merge pull request #1622 from rbrito/fix-extension
extractor: youtube: Set extension of AAC audio formats to m4a.
2013-10-22 15:16:26 -07:00
rzhxeo
b0505eb611 [CinemassacreIE] Fix information extraction 2013-10-19 16:46:17 +02:00
Rogério Brito
f6f1fc9286 extractor: youtube: Fix extension of dash formats.
While we are at it, separate the audio formats from the video formats.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-18 18:53:00 -03:00
Rogério Brito
16f36a6fc9 extractor: youtube: Set extension of AAC audio formats to m4a.
This, in particular, eases downloading both audio and videos in DASH formats
before muxing them, which alleviates the problem that I exposed on issue

Furthermore, one may argue that this is, indeed, the case for correctness's
sake.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-18 17:50:55 -03:00
Jaime Marquínez Ferrándiz
f44415360e Use the console_scripts entry point if setuptools is available 2013-10-18 13:49:25 +02:00
98 changed files with 3088 additions and 996 deletions

View File

@@ -79,24 +79,27 @@ which means you can modify it, redistribute it or use it however you like.
different, %(autonumber)s to get an automatically
incremented number, %(ext)s for the filename
extension, %(format)s for the format description
(like "22 - 1280x720" or "HD")%(upload_date)s for
the upload date (YYYYMMDD), %(extractor)s for the
provider (youtube, metacafe, etc), %(id)s for the
video id , %(playlist)s for the playlist the
video is in, %(playlist_index)s for the position
in the playlist and %% for a literal percent. Use
- to output to stdout. Can also be used to
download to a different directory, for example
with -o '/my/downloads/%(uploader)s/%(title)s-%(i
d)s.%(ext)s' .
(like "22 - 1280x720" or "HD"),%(format_id)s for
the unique id of the format (like Youtube's
itags: "137"),%(upload_date)s for the upload date
(YYYYMMDD), %(extractor)s for the provider
(youtube, metacafe, etc), %(id)s for the video id
, %(playlist)s for the playlist the video is in,
%(playlist_index)s for the position in the
playlist and %% for a literal percent. Use - to
output to stdout. Can also be used to download to
a different directory, for example with -o '/my/d
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
when it is present in output filename template or
--autonumber option is given
--auto-number option is given
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
-c, --continue force resume of partially downloaded files. By
default, youtube-dl will resume downloads if
possible.
--no-continue do not resume partially downloaded files (restart
from beginning)
--cookies FILE file to read cookies from and dump cookie jar in
@@ -120,12 +123,15 @@ which means you can modify it, redistribute it or use it however you like.
--get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very
verbose)
--write-pages Write downloaded pages to files in the current
directory
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of

39
devscripts/check-porn.py Normal file
View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python
"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
if we are not 'age_limit' tagging some porn site
"""
# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_request
for test in get_testcases():
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
except:
print('\nFail: {0}'.format(test['name']))
continue
webpage = webpage.decode('utf8', 'replace')
if 'porn' in webpage.lower() and ('info_dict' not in test
or 'age_limit' not in test['info_dict']
or test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
elif 'porn' not in webpage.lower() and ('info_dict' in test and
'age_limit' in test['info_dict'] and
test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:
sys.stdout.write('.')
sys.stdout.flush()
print()

View File

@@ -8,8 +8,10 @@ import sys
try:
from setuptools import setup
setuptools_available = True
except ImportError:
from distutils.core import setup
setuptools_available = False
try:
# This will create an exe that needs Microsoft Visual C++ 2008
@@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
params = py2exe_params
else:
params = {
'scripts': ['bin/youtube-dl'],
'data_files': [ # Installing system-wide would require sudo...
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1'])
('share/man/man1', ['youtube-dl.1'])
]
}
if setuptools_available:
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
else:
params['scripts'] = ['bin/youtube-dl']
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),

View File

@@ -5,9 +5,11 @@ import json
import os.path
import re
import types
import sys
import youtube_dl.extractor
from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding
def global_setup():
@@ -33,6 +35,21 @@ def try_rm(filename):
raise
def report_warning(message):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if sys.stderr.isatty() and os.name != 'nt':
_msg_header = u'\033[0;33mWARNING:\033[0m'
else:
_msg_header = u'WARNING:'
output = u'%s %s\n' % (_msg_header, message)
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
output = output.encode(preferredencoding())
sys.stderr.write(output)
class FakeYDL(YoutubeDL):
def __init__(self, override=None):
# Different instances of the downloader can't share the same dictionary

View File

@@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_limit(self):
formats = [
{u'format_id': u'meh'},
{u'format_id': u'good'},
{u'format_id': u'great'},
{u'format_id': u'excellent'},
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
{u'format_id': u'good', u'url': u'http://example.com/good'},
{u'format_id': u'great', u'url': u'http://example.com/great'},
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
]
info_dict = {
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
@@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
}
extra_info = {
'extractor': 'Bar',
'playlist': 'funny videos',
}
YDL.add_extra_info(test_dict, extra_info)
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
if __name__ == '__main__':
unittest.main()

View File

@@ -24,7 +24,7 @@ def _download_restricted(url, filename, age):
}
ydl = YoutubeDL(params)
ydl.add_default_info_extractors()
json_filename = filename + '.info.json'
json_filename = os.path.splitext(filename)[0] + '.info.json'
try_rm(json_filename)
ydl.download([url])
res = os.path.exists(json_filename)

View File

@@ -1,70 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import DailymotionIE
class TestDailymotionSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.url = 'http://www.dailymotion.com/video/xczg00'
def getInfoDict(self):
IE = DailymotionIE(self.DL)
info_dict = IE.extract(self.url)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@@ -6,7 +6,14 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, get_testcases, global_setup, try_rm, md5
from test.helper import (
get_params,
get_testcases,
global_setup,
try_rm,
md5,
report_warning
)
global_setup()
@@ -19,10 +26,12 @@ import youtube_dl.YoutubeDL
from youtube_dl.utils import (
compat_str,
compat_urllib_error,
compat_HTTPError,
DownloadError,
ExtractorError,
UnavailableVideoError,
)
from youtube_dl.extractor import get_info_extractor
RETRIES = 3
@@ -55,17 +64,25 @@ def generator(test_case):
def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
if not ie._WORKING:
if not ie.working():
print_skipping('IE marked as not _WORKING')
return
if 'playlist' not in test_case and not test_case['file']:
print_skipping('No output file specified')
return
if 'playlist' not in test_case:
info_dict = test_case.get('info_dict', {})
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
print_skipping('The output file cannot be know, the "file" '
'key is missing or the info_dict is incomplete')
return
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
for other_ie in other_ies:
if not other_ie.working():
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
return
params = get_params(test_case.get('params', {}))
@@ -77,35 +94,48 @@ def generator(test_case):
finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook)
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
def try_rm_tcs_files():
for tc in test_cases:
tc_filename = get_tc_filename(tc)
try_rm(tc_filename)
try_rm(tc_filename + '.part')
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files()
try:
for retry in range(1, RETRIES + 1):
try_num = 1
while True:
try:
ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err:
if retry == RETRIES: raise
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
if try_num == RETRIES:
report_warning(u'Failed due to network errors, skipping...')
return
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
try_num += 1
else:
break
for tc in test_cases:
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc_filename in finished_hook_called)
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(os.path.exists(info_json_fn))
if 'md5' in tc:
md5_for_file = _file_md5(tc['file'])
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
@@ -125,11 +155,11 @@ def generator(test_case):
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally:
for tc in test_cases:
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
try_rm_tcs_files()
return test_template

View File

@@ -17,9 +17,12 @@ from youtube_dl.extractor import (
DailymotionUserIE,
VimeoChannelIE,
UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE
)
@@ -60,6 +63,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'5124905')
self.assertTrue(len(result['entries']) >= 11)
def test_soundcloud_set(self):
dl = FakeYDL()
ie = SoundcloudSetIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'The Royal Concept EP')
self.assertTrue(len(result['entries']) >= 6)
def test_soundcloud_user(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)
@@ -85,5 +96,21 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Highlights')
self.assertEqual(len(result['entries']), 12)
def test_bambuser_channel(self):
dl = FakeYDL()
ie = BambuserChannelIE(dl)
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
if __name__ == '__main__':
unittest.main()

211
test/test_subtitles.py Normal file
View File

@@ -0,0 +1,211 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import (
YoutubeIE,
DailymotionIE,
TEDIE,
)
class BaseTestSubtitles(unittest.TestCase):
url = None
IE = None
def setUp(self):
self.DL = FakeYDL()
self.ie = self.IE(self.DL)
def getInfoDict(self):
info_dict = self.ie.extract(self.url)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict['subtitles']
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'sbv'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 28)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@@ -24,6 +24,8 @@ from youtube_dl.utils import (
xpath_with_ns,
smuggle_url,
unsmuggle_url,
shell_quote,
encodeFilename,
)
if sys.version_info < (3, 0):
@@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url)
self.assertEqual(res_data, None)
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
if __name__ == '__main__':
unittest.main()

View File

@@ -31,7 +31,7 @@ params = get_params({
TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐

View File

@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('PLBB231211A4F62143')[0]
result = ie.extract('PLBB231211A4F62143')
self.assertTrue(len(result['entries']) > 25)
def test_youtube_playlist_long(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 799)
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
#651
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist_empty(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
self.assertIsPlaylist(result)
self.assertEqual(len(result['entries']), 0)
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25)
@@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
self.assertEqual(len(result['entries']), 2)
def test_youtube_show(self):

View File

@@ -1,95 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import YoutubeIE
class TestYoutubeSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.url = 'QRS8MkLhQmM'
def getInfoDict(self):
IE = YoutubeIE(self.DL)
info_dict = IE.extract(self.url)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'sbv'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@@ -4,12 +4,16 @@ import re
import subprocess
import sys
import time
import traceback
if os.name == 'nt':
import ctypes
from .utils import *
from .utils import (
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
determine_ext,
encodeFilename,
sanitize_open,
timeconvert,
)
class FileDownloader(object):
@@ -144,16 +148,8 @@ class FileDownloader(object):
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
@@ -194,7 +190,7 @@ class FileDownloader(object):
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
except (IOError, OSError):
self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
@@ -227,8 +223,14 @@ class FileDownloader(object):
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
eta_str = self.format_eta(eta)
percent_str = self.format_percent(percent)
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
@@ -236,7 +238,7 @@ class FileDownloader(object):
else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
@@ -251,7 +253,7 @@ class FileDownloader(object):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except (UnicodeEncodeError) as err:
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
@@ -267,7 +269,7 @@ class FileDownloader(object):
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
@@ -294,6 +296,8 @@ class FileDownloader(object):
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@@ -366,15 +370,20 @@ class FileDownloader(object):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
# Check for ffmpeg first
try:
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
return False
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
retval = subprocess.call(args)
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
@@ -411,7 +420,8 @@ class FileDownloader(object):
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None))
info_dict.get('tc_url', None),
info_dict.get('rtmp_live', False))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
@@ -550,12 +560,11 @@ class FileDownloader(object):
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
eta = None
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,

View File

@@ -501,7 +501,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s="%s"' % (name, value)])
options.extend(['-metadata', '%s=%s' % (name, value)])
options.extend(['-f', ext])
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)

View File

@@ -5,6 +5,7 @@ from __future__ import absolute_import
import errno
import io
import json
import os
import re
import shutil
@@ -13,7 +14,34 @@ import sys
import time
import traceback
from .utils import *
if os.name == 'nt':
import ctypes
from .utils import (
compat_http_client,
compat_print,
compat_str,
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
date_from_str,
DateRange,
determine_ext,
DownloadError,
encodeFilename,
ExtractorError,
locked_file,
MaxDownloadsReached,
PostProcessingError,
preferredencoding,
SameFileError,
sanitize_filename,
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
write_json_file,
write_string,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
@@ -57,6 +85,7 @@ class YoutubeDL(object):
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
forcejson: Force printing info_dict as JSON.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
@@ -68,6 +97,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
@@ -163,7 +193,9 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False):
if self.params.get('logger'):
self.params['logger'].debug(message)
elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
@@ -171,10 +203,44 @@ class YoutubeDL(object):
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Save the title on stack
write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Restore the title from stack
write_string(u'\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
return self
def __exit__(self, *args):
self.restore_console_title()
def fixed_template(self):
"""Checks if the output template is fixed."""
@@ -254,7 +320,7 @@ class YoutubeDL(object):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except (UnicodeEncodeError) as err:
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
@@ -272,7 +338,7 @@ class YoutubeDL(object):
autonumber_size = 5
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
template_dict['autonumber'] = autonumber_templ % self._num_downloads
if template_dict['playlist_index'] is not None:
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
sanitize = lambda k, v: sanitize_filename(
@@ -295,15 +361,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
@@ -314,10 +382,16 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive'
% info_dict)
return (u'%s has already been recorded in archive'
% info_dict.get('title', info_dict.get('id', u'video')))
return None
@staticmethod
def add_extra_info(info_dict, extra_info):
'''Set the keys from extra_info in info dict if they are missing'''
for key, value in extra_info.items():
info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'''
Returns a list with a dictionary for each video we find.
@@ -344,17 +418,17 @@ class YoutubeDL(object):
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
for result in ie_result:
result.update(extra_info)
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
else:
ie_result.update(extra_info)
if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME
return self.process_ie_result(ie_result, download=download)
self.add_extra_info(ie_result,
{
'extractor': ie.IE_NAME,
'webpage_url': url,
'extractor_key': ie.ie_key(),
})
return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -378,8 +452,8 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video':
ie_result.update(extra_info)
return self.process_video_result(ie_result)
self.add_extra_info(ie_result, extra_info)
return self.process_video_result(ie_result, download=download)
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
@@ -388,6 +462,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -413,12 +488,16 @@ class YoutubeDL(object):
extra = {
'playlist': playlist,
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
if not 'extractor' in entry:
# We set the extractor, if it's an url it will be set then to
# the new extractor, but if it's already a video we must make
# sure it's present: see issue #877
entry['extractor'] = ie_result['extractor']
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -427,10 +506,15 @@ class YoutubeDL(object):
return ie_result
elif result_type == 'compat_list':
def _fixup(r):
r.setdefault('extractor', ie_result['extractor'])
self.add_extra_info(r,
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
})
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download=download)
self.process_ie_result(_fixup(r), download, extra_info)
for r in ie_result['entries']
]
return ie_result
@@ -462,7 +546,7 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
# This extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
if info_dict['extractor'] in [u'youtube', u'Youku']:
if download:
self.process_info(info_dict)
return info_dict
@@ -482,8 +566,11 @@ class YoutubeDL(object):
format['format'] = u'{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
)
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url'])
if self.params.get('listformats', None):
self.list_formats(info_dict)
@@ -521,7 +608,8 @@ class YoutubeDL(object):
formats_to_download = [selected_format]
break
if not formats_to_download:
raise ExtractorError(u'requested format not available')
raise ExtractorError(u'requested format not available',
expected=True)
if download:
if len(formats_to_download) > 1:
@@ -565,20 +653,22 @@ class YoutubeDL(object):
# Forced printings
if self.params.get('forcetitle', False):
compat_print(info_dict['title'])
compat_print(info_dict['fulltitle'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
compat_print(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and 'description' in info_dict:
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
compat_print(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
compat_print(filename)
if self.params.get('forceformat', False):
compat_print(info_dict['format'])
if self.params.get('forcejson', False):
compat_print(json.dumps(info_dict))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -626,7 +716,7 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
@@ -641,7 +731,7 @@ class YoutubeDL(object):
return
if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json'
infofn = os.path.splitext(filename)[0] + u'.info.json'
self.report_writeinfojson(infofn)
try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
@@ -734,7 +824,16 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
extractor = info_dict.get('extractor_id')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return False # Incomplete video information
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = extractor.lower()
vid_id = extractor + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
@@ -754,30 +853,58 @@ class YoutubeDL(object):
archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format):
def format_resolution(format, default='unknown'):
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('height') is not None:
if format.get('width') is not None:
res = u'%sx%s' % (format['width'], format['height'])
else:
res = u'%sp' % format['height']
else:
res = '???'
res = default
return res
def list_formats(self, info_dict):
formats_s = []
for format in info_dict.get('formats', [info_dict]):
formats_s.append(u'%-15s: %-5s %-15s[%s]' % (
def format_note(fdict):
if fdict.get('format_note') is not None:
return fdict['format_note']
res = u''
if fdict.get('vcodec') is not None:
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
if fdict.get('vbr') is not None:
res += u'@%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += u', '
res += u'%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += u', '
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
return res
def line(format):
return (u'%-20s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
format.get('format_note') or '-',
self.format_resolution(format),
format_note(format),
)
)
if len(formats_s) != 1:
formats_s[0] += ' (worst)'
formats_s[-1] += ' (best)'
formats_s = "\n".join(formats_s)
self.to_screen(u'[info] Available formats for %s:\n'
u'format code extension note resolution\n%s' % (
info_dict['id'], formats_s))
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'})
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))

View File

@@ -32,6 +32,10 @@ __authors__ = (
'Ismael Mejía',
'Steffan \'Ruirize\' James',
'Andras Elso',
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
)
__license__ = 'Public Domain'
@@ -133,7 +137,7 @@ def parseOpts(overrideArguments=None):
def _hide_login_info(opts):
opts = list(opts)
for private_opt in ['-p', '--password', '-u', '--username']:
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
try:
i = opts.index(private_opt)
opts[i+1] = '<PRIVATE>'
@@ -304,6 +308,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-format',
action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False)
verbosity.add_option('-j', '--dump-json',
action='store_true', dest='dumpjson',
help='simulate, quiet but print JSON information', default=False)
verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress',
@@ -316,6 +323,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False,
help='Write downloaded pages to files in the current directory')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -336,7 +346,8 @@ def parseOpts(overrideArguments=None):
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
'%(autonumber)s to get an automatically incremented number, '
'%(ext)s for the filename extension, '
'%(format)s for the format description (like "22 - 1280x720" or "HD")'
'%(format)s for the format description (like "22 - 1280x720" or "HD"),'
'%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),'
'%(upload_date)s for the upload date (YYYYMMDD), '
'%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
@@ -345,7 +356,7 @@ def parseOpts(overrideArguments=None):
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
filesystem.add_option('--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -354,7 +365,7 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
filesystem.add_option('--no-continue',
action='store_false', dest='continue_dl',
help='do not resume partially downloaded files (restart from beginning)')
@@ -597,13 +608,12 @@ def _real_main(argv=None):
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
# YoutubeDL
ydl = YoutubeDL({
ydl_opts = {
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forceid': opts.getid,
@@ -611,8 +621,9 @@ def _real_main(argv=None):
'forcedescription': opts.getdescription,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'format': opts.format,
'format_limit': opts.format_limit,
'listformats': opts.listformats,
@@ -651,6 +662,7 @@ def _real_main(argv=None):
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
@@ -660,61 +672,63 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
})
}
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
with YoutubeDL(ydl_opts) as ydl:
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sys.exc_clear()
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:

View File

@@ -1,5 +1,6 @@
from .appletrailers import AppleTrailersIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import (
@@ -9,7 +10,8 @@ from .arte import (
ArteTVFutureIE,
)
from .auengine import AUEngineIE
from .bandcamp import BandcampIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .breakcom import BreakIE
@@ -24,6 +26,7 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
@@ -37,8 +40,10 @@ from .defense import DefenseGouvFrIE
from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .escapist import EscapistIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fktv import (
@@ -54,6 +59,7 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE
from .generic import GenericIE
@@ -72,16 +78,19 @@ from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mofosex import MofosexIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
@@ -89,11 +98,13 @@ from .nba import NBAIE
from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
@@ -108,22 +119,31 @@ from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE
from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
)
from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tf1 import TF1IE
from .thisav import ThisAVIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .tvp import TvpIE
from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
@@ -137,6 +157,8 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
from .weibo import WeiboIE
@@ -145,6 +167,7 @@ from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
@@ -153,6 +176,7 @@ from .youtube import (
YoutubeIE,
YoutubePlaylistIE,
YoutubeSearchIE,
YoutubeSearchDateIE,
YoutubeUserIE,
YoutubeChannelIE,
YoutubeShowIE,

View File

@@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor):
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
u'file': u'24MR3YO5SAS9.flv',
u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
u'file': u'24MR3YO5SAS9.mp4',
u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
u'info_dict': {
u"description": u"One Piece 606",
u"title": u"One Piece 606"
@@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
if not isinstance(ee.cause, compat_HTTPError) or \
ee.cause.code != 503:
raise
redir_webpage = ee.cause.read().decode('utf-8')
@@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
note=u'Confirming after redirect')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
webpage, u'video file URL')
formats = []
for format_id in ('normal', 'hq'):
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, u'video file URLx',
fatal=False)
if not video_url:
continue
formats.append({
'format_id': format_id,
'url': video_url,
})
if not formats:
raise ExtractorError(u'Cannot find any video format!')
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
return {
'_type': 'video',
'id': video_id,
'url': video_url,
'ext': 'flv',
'formats': formats,
'title': video_title,
'description': video_description
}

View File

@@ -0,0 +1,55 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
IE_NAME = u'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.anitube.se/video/36621',
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
u'file': u'36621.mp4',
u'info_dict': {
u'id': u'36621',
u'ext': u'mp4',
u'title': u'Recorder to Randoseru 01',
},
u'skip': u'Blocked in the US',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text
formats = []
video_url = config_xml.find('file')
if video_url is not None:
formats.append({
'format_id': 'sd',
'url': video_url.text,
})
video_url = config_xml.find('filehd')
if video_url is not None:
formats.append({
'format_id': 'hd',
'url': video_url.text,
})
return {
'id': video_id,
'title': video_title,
'formats': formats
}

View File

@@ -10,6 +10,7 @@ from ..utils import (
unified_strdate,
determine_ext,
get_element_by_id,
compat_str,
)
# There are different sources of video in arte.tv, the extraction process
@@ -68,7 +69,7 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None:
if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
@@ -114,7 +115,7 @@ class ArteTvIE(InfoExtractor):
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
url_node = video_doc.find('urlSd')
url_node = event_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,
@@ -158,7 +159,9 @@ class ArteTVPlus7IE(InfoExtractor):
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
formats = player_info['VSR'].values()
all_formats = player_info['VSR'].values()
# Some formats use the m3u8 protocol
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
def _match_lang(f):
if f.get('versionCode') is None:
return True
@@ -170,24 +173,39 @@ class ArteTVPlus7IE(InfoExtractor):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# Some formats use the m3u8 protocol
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
# We order the formats by quality
formats = filter(_match_lang, all_formats)
formats = list(formats) # in python3 filter returns an iterator
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' for f in all_formats):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
def sort_key(f):
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
sort_key = lambda f: int(f.get('height',-1))
def sort_key(f):
return (
# Sort first by quality
int(f.get('height',-1)),
int(f.get('bitrate',-1)),
# The original version with subtitles has lower relevance
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
)
formats = sorted(formats, key=sort_key)
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
# Pick the best quality
def _format(format_info):
quality = format_info['quality']
m_quality = re.match(r'\w*? - (\d*)p', quality)
if m_quality is not None:
quality = m_quality.group(1)
quality = ''
height = format_info.get('height')
if height is not None:
quality = compat_str(height)
bitrate = format_info.get('bitrate')
if bitrate is not None:
quality += '-%d' % bitrate
if format_info.get('versionCode') is not None:
format_id = u'%s-%s' % (quality, format_info['versionCode'])
else:
@@ -196,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
'format_id': format_id,
'format_note': format_info.get('versionLibelle'),
'width': format_info.get('width'),
'height': format_info.get('height'),
'height': height,
}
if format_info['mediaType'] == u'rtmp':
info['url'] = format_info['streamer']

View File

@@ -1,10 +1,10 @@
import os.path
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_parse_urlparse,
determine_ext,
ExtractorError,
)
class AUEngineIE(InfoExtractor):
@@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, u'title')
title = title.strip()
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
links = [compat_urllib_parse.unquote(l) for l in links]
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = map(compat_urllib_parse.unquote, links)
thumbnail = None
video_url = None
for link in links:
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
if pathext == '.png':
if link.endswith('.png'):
thumbnail = link
elif pathext == '.mp4':
url = link
ext = pathext
elif '/videos/' in link:
video_url = link
if not video_url:
raise ExtractorError(u'Could not find video URL')
ext = u'.' + determine_ext(video_url)
if ext == title[-len(ext):]:
title = title[:-len(ext)]
ext = ext[1:]
return [{
return {
'id': video_id,
'url': url,
'ext': ext,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}]
}

View File

@@ -0,0 +1,81 @@
import re
import json
import itertools
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
)
class BambuserIE(InfoExtractor):
IE_NAME = u'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa'
_TEST = {
u'url': u'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': {
u'id': u'4050584',
u'ext': u'flv',
u'title': u'Education engineering days - lightning talks',
u'duration': 3741,
u'uploader': u'pixelversity',
u'uploader_id': u'344706',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)['result']
return {
'id': video_id,
'title': info['title'],
'url': info['url'],
'thumbnail': info.get('preview'),
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
'uploader_id': info['uid'],
}
class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel'
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
urls = []
last_id = ''
for i in itertools.count(1):
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user,
u'Downloading page %d' % i)
results = json.loads(info_json)['result']
if len(results) == 0:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
return {
'_type': 'playlist',
'title': user,
'entries': urls,
}

View File

@@ -3,13 +3,16 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urlparse,
ExtractorError,
)
class BandcampIE(InfoExtractor):
IE_NAME = u'Bandcamp'
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TEST = {
_TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
@@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
# We get the link to the free download page
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
}
else:
raise ExtractorError(u'No free songs found')
download_link = m_download.group(1)
@@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
}
return [track_info]
class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError(u'The page doesn\'t contain any track')
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths]
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
return {
'_type': 'playlist',
'title': title,
'entries': entries,
}

View File

@@ -9,10 +9,13 @@ from ..utils import (
compat_urllib_parse,
find_xpath_attr,
compat_urlparse,
compat_str,
compat_urllib_request,
ExtractorError,
)
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -23,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
@@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
u'uploader': u'Oracle',
},
},
{
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
u'info_dict': {
u'id': u'2750934548001',
u'ext': u'mp4',
u'title': u'This Bracelet Acts as a Personal Thermostat',
u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
u'uploader': u'Mashable',
},
},
]
@classmethod
@@ -61,31 +75,58 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
}
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
def find_param(name):
return find_xpath_attr(object_doc, './param', 'name', name)
playerKey = find_param('playerKey')
# Not all pages define this value
if playerKey is not None:
params['playerKey'] = playerKey.attrib['value']
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
# The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value']
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
@classmethod
def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None
if it can't be found
"""
m_brightcove = re.search(
r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
webpage, re.DOTALL)
if m_brightcove is not None:
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
def _real_extract(self, url):
# Change the 'videoId' and others field to '@videoPlayer'
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url)
query_str = mobj.group('query')
query = compat_urlparse.parse_qs(query_str)
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
return self._get_video_info(videoPlayer[0], query_str)
return self._get_video_info(videoPlayer[0], query_str, query)
else:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query):
request_url = self._FEDERATED_URL_TEMPLATE % query
webpage = self._download_webpage(request_url, video_id)
def _get_video_info(self, video_id, query_str, query):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
req.add_header('Referer', linkBase[0])
webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -109,7 +150,7 @@ class BrightcoveIE(InfoExtractor):
def _extract_video_info(self, video_info):
info = {
'id': video_info['id'],
'id': compat_str(video_info['id']),
'title': video_info['displayName'],
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@@ -119,15 +160,14 @@ class BrightcoveIE(InfoExtractor):
renditions = video_info.get('renditions')
if renditions:
renditions = sorted(renditions, key=lambda r: r['size'])
best_format = renditions[-1]
info.update({
'url': best_format['defaultURL'],
'ext': 'mp4',
})
info['formats'] = [{
'url': rend['defaultURL'],
'height': rend.get('frameHeight'),
'width': rend.get('frameWidth'),
} for rend in renditions]
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],
'ext': 'flv',
})
else:
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
class Canalc2IE(InfoExtractor):
IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
_TEST = {
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
@@ -18,7 +18,9 @@ class Canalc2IE(InfoExtractor):
}
def _real_extract(self, url):
video_id = re.match(self._VALID_URL, url).group(1)
video_id = re.match(self._VALID_URL, url).group('id')
# We need to set the voir field for getting the file name
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
webpage = self._download_webpage(url, video_id)
file_name = self._search_regex(
r"so\.addVariable\('file','(.*?)'\);",

View File

@@ -5,6 +5,7 @@ import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
@@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.groupdict().get('id')
if video_id is None:
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')

View File

@@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor):
webpage_url = u'http://' + mobj.group('url')
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj:
raise ExtractorError(u'Can\'t extract embed url and video id')
playerdata_url = mobj.group(u'embed_url')
@@ -55,30 +55,32 @@ class CinemassacreIE(InfoExtractor):
video_description = None
playerdata = self._download_webpage(playerdata_url, video_id)
base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'',
playerdata, u'base_url')
base_url += '/Cinemassacre/'
# Important: The file names in playerdata are not used by the player and even wrong for some videos
sd_file = 'Cinemassacre-%s_high.mp4' % video_id
hd_file = 'Cinemassacre-%s.mp4' % video_id
video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
formats = [
{
'url': base_url + sd_file,
'url': url,
'play_path': 'mp4:' + sd_file,
'rtmp_live': True, # workaround
'ext': 'flv',
'format': 'sd',
'format_id': 'sd',
},
{
'url': base_url + hd_file,
'url': url,
'play_path': 'mp4:' + hd_file,
'rtmp_live': True, # workaround
'ext': 'flv',
'format': 'hd',
'format_id': 'hd',
},
]
info = {
return {
'id': video_id,
'title': video_title,
'formats': formats,
@@ -86,6 +88,3 @@ class CinemassacreIE(InfoExtractor):
'upload_date': video_date,
'thumbnail': video_thumbnail,
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -6,7 +6,7 @@ from ..utils import determine_ext
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{

View File

@@ -71,10 +71,8 @@ class CollegeHumorIE(InfoExtractor):
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
except IndexError:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')

View File

@@ -14,6 +14,8 @@ from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
)
@@ -61,7 +63,7 @@ class InfoExtractor(object):
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height
Calculated from the format_id, width, height.
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
@@ -69,6 +71,13 @@ class InfoExtractor(object):
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings.
@@ -181,6 +190,17 @@ class InfoExtractor(object):
self.to_screen(u'Dumping request to ' + url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
raw_filename = ('%s_%s.dump' % (video_id, url))
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen(u'Saving request to ' + filename)
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace')
return (content, urlh)
@@ -209,12 +229,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
def url_result(self, url, ie=None):
def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
@@ -231,7 +253,7 @@ class InfoExtractor(object):
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
In case of failure return a default value or raise a WARNING or a
ExtractorError, depending on fatal, specifying the field name.
RegexNotFoundError, depending on fatal, specifying the field name.
"""
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
mobj = re.search(pattern, string, flags)
@@ -251,7 +273,7 @@ class InfoExtractor(object):
elif default is not None:
return default
elif fatal:
raise ExtractorError(u'Unable to extract %s' % _name)
raise RegexNotFoundError(u'Unable to extract %s' % _name)
else:
self._downloader.report_warning(u'unable to extract %s; '
u'please report this issue on http://yt-dl.org/bug' % _name)
@@ -299,13 +321,21 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regex(prop):
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
template % (property_re, content_re),
template % (content_re, property_re),
]
def _og_search_property(self, prop, html, name=None, **kargs):
if name is None:
name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None:
return None
return unescapeHTML(escaped)
def _og_search_thumbnail(self, html, **kargs):
@@ -317,10 +347,21 @@ class InfoExtractor(object):
def _og_search_title(self, html, **kargs):
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', **kargs):
return self._html_search_regex([self._og_regex('video:secure_url'),
self._og_regex('video')],
html, name, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
regexes = self._og_regexes('video')
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _html_search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
def _rta_search(self, html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single
@@ -330,6 +371,23 @@ class InfoExtractor(object):
return 18
return 0
def _media_rating_search(self, html):
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
rating = self._html_search_meta('rating', html)
if not rating:
return None
RATING_TABLE = {
'safe for kids': 0,
'general': 8,
'14 years': 14,
'mature': 17,
'restricted': 19,
}
return RATING_TABLE.get(rating.lower(), None)
class SearchInfoExtractor(InfoExtractor):
"""

View File

@@ -0,0 +1,22 @@
# encoding: utf-8
from .canalplus import CanalplusIE
class D8IE(CanalplusIE):
_VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
IE_NAME = u'd8.tv'
_TEST = {
u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
u'file': u'966289.flv',
u'info_dict': {
u'title': u'Campagne intime - Documentaire exceptionnel',
u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
u'upload_date': u'20131108',
},
u'params': {
# rtmp
u'skip_download': True,
},
}

View File

@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
request.add_header('Cookie', 'ff=off')
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -28,6 +29,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
IE_NAME = u'dailymotion'
_FORMATS = [
(u'stream_h264_ld_url', u'ld'),
(u'stream_h264_url', u'standard'),
(u'stream_h264_hq_url', u'hq'),
(u'stream_h264_hd_url', u'hd'),
(u'stream_h264_hd1080_url', u'hd180'),
]
_TESTS = [
{
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -52,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
},
u'skip': u'VEVO is only available in some countries',
},
# age-restricted video
{
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
u'file': u'xyh2zz.mp4',
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
u'info_dict': {
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
u'uploader': 'HotWaves1012',
u'age_limit': 18,
}
}
]
def _real_extract(self, url):
@@ -60,7 +82,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_extension = 'mp4'
url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
@@ -82,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
webpage, 'video uploader')
webpage, 'video uploader', fatal=False)
age_limit = self._rta_search(webpage)
video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -99,37 +121,43 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
raise ExtractorError(msg, expected=True)
# TODO: support choosing qualities
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
'stream_h264_hq_url','stream_h264_url',
'stream_h264_ld_url']:
if info.get(key):#key in info and info[key]:
max_quality = key
self.to_screen(u'Using %s' % key)
break
else:
formats = []
for (key, format_id) in self._FORMATS:
video_url = info.get(key)
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = m_size.group(1), m_size.group(2)
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError(u'Unable to extract video URL')
video_url = info[max_quality]
# subtitles
video_subtitles = self.extract_subtitles(video_id)
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
self._list_available_subtitles(video_id, webpage)
return
return [{
return {
'id': video_id,
'url': video_url,
'formats': formats,
'uploader': video_uploader,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'ext': video_extension,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
}
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@@ -158,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:

View File

@@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
url = 'http://depositfiles.com/en/files/' + file_id
# Retrieve file webpage with 'Free download' button pressed
free_download_indication = { 'gateway_result' : '1' }
free_download_indication = {'gateway_result' : '1'}
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
try:
self.report_download_webpage(file_id)

View File

@@ -1,4 +1,3 @@
import itertools
import json
import random
import re
@@ -101,7 +100,7 @@ class EightTracksIE(InfoExtractor):
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
res = []
for i in itertools.count():
for i in range(track_count):
api_json = self._download_webpage(next_url, playlist_id,
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
errnote=u'Failed to download song information')
@@ -116,7 +115,5 @@ class EightTracksIE(InfoExtractor):
'ext': 'm4a',
}
res.append(info)
if api_data['set']['at_last_track']:
break
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
return res

View File

@@ -0,0 +1,37 @@
# encoding: utf-8
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from ..utils import ExtractorError
class EitbIE(InfoExtractor):
IE_NAME = u'eitb.tv'
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
_TEST = {
u'add_ie': ['Brightcove'],
u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
u'md5': u'edf4436247185adee3ea18ce64c47998',
u'info_dict': {
u'id': u'2743577154001',
u'ext': u'mp4',
u'title': u'60 minutos (Lasa y Zabala, 30 años)',
# All videos from eitb has this description in the brightcove info
u'description': u'.',
u'uploader': u'Euskal Telebista',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
chapter_id = mobj.group('chapter_id')
webpage = self._download_webpage(url, chapter_id)
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is None:
raise ExtractorError(u'Could not extract the Brightcove url')
# The BrightcoveExperience object doesn't contain the video id, we set
# it manually
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
return self.url_result(bc_url, BrightcoveIE.ie_key())

View File

@@ -11,11 +11,11 @@ from ..utils import (
class EscapistIE(InfoExtractor):
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_TEST = {
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
u'info_dict': {
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
u"uploader": u"the-escapist-presents",
@@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
showName = mobj.group('showname')
videoId = mobj.group('episode')
self.report_extraction(videoId)
webpage = self._download_webpage(url, videoId)
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, u'description', fatal=False)
playerUrl = self._og_search_video_url(webpage, name='player url')
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
webpage, u'player url').split(' : ')[-1]
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
webpage, u'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
configJSON = self._download_webpage(configUrl, videoId,
u'Downloading configuration',
u'unable to download configuration')
formats = []
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
def _add_format(name, cfgurl):
configJSON = self._download_webpage(
cfgurl, videoId,
u'Downloading ' + name + ' configuration',
u'Unable to download ' + name + ' configuration')
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
try:
config = json.loads(configJSON)
except (ValueError,) as err:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
playlist = config['playlist']
formats.append({
'url': playlist[1]['url'],
'format_id': name,
})
_add_format(u'normal', configUrl)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
try:
config = json.loads(configJSON)
except (ValueError,) as err:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
_add_format(u'hq', hq_url)
except ExtractorError:
pass # That's fine, we'll just use normal quality
playlist = config['playlist']
videoUrl = playlist[1]['url']
info = {
return {
'id': videoId,
'url': videoUrl,
'formats': formats,
'uploader': showName,
'upload_date': None,
'title': title,
'ext': 'mp4',
'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc,
'player_url': playerUrl,
}
return [info]

View File

@@ -11,16 +11,17 @@ class ExfmIE(InfoExtractor):
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_TESTS = [
{
u'url': u'http://ex.fm/song/1bgtzg',
u'file': u'95223130.mp3',
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
u'url': u'http://ex.fm/song/eh359',
u'file': u'44216187.mp3',
u'md5': u'e45513df5631e6d760970b14cc0c11e7',
u'info_dict': {
u"title": u"We Can't Stop - Miley Cyrus",
u"uploader": u"Miley Cyrus",
u'upload_date': u'20130603',
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
u"uploader": u"deadjournalist",
u'upload_date': u'20120424',
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
},
u'note': u'Soundcloud song',
u'skip': u'The site is down too often',
},
{
u'url': u'http://ex.fm/song/wddt8',
@@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
u'title': u'Safe and Sound',
u'uploader': u'Capital Cities',
},
u'skip': u'The site is down too often',
},
]

View File

@@ -0,0 +1,50 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
u'file': u'652431.mp4',
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
u'info_dict': {
u"title": u"Music Video 14 british euro brit european cumshots swallow",
u"uploader": u"unknown",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
return {
'id': video_id,
'title': video_title,
'uploader': uploader,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@@ -1,5 +1,4 @@
import json
import netrc
import re
import socket
@@ -19,7 +18,8 @@ class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook'
IE_NAME = u'facebook'
_TEST = {
@@ -36,50 +36,56 @@ class FacebookIE(InfoExtractor):
"""Report attempt to log in."""
self.to_screen(u'Logging in')
def _real_initialize(self):
if self._downloader is None:
return
useremail = None
password = None
downloader_params = self._downloader.params
# Attempt to use provided username and password or .netrc data
if downloader_params.get('username', None) is not None:
useremail = downloader_params['username']
password = downloader_params['password']
elif downloader_params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
useremail = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
return
def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:
return
# Log in
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US')
self.report_login()
login_page = self._download_webpage(login_page_req, None, note=False,
errnote=u'Unable to download login page')
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
login_form = {
'email': useremail,
'pass': password,
'login': 'Log+In'
'lsd': lsd,
'lgnrnd': lgnrnd,
'next': 'http://facebook.com/home.php',
'default_persistent': '0',
'legacy_return': '1',
'timezone': '-60',
'trynum': '1',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
self.report_login()
login_results = compat_urllib_request.urlopen(request).read()
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
check_form = {
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = compat_urllib_request.urlopen(check_req).read()
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
return
def _real_initialize(self):
self._login()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -93,7 +99,13 @@ class FacebookIE(InfoExtractor):
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
if not m:
raise ExtractorError(u'Cannot parse data')
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
if m_msg is not None:
raise ExtractorError(
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
else:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)

View File

@@ -5,8 +5,6 @@ import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
)
@@ -47,12 +45,12 @@ class FazIE(InfoExtractor):
'format_id': code.lower(),
})
descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
info = {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': clean_html(descr_html),
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
}
# TODO: Remove when #980 has been merged

View File

@@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i)
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
video_title = 'Fernsehkritik %d.%d' % (episode, i)
videos.append({
'id': video_id,
'url': video_url,

View File

@@ -0,0 +1,38 @@
import re
from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
# MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
video_url = self._og_search_video_url(webpage)
video = re.search(r'[0-9]+', video_url)
video_id = video.group(0)
# Todo: add medium format
video_url = video_url.replace(video_id, 'large/' + video_id)
return {
'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
}

View File

@@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = video_id = mobj.group('page_id')
page_id = mobj.group('page_id')
webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
data_video = json.loads(unescapeHTML(data_video_json))

View File

@@ -25,7 +25,7 @@ class GenericIE(InfoExtractor):
{
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
u'file': u'13601338388002.mp4',
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd',
u'info_dict': {
u"uploader": u"www.hodiho.fr",
u"title": u"R\u00e9gis plante sa Jeep"
@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
},
# embedded vimeo video
{
u'add_ie': ['Vimeo'],
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
u'file': u'22444065.mp4',
u'md5': u'2903896e23df39722c33f015af0666e2',
@@ -41,7 +42,35 @@ class GenericIE(InfoExtractor):
u"uploader_id": u"skillsmatter",
u"uploader": u"Skills Matter",
}
}
},
# bandcamp page with custom domain
{
u'add_ie': ['Bandcamp'],
u'url': u'http://bronyrock.com/track/the-pony-mash',
u'file': u'3235767654.mp3',
u'info_dict': {
u'title': u'The Pony Mash',
u'uploader': u'M_Pallante',
},
u'skip': u'There is a limit of 200 free downloads / month for the test song',
},
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
u'add_ie': ['Brightcove'],
u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
u'info_dict': {
u'id': u'2765128793001',
u'ext': u'mp4',
u'title': u'Le cours de bourse : lanalyse technique',
u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
u'uploader': u'BFM BUSINESS',
},
u'params': {
u'skip_download': True,
},
},
]
def report_download_webpage(self, video_id):
@@ -133,11 +162,20 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Failed to download URL: %s' % url)
self.report_extraction(video_id)
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title', default=u'video', flags=re.DOTALL)
# Look for BrightCove:
m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is not None:
self.to_screen(u'Brightcove video detected.')
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
return self.url_result(bc_url, 'Brightcove')
# Look for embedded Vimeo player
@@ -149,11 +187,20 @@ class GenericIE(InfoExtractor):
return self.url_result(surl, 'Vimeo')
# Look for embedded YouTube player
mobj = re.search(
r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
if mobj:
surl = unescapeHTML(mobj.group(1))
return self.url_result(surl, 'Youtube')
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
burl = unescapeHTML(mobj.group(1))
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
@@ -192,15 +239,6 @@ class GenericIE(InfoExtractor):
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title', default=u'video', flags=re.DOTALL)
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')

View File

@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly"

View File

@@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
track_id = mobj.group(1)
data = { 'ax': 1, 'ts': time.time() }
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
@@ -68,4 +68,4 @@ class HypemIE(InfoExtractor):
'ext': "mp3",
'title': title,
'artist': artist,
}]
}]

View File

@@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor):
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
'url': self._og_search_video_url(webpage, secure=False),
'ext': 'mp4',
'title': u'Video by %s' % uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />',

View File

@@ -1,8 +1,10 @@
import re
import hashlib
from .common import InfoExtractor
from ..utils import determine_ext
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
@@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
video_id, u'Downloading video url info')
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
video_url = 'http://%s%s' % (ip, path)
param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
key = _md5('xl_mp43651' + param1 + param2)
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
return {'id': video_id,
'title': title,

View File

@@ -0,0 +1,61 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
from ..aes import (
aes_decrypt_text
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
u'file': u'1214711.mp4',
u'md5': u'6e297b7e789329923fcf83abb67c9289',
u'info_dict': {
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
# embedded video
mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
if mobj:
embedded_url = mobj.group(1)
return self.url_result(embedded_url)
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
if webpage.find('encrypted=true')!=-1:
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': age_limit,
}

View File

@@ -1,16 +1,17 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
get_meta_content,
ExtractorError,
xpath_with_ns,
)
class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream'
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
@@ -40,13 +41,9 @@ class LivestreamIE(InfoExtractor):
if video_id is None:
# This is an event page:
player = get_meta_content('twitter:player', webpage)
if player is None:
raise ExtractorError('Couldn\'t extract event api url')
api_url = player.replace('/player', '')
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
info = json.loads(self._download_webpage(api_url, event_name,
u'Downloading event info'))
config_json = self._search_regex(r'window.config = ({.*?});',
webpage, u'window config')
info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data'] if video_data['type'] == u'video']
return self.playlist_result(videos, info['id'], info['full_name'])
@@ -58,3 +55,44 @@ class LivestreamIE(InfoExtractor):
info = json.loads(self._download_webpage(api_url, video_id,
u'Downloading video info'))
return self._extract_video_info(info)
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = u'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
_TEST = {
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'info_dict': {
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'ext': u'flv',
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
},
u'params': {
# rtmp
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
# Remove the extension and number from the path (like 1.jpg)
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
return {
'id': video_id,
'title': item.find('title').text,
'url': 'rtmp://extondemand.livestream.com/ondemand',
'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
'ext': 'flv',
'thumbnail': thumbnail_url,
}

View File

@@ -20,10 +20,12 @@ class MetacafeIE(InfoExtractor):
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = u'metacafe'
_TESTS = [{
_TESTS = [
# Youtube video
{
u"add_ie": ["Youtube"],
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
u"file": u"_aUehQsCQtM.flv",
u"file": u"_aUehQsCQtM.mp4",
u"info_dict": {
u"upload_date": u"20090102",
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
u"uploader_id": u"PBS"
}
},
# Normal metacafe video
{
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
u'info_dict': {
u'id': u'11121940',
u'ext': u'mp4',
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
u'uploader': u'ign',
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
},
},
# AnyClip video
{
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
u"file": u"an-dVVXnuY7Jh77J.mp4",
u"info_dict": {
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
u"uploader": u"anyclip",
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
}
}]
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
},
},
# age-restricted video
{
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
u'info_dict': {
u'id': u'5186653',
u'ext': u'mp4',
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
u'uploader': u'Dwayne Pipe',
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
u'age_limit': 18,
},
},
]
def report_disclaimer(self):
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
'submit': "Continue - I'm over 18",
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read()
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
req.headers['Cookie'] = 'flashVersion=0;'
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file
mobj_an = re.match(r'^an-(.*?)$', video_id)
if mobj_an:
req.headers['Cookie'] = 'flashVersion=0;'
webpage = self._download_webpage(req, video_id)
# Extract URL, uploader and title from webpage
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False)
if re.search(r'"contentRating":"restricted"', webpage) is not None:
age_limit = 18
else:
age_limit = 0
return {
'_type': 'video',
'id': video_id,
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
'upload_date': None,
'title': video_title,
'ext': video_ext,
'age_limit': age_limit,
}

View File

@@ -0,0 +1,49 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class MofosexIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
_TEST = {
u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
u'file': u'5018.mp4',
u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
u'info_dict': {
u"title": u"Japanese Teen Music Video",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': age_limit,
}

View File

@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
@@ -47,7 +48,7 @@ class MTVIE(InfoExtractor):
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
return rtmp_video_url
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
@@ -58,7 +59,6 @@ class MTVIE(InfoExtractor):
if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
renditions = mdoc.findall('.//rendition')
formats = []
for rendition in mdoc.findall('.//rendition'):
@@ -80,6 +80,8 @@ class MTVIE(InfoExtractor):
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,

View File

@@ -0,0 +1,48 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
)
class MySpaceIE(InfoExtractor):
_VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
_TEST = {
u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
u'info_dict': {
u'id': u'100008689',
u'ext': u'flv',
u'title': u'Viva La Vida',
u'description': u'The official Viva La Vida video, directed by Hype Williams',
u'uploader': u'Coldplay',
u'uploader_id': u'coldplay',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
u'context'))
video = context['video']
rtmp_url, play_path = video['streamUrl'].split(';', 1)
return {
'id': compat_str(video['mediaId']),
'title': video['title'],
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
'description': video['description'],
'thumbnail': video['imageUrl'],
'uploader': video['artistName'],
'uploader_id': video['artistUsername'],
}

View File

@@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com:videocenter'
IE_DESC = u'Download the first 12 videos from a videocenter category'
IE_DESC = u'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
@classmethod

View File

@@ -0,0 +1,131 @@
# encoding: utf-8
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
)
class NiconicoIE(InfoExtractor):
IE_NAME = u'niconico'
IE_DESC = u'ニコニコ動画'
_TEST = {
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
u'file': u'sm22312215.mp4',
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
u'info_dict': {
u'title': u'Big Buck Bunny',
u'uploader': u'takuya0301',
u'uploader_id': u'2698420',
u'upload_date': u'20131123',
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
},
u'params': {
u'username': u'ydl.niconico@gmail.com',
u'password': u'youtube-dl',
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_NETRC_MACHINE = 'niconico'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = True
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
# Log in
login_form_strs = {
u'mail': username,
u'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
request = compat_urllib_request.Request(
u'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, u'', note=u'Logging in', errnote=u'Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return False
return True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info_webpage = self._download_webpage(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, u'Downloading flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_view_count = video_info.find('.//view_counter').text
video_webpage_url = video_info.find('.//watch_url').text
# uploader
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info_webpage = self._download_webpage(
url, video_id, note=u'Downloading user information')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
}

View File

@@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor):
video_id = mobj.group('id')
webpage_url = 'http://www.nowvideo.ch/video/' + video_id
embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
webpage = self._download_webpage(webpage_url, video_id)
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
self.report_extraction(video_id)
@@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor):
webpage, u'video title')
video_key = self._search_regex(r'var fkzd="(.*)";',
webpage, u'video key')
embed_page, u'video key')
api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
api_response = self._download_webpage(api_call, video_id,

View File

@@ -0,0 +1,68 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
from ..aes import (
aes_decrypt_text
)
class PornHubIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
_TEST = {
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
u'file': u'648719015.mp4',
u'md5': u'882f488fa1f0026f023f33576004a2ed',
u'info_dict': {
u"uploader": u"BABES-COM",
u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
u"age_limit": 18
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ')
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []
for video_url in video_urls:
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
formats.append({
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
})
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'formats': formats,
'age_limit': 18,
}

View File

@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
u'md5': u'374dd6dcedd24234453b295209aa69b6',
u'info_dict': {
u"upload_date": u"20090708",
u"title": u"Marilyn-Monroe-Bathing"
u"title": u"Marilyn-Monroe-Bathing",
u"age_limit": 18
}
}

View File

@@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
_TEST = {
u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4',
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
# md5 varies from time to time, as in
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
u'info_dict': {
u"title": u"Sucked on a toilet",
u"age_limit": 18,

View File

@@ -62,19 +62,6 @@ class RTLnowIE(InfoExtractor):
u'skip_download': True,
},
},
{
u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
u'file': u'127367.flv',
u'info_dict': {
u'upload_date': u'20130926',
u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
},
u'params': {
u'skip_download': True,
},
},
{
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
u'file': u'124903.flv',

View File

@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'add_ie': ['Ooyala'],
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',

View File

@@ -29,19 +29,37 @@ class SoundcloudIE(InfoExtractor):
)
'''
IE_NAME = u'soundcloud'
_TEST = {
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
u'file': u'62986583.mp3',
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
u'info_dict': {
u"upload_date": u"20121011",
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
u"uploader": u"E.T. ExTerrestrial Music",
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
}
_TESTS = [
{
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
u'file': u'62986583.mp3',
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
u'info_dict': {
u"upload_date": u"20121011",
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
u"uploader": u"E.T. ExTerrestrial Music",
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
},
# not streamable song
{
u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
u'info_dict': {
u'id': u'47127627',
u'ext': u'mp3',
u'title': u'Goldrushed',
u'uploader': u'The Royal Concept',
u'upload_date': u'20120521',
},
u'params': {
# rtmp
u'skip_download': True,
},
},
]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@classmethod
def suitable(cls, url):
@@ -56,24 +74,48 @@ class SoundcloudIE(InfoExtractor):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False):
video_id = info['id']
name = full_title or video_id
track_id = compat_str(info['id'])
name = full_title or track_id
if quiet == False:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
return {
'id': info['id'],
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
result = {
'id': track_id,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
'ext': u'mp3',
'ext': info.get('original_format', u'mp3'),
'description': info['description'],
'thumbnail': thumbnail,
}
if info.get('downloadable', False):
# We can build a direct link to the song
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
else:
# We have to retrieve the url
stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
track_id, u'Downloading track url')
# There should be only one entry in the dictionary
key, stream_url = list(json.loads(stream_json).items())[0]
if key.startswith(u'http'):
result['url'] = stream_url
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
result.update({
'url': url,
'play_path': 'mp3:' + path,
})
else:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
return result
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
@@ -106,70 +148,8 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud:set'
_TEST = {
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
u"playlist": [
{
u"file":"30510138.mp3",
u"md5":"f9136bf103901728f29e419d2c70f55d",
u"info_dict": {
u"upload_date": u"20111213",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"D-D-Dance"
}
},
{
u"file":"47127625.mp3",
u"md5":"09b6758a018470570f8fd423c9453dd8",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"The Royal Concept - Gimme Twice"
}
},
{
u"file":"47127627.mp3",
u"md5":"154abd4e418cea19c3b901f1e1306d9c",
u"info_dict": {
u"upload_date": u"20120521",
u"uploader": u"The Royal Concept",
u"title": u"Goldrushed"
}
},
{
u"file":"47127629.mp3",
u"md5":"2f5471edc79ad3f33a683153e96a79c1",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"In the End"
}
},
{
u"file":"47127631.mp3",
u"md5":"f9ba87aa940af7213f98949254f1c6e2",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"Knocked Up"
}
},
{
u"file":"75206121.mp3",
u"md5":"f9d1fe9406717e302980c30de4af9353",
u"info_dict": {
u"upload_date": u"20130116",
u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
u"uploader": u"The Royal Concept",
u"title": u"World On Fire"
}
}
]
}
# it's in tests/test_playlists.py
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -188,7 +168,6 @@ class SoundcloudSetIE(SoundcloudIE):
resolv_url = self._resolv_url(url)
info_json = self._download_webpage(resolv_url, full_title)
videos = []
info = json.loads(info_json)
if 'errors' in info:
for err in info['errors']:
@@ -208,7 +187,7 @@ class SoundcloudUserIE(SoundcloudIE):
IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py
_TEST = None
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TEST = {
# Overwrite MTVIE properties we don't want
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
}]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
@@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url')
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
class SouthparkDeIE(SouthParkStudiosIE):
IE_NAME = u'southpark.de'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
u'info_dict': {
u'title': u'The Government Won\'t Respect My Privacy',
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
},
}]

View File

@@ -0,0 +1,35 @@
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
_TEST = {
u'add_ie': ['Brightcove'],
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
u'info_dict': {
u'id': u'2780937028001',
u'ext': u'mp4',
u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
u'uploader': u'TechMedia Networks',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
try:
# Some videos require the playerKey field, which isn't define in
# the BrightcoveExperience object
brightcove_url = self._og_search_video_url(webpage)
except RegexNotFoundError:
# Other videos works fine with the info from the object
brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
if brightcove_url is None:
raise ExtractorError(u'The webpage does not contain a video', expected=True)
return self.url_result(brightcove_url, BrightcoveIE.ie_key())

View File

@@ -0,0 +1,74 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
from ..aes import (
aes_decrypt_text
)
class SpankwireIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
_TEST = {
u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
u'file': u'103545.mp4',
u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
u'info_dict': {
u"uploader": u"oreusz",
u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
u"description": u"Crazy Bitch X rated music video.",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(
r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []
for video_url in video_urls:
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
formats.append({
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
})
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'description': description,
'formats': formats,
'age_limit': age_limit,
}

View File

@@ -6,14 +6,22 @@ from .common import InfoExtractor
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TEST = {
_TESTS = [{
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
}
},
{
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
u'file': u'1309159.mp4',
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
u'info_dict': {
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
}
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
@@ -21,25 +29,38 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
webpage, u'title')
video_title = self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
xml_code = self._download_webpage(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
last_type = idoc[-1]
filename = last_type.findall('./filename')[0].text
duration = float(last_type.findall('./duration')[0].text)
video_url = 'http://video2.spiegel.de/flash/' + filename
video_ext = filename.rpartition('.')[2]
formats = [
{
'format_id': n.tag.rpartition('type')[2],
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
'vbr': int(n.find('./videobitrate').text),
'vcodec': n.find('./codec').text,
'acodec': 'MP4A',
}
for n in list(idoc)
# Blacklist type 6, it's extremely LQ and not available on the same server
if n.tag.startswith('type') and n.tag != 'type6'
]
formats.sort(key=lambda f: f['vbr'])
duration = float(idoc[0].findall('./duration')[0].text)
info = {
'id': video_id,
'url': video_url,
'ext': video_ext,
'title': video_title,
'duration': duration,
'formats': formats,
}
return [info]
return info

View File

@@ -0,0 +1,66 @@
# coding: utf-8
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
)
class StreamcloudIE(InfoExtractor):
IE_NAME = u'streamcloud.eu'
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
_TEST = {
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
u'file': u'skp9j99s4bpz.mp4',
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
u'info_dict': {
u'title': u'youtube-dl test video \'/\\ ä ↭',
u'duration': 9,
},
u'skip': u'Only available from the EU'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
orig_webpage = self._download_webpage(url, video_id)
fields = re.findall(r'''(?x)<input\s+
type="(?:hidden|submit)"\s+
name="([^"]+)"\s+
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', orig_webpage)
post = compat_urllib_parse.urlencode(fields)
self.to_screen('%s: Waiting for timeout' % video_id)
time.sleep(12)
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note=u'Downloading video page ...')
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)<', webpage, u'title')
video_url = self._search_regex(
r'file:\s*"([^"]+)"', webpage, u'video URL')
duration_str = self._search_regex(
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
duration = None if duration_str is None else int(duration_str)
thumbnail = self._search_regex(
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'duration': duration,
'thumbnail': thumbnail,
}

View File

@@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return any([self._downloader.params.get('writesubtitles', False),
self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage=None):
def _list_available_subtitles(self, video_id, webpage):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id)
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen(u'%s: Available subtitles for video: %s' %
@@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
self.to_screen(u'%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, video_webpage=None):
def extract_subtitles(self, video_id, webpage):
"""
returns {sub_lang: sub} ,{} if subtitles not found or None if the
subtitles aren't requested.
@@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return None
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id))
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
@@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
return
return sub
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses

View File

@@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
u'info_dict': {
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
}
},
u'skip': u'Service temporarily disabled as of 2013-11-20'
}
def _real_extract(self, url):

View File

@@ -1,4 +1,5 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor):
_TEST = {
u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
u'file': u'19705.mp4',
u'md5': u'27b6f7527da5acf534b15f21b032656e',
u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
u'info_dict': {
u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
@@ -31,16 +32,40 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
data, u'video URL')
return [{
qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
for file in data.findall('files/file'):
if file.attrib.get('playmode') == 'all':
# it just duplicates one of the entries
break
file_url = file.text
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
if m_format is not None:
format_id = m_format.group(1)
else:
format_id = file.attrib['bitrate']
formats.append({
'url': file_url,
'ext': 'mp4',
'format_id': format_id,
})
def sort_key(f):
try:
return qualities.index(f['format_id'])
except ValueError:
return -1
formats.sort(key=sort_key)
if not formats:
raise ExtractorError(u'Unable to extract video URL')
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'formats': formats,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}]
}

View File

@@ -1,10 +1,13 @@
import json
import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
RegexNotFoundError,
)
class TEDIE(InfoExtractor):
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL=r'''http://www\.ted\.com/
(
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
@@ -32,33 +35,32 @@ class TEDIE(InfoExtractor):
def _real_extract(self, url):
m=re.match(self._VALID_URL, url, re.VERBOSE)
if m.group('type_talk'):
return [self._talk_info(url)]
return self._talk_info(url)
else :
playlist_id=m.group('playlist_id')
name=m.group('name')
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
return [self._playlist_videos_info(url,name,playlist_id)]
def _playlist_videos_info(self,url,name,playlist_id=0):
def _playlist_videos_info(self, url, name, playlist_id):
'''Returns the videos of the playlist'''
video_RE=r'''
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
([.\s]*?)data-playlist_item_id="(\d+)"
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
'''
video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
m_names=re.finditer(video_name_RE,webpage)
webpage = self._download_webpage(
url, playlist_id, u'Downloading playlist webpage')
matches = re.finditer(
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
webpage)
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
webpage, 'playlist title')
playlist_entries = []
for m_video, m_name in zip(m_videos,m_names):
talk_url='http://www.ted.com%s' % m_name.group('talk_url')
playlist_entries.append(self.url_result(talk_url, 'TED'))
return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
playlist_entries = [
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
for m in matches
]
return self.playlist_result(
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url"""
@@ -81,16 +83,35 @@ class TEDIE(InfoExtractor):
'ext': 'mp4',
'url': stream['file'],
'format': stream['id']
} for stream in info['htmlStreams']]
info = {
'id': info['id'],
} for stream in info['htmlStreams']]
video_id = info['id']
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': desc,
'subtitles': video_subtitles,
'formats': formats,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_available_subtitles(self, video_id, webpage):
try:
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
languages = re.findall(r'(?:<option value=")(\S+)"', options)
if languages:
sub_lang_list = {}
for l in languages:
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
sub_lang_list[l] = url
return sub_lang_list
except RegexNotFoundError:
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}

View File

@@ -0,0 +1,74 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class TouTvIE(InfoExtractor):
IE_NAME = u'tou.tv'
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
_TEST = {
u'url': u'http://www.tou.tv/30-vies/S04E41',
u'file': u'30-vies_S04E41.mp4',
u'info_dict': {
u'title': u'30 vies Saison 4 / Épisode 41',
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
u'age_limit': 8,
u'uploader': u'Groupe des Nouveaux Médias',
u'duration': 1296,
u'upload_date': u'20131118',
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
},
u'params': {
u'skip_download': True, # Requires rtmpdump
},
u'skip': 'Only available in Canada'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
mediaId = self._search_regex(
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_webpage = self._download_webpage(
streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)
if video_url.endswith('/Unavailable.flv'):
raise ExtractorError(
u'Access to this video is blocked from outside of Canada',
expected=True)
duration_str = self._html_search_meta(
'video:duration', webpage, u'duration')
duration = int(duration_str) if duration_str else None
upload_date_str = self._html_search_meta(
'video:release_date', webpage, u'upload date')
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
return {
'id': video_id,
'title': self._og_search_title(webpage),
'url': video_url,
'description': self._og_search_description(webpage),
'uploader': self._dc_search_uploader(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._media_rating_search(webpage),
'duration': duration,
'upload_date': upload_date,
'ext': 'mp4',
}

View File

@@ -0,0 +1,63 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
from ..aes import (
aes_decrypt_text
)
class Tube8IE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
_TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4',
u'md5': u'e9e0b0c86734e5e3766e653509475db0',
u'info_dict': {
u"description": u"hot teen Kasia grinding",
u"uploader": u"unknown",
u"title": u"Kasia music video",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
if thumbnail:
thumbnail = thumbnail.replace('\\/', '/')
video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
if webpage.find('"encrypted":true')!=-1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join(format)
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@@ -0,0 +1,42 @@
import json
import re
from .common import InfoExtractor
class TvpIE(InfoExtractor):
IE_NAME = u'tvp.pl'
_VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
u'file': u'12878238.wmv',
u'info_dict': {
u'title': u'31.10.2013 - Odcinek 2',
u'description': u'31.10.2013 - Odcinek 2',
},
u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
json_params = self._download_webpage(
json_url, video_id, u"Downloading video metadata")
params = json.loads(json_params)
self.report_extraction(video_id)
video_url = params['video_url']
title = self._og_search_title(webpage, fatal=True)
return {
'id': video_id,
'title': title,
'ext': 'wmv',
'url': video_url,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -5,7 +5,7 @@ import datetime
from .common import InfoExtractor
from ..utils import (
determine_ext,
compat_HTTPError,
ExtractorError,
)
@@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
_TEST = {
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
u'info_dict': {
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody to Die For",
u'duration': 230,
u"duration": 230,
u"width": 1920,
u"height": 1080,
}
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
self.report_extraction(video_id)
video_info = json.loads(info_json)['video']
def _formats_from_json(self, video_info):
last_version = {'version': -1}
for version in video_info['videoVersions']:
# These are the HTTP downloads, other types are for different manifests
@@ -50,17 +46,75 @@ class VevoIE(InfoExtractor):
# Already sorted from worst to best quality
for rend in renditions.findall('rendition'):
attr = rend.attrib
f_url = attr['url']
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
formats.append({
'url': f_url,
'ext': determine_ext(f_url),
'url': attr['url'],
'format_id': attr['name'],
'format_note': format_note,
'height': int(attr['frameheight']),
'width': int(attr['frameWidth']),
})
return formats
date_epoch = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
upload_date = datetime.datetime.fromtimestamp(date_epoch)
def _formats_from_smil(self, smil_xml):
formats = []
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
for el in els:
src = el.attrib['src']
m = re.match(r'''(?xi)
(?P<ext>[a-z0-9]+):
(?P<path>
[/a-z0-9]+ # The directory and main part of the URL
_(?P<cbr>[0-9]+)k
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
_(?P<vcodec>[a-z0-9]+)
_(?P<vbr>[0-9]+)
_(?P<acodec>[a-z0-9]+)
_(?P<abr>[0-9]+)
\.[a-z0-9]+ # File extension
)''', src)
if not m:
continue
format_url = self._SMIL_BASE_URL + m.group('path')
formats.append({
'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'),
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
'abr': int(m.group('abr')),
'ext': m.group('ext'),
'width': int(m.group('width')),
'height': int(m.group('height')),
})
return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
video_info = json.loads(info_json)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
u'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
u'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
info = {
'id': video_id,
'title': video_info['title'],
@@ -71,7 +125,4 @@ class VevoIE(InfoExtractor):
'duration': video_info['duration'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -8,7 +8,7 @@ from ..utils import (
class ViddlerIE(InfoExtractor):
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://www.viddler.com/v/43903784",
u'file': u'43903784.mp4',

View File

@@ -24,12 +24,16 @@ class VideoPremiumIE(InfoExtractor):
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,
note=u'Downloading webpage again (with cookie)')
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
webpage, u'video title')
video_title = self._html_search_regex(
r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
return [{
return {
'id': video_id,
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
'play_path': "mp4:%s.f4v" % video_id,
@@ -37,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v',
'title': video_title,
}]
}

View File

@@ -0,0 +1,91 @@
import re
from ..utils import (
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
IE_NAME = u'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
u'file': u'1023585v.mp4',
u'md5': u'a21454021c2646f5433514177e2caa5f',
u'info_dict': {
u'title': u'Heirs Episode 14',
u'uploader': u'SBS',
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'upload_date': u'20131121',
u'age_limit': 13,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
u'uploader')
if uploader is not None:
uploader = uploader.strip()
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
u'rating information', default='').strip()
RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}
age_limit = RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(info_url, video_id)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
upload_date = (
unified_strdate(upload_date_str)
if upload_date_str is not None
else None
)
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, info_webpage)
return
return {
'id': video_id,
'title': title,
'url': video_url,
'description': description,
'thumbnail': thumbnail,
'age_limit': age_limit,
'uploader': uploader,
'subtitles': video_subtitles,
'upload_date': upload_date,
}
def _get_available_subtitles(self, video_id, info_webpage):
res = {}
for sturl in re.findall(r'<track src="([^"]+)"/>'):
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
res[m.group('lang')] = sturl
return res

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
import json
import re
import itertools
@@ -10,6 +11,7 @@ from ..utils import (
clean_html,
get_element_by_attribute,
ExtractorError,
RegexNotFoundError,
std_headers,
unsmuggle_url,
)
@@ -18,12 +20,12 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
{
u'url': u'http://vimeo.com/56015672',
u'url': u'http://vimeo.com/56015672#at=0',
u'file': u'56015672.mp4',
u'md5': u'8879b6cc097e987f02484baf890129e5',
u'info_dict': {
@@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor):
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
u'uploader': u'The BLN & Business of Software',
},
}
},
{
u'url': u'http://vimeo.com/68375962',
u'file': u'68375962.mp4',
u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7',
u'note': u'Video protected with password',
u'info_dict': {
u'title': u'youtube-dl password protected test video',
u'upload_date': u'20130614',
u'uploader_id': u'user18948128',
u'uploader': u'Jaime Marquínez Ferrándiz',
},
u'params': {
u'videopassword': u'youtube-dl',
},
},
]
def _login(self):
@@ -111,11 +128,9 @@ class VimeoIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
elif mobj.group('pro'):
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'):
else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@@ -129,18 +144,26 @@ class VimeoIE(InfoExtractor):
# Extract the config JSON
try:
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
webpage, u'info section', flags=re.DOTALL)
config = json.loads(config)
except:
try:
config_url = self._html_search_regex(
r' data-config-url="(.+?)"', webpage, u'config URL')
config_json = self._download_webpage(config_url, video_id)
config = json.loads(config_json)
except RegexNotFoundError:
# For pro videos or player.vimeo.com urls
config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
webpage, u'info section', flags=re.DOTALL)
config = json.loads(config)
except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
if re.search('If so please provide the correct password.', webpage):
if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
self._verify_video_password(url, video_id, webpage)
return self._real_extract(url)
else:
raise ExtractorError(u'Unable to extract info section')
raise ExtractorError(u'Unable to extract info section',
cause=e)
# Extract title
video_title = config["video"]["title"]
@@ -180,7 +203,7 @@ class VimeoIE(InfoExtractor):
# Vimeo specific: extract video codec and quality information
# First consider quality, then codecs, then take everything
codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
files = { 'hd': [], 'sd': [], 'other': []}
files = {'hd': [], 'sd': [], 'other': []}
config_files = config["video"].get("files") or config["request"].get("files")
for codec_name, codec_extension in codecs:
for quality in config_files.get(codec_name, []):
@@ -209,7 +232,7 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0:
raise ExtractorError(u'No known codec found')
return [{
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
@@ -218,7 +241,8 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail,
'description': video_description,
'formats': formats,
}]
'webpage_url': url,
}
class VimeoChannelIE(InfoExtractor):

View File

@@ -27,7 +27,7 @@ class VineIE(InfoExtractor):
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
webpage, u'video URL')
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
webpage, u'uploader', fatal=False, flags=re.DOTALL)
return [{

View File

@@ -0,0 +1,45 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
unescapeHTML,
)
class VKIE(InfoExtractor):
IE_NAME = u'vk.com'
_VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
_TEST = {
u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
u'md5': u'0deae91935c54e00003c2a00646315f0',
u'info_dict': {
u'id': u'162222515',
u'ext': u'flv',
u'title': u'ProtivoGunz - Хуёвая песня',
u'uploader': u'Noize MC',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
if m_yt is not None:
self.to_screen(u'Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars')
vars = json.loads(vars_json)
return {
'id': compat_str(vars['vid']),
'url': vars['url240'],
'title': unescapeHTML(vars['md_title']),
'thumbnail': vars['jpg'],
'uploader': vars['md_author'],
}

View File

@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = {
u'add_ie': ['Sina'],
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'file': u'98322879.flv',
u'info_dict': {

View File

@@ -36,21 +36,25 @@ class XHamsterIE(InfoExtractor):
}]
def _real_extract(self,url):
def extract_video_url(webpage):
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
if len(mobj.group('server')) == 0:
return compat_urllib_parse.unquote(mobj.group('file'))
else:
return mobj.group('server')+'/key='+mobj.group('file')
def is_hd(webpage):
return webpage.find('<div class=\'icon iconHD\'>') != -1
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
seo = mobj.group('seo')
mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
if len(mobj.group('server')) == 0:
video_url = compat_urllib_parse.unquote(mobj.group('file'))
else:
video_url = mobj.group('server')+'/key='+mobj.group('file')
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
webpage, u'title')
@@ -76,14 +80,32 @@ class XHamsterIE(InfoExtractor):
age_limit = self._rta_search(webpage)
return [{
'id': video_id,
'url': video_url,
'ext': determine_ext(video_url),
'title': video_title,
video_url = extract_video_url(webpage)
hd = is_hd(webpage)
formats = [{
'url': video_url,
'ext': determine_ext(video_url),
'format': 'hd' if hd else 'sd',
'format_id': 'hd' if hd else 'sd',
}]
if not hd:
webpage = self._download_webpage(mrss_url+'?hd', video_id)
if is_hd(webpage):
video_url = extract_video_url(webpage)
formats.append({
'url': video_url,
'ext': determine_ext(video_url),
'format': 'hd',
'format_id': 'hd',
})
return {
'id': video_id,
'title': video_title,
'formats': formats,
'description': video_description,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail,
'age_limit': age_limit,
}]
}

View File

@@ -9,7 +9,7 @@ from ..utils import (
class XNXXIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'

View File

@@ -0,0 +1,54 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
)
class XTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
_TEST = {
u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_',
u'file': u'kVTUy_G222_.mp4',
u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab',
u'info_dict': {
u"title": u"strange erotica",
u"description": u"surreal gay themed erotica...almost an ET kind of thing",
u"uploader": u"greenshowers",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format[0] += 'p'
format[1] += 'k'
format = "-".join(format)
return {
'id': video_id,
'title': video_title,
'uploader': video_uploader,
'description': video_description,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@@ -132,7 +132,7 @@ class YahooSearchIE(SearchInfoExtractor):
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
res['entries'].append(e)
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
break
return res

View File

@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
u'file': u'2189178.flv',
u'md5': u'07e15fa469ba384c7693fd246905547c',
u'info_dict': {
u"title": u"Zeichentrick 1"
u"title": u"Zeichentrick 1",
u"age_limit": 18,
}
}
@@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
# Get webpage content
webpage = self._download_webpage(url, video_id)
age_limit = self._rta_search(webpage)
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
webpage, u'title').strip()
@@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
'title': video_title,
'ext': 'flv',
'format': 'flv',
'player_url': embed_page_url}
'player_url': embed_page_url,
'age_limit': age_limit}
return [info]

View File

@@ -18,7 +18,7 @@ class YoukuIE(InfoExtractor):
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
u"file": u"XNDgyMDQ2NTQw_part00.flv",
u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
u"params": { u"test": False },
u"params": {u"test": False},
u"info_dict": {
u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
}
@@ -37,8 +37,8 @@ class YoukuIE(InfoExtractor):
source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
seed = float(seed)
for i in range(len(source)):
seed = (seed * 211 + 30031 ) % 65536
index = math.floor(seed / 65536 * len(source) )
seed = (seed * 211 + 30031) % 65536
index = math.floor(seed / 65536 * len(source))
mixed.append(source[int(index)])
source.remove(source[int(index)])
#return ''.join(mixed)

View File

@@ -17,7 +17,7 @@ from ..aes import (
)
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
@@ -31,23 +31,10 @@ class YouPornIE(InfoExtractor):
}
}
def _print_formats(self, formats):
"""Print all available formats"""
print(u'Available formats:')
print(u'ext\t\tformat')
print(u'---------------------------------')
for format in formats:
print(u'%s\t\t%s' % (format['ext'], format['format']))
def _specific(self, req_format, formats):
for x in formats:
if x["format"] == req_format:
return x
return None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -71,27 +58,22 @@ class YouPornIE(InfoExtractor):
except KeyError:
raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
# Get all of the formats available
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, u'download list').strip()
# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)
# Get link of hd video if available
mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
if mobj != None:
encrypted_video_url = mobj.group(u'encrypted_video_url')
video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
links = [video_url] + links
# Get all encrypted links
encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
for encrypted_link in encrypted_links:
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
links.append(link)
if not links:
raise ExtractorError(u'ERROR: no known formats available for video')
self.to_screen(u'Links found: %d' % len(links))
formats = []
for link in links:
@@ -99,43 +81,36 @@ class YouPornIE(InfoExtractor):
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
# A path looks like this:
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
video_url = unescapeHTML( link )
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
video_url = unescapeHTML(link)
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
# size = format[0]
# bitrate = format[1]
format = "-".join( format )
format = "-".join(format)
# title = u'%s-%s-%s' % (video_title, size, bitrate)
formats.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': upload_date,
'title': video_title,
'ext': extension,
'format': format,
'thumbnail': thumbnail,
'description': video_description,
'age_limit': age_limit,
'format_id': format,
})
if self._downloader.params.get('listformats', None):
self._print_formats(formats)
return
req_format = self._downloader.params.get('format', 'best')
self.to_screen(u'Format: %s' % req_format)
if req_format is None or req_format == 'best':
return [formats[0]]
elif req_format == 'worst':
return [formats[-1]]
elif req_format in ('-1', 'all'):
return formats
else:
format = self._specific( req_format, formats )
if format is None:
raise ExtractorError(u'Requested format not available')
return [format]
# Sort and remove doubles
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
for i in range(len(formats)-1,0,-1):
if formats[i]['format_id'] == formats[i-1]['format_id']:
del formats[i]
return {
'id': video_id,
'uploader': video_uploader,
'upload_date': upload_date,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return False
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
login_page, u'Login GALX parameter')
# Log in
login_form_strs = {
@@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
@@ -146,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com'
_VALID_URL = r"""^
_VALID_URL = r"""(?x)^
(
(?:https?://)? # http(s):// (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
@@ -236,11 +229,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'136': 'mp4',
'137': 'mp4',
'138': 'mp4',
'139': 'mp4',
'140': 'mp4',
'141': 'mp4',
'160': 'mp4',
# Dash mp4 audio
'139': 'm4a',
'140': 'm4a',
'141': 'm4a',
# Dash webm
'171': 'webm',
'172': 'webm',
@@ -344,18 +339,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
u"file": u"1ltcDfZMA3U.flv",
u"note": u"Test VEVO video (#897)",
u"info_dict": {
u"upload_date": u"20070518",
u"title": u"Maps - It Will Find You",
u"description": u"Music video by Maps performing It Will Find You.",
u"uploader": u"MuteUSA",
u"uploader_id": u"MuteUSA"
}
},
{
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
u"file": u"UxxajLWwzqY.mp4",
@@ -380,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO"
}
},
{
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
u"file": u"yZIXLfi8CZQ.mp4",
u"note": u"Embed-only video (#1746)",
u"info_dict": {
u"upload_date": u"20120608",
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
u"uploader": u"SET India",
u"uploader_id": u"setindia"
}
},
]
@@ -387,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
if YoutubePlaylistIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
return re.match(cls._VALID_URL, url) is not None
def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs)
@@ -1036,6 +1031,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Turn the encrypted s field into a working signature"""
if player_url is not None:
if player_url.startswith(u'//'):
player_url = u'https:' + player_url
try:
player_id = (player_url, len(s))
if player_id not in self._player_cache:
@@ -1099,7 +1096,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -1115,8 +1112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat'),
'name': l[0],
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': l[0].encode('utf-8'),
})
url = u'http://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url
@@ -1128,7 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _get_available_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
sub_format = self._downloader.params.get('subtitlesformat')
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
@@ -1287,7 +1284,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
data = compat_urllib_parse.urlencode({'video_id': video_id,
'el': 'embedded',
'el': 'player_embedded',
'gl': 'US',
'hl': 'en',
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
@@ -1316,6 +1313,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
else:
view_count = None
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError(u'"rental" videos not supported')
@@ -1403,32 +1405,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# this signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError(u'No stream_map present') # caught below
m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
re_signature = re.compile(r'[&,]s=')
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
if m_s is not None:
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
m_s = re_signature.search(args.get('adaptive_fmts', u''))
if m_s is not None:
if 'url_encoded_fmt_stream_map' in video_info:
video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
if 'adaptive_fmts' in video_info:
video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
else:
video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
elif 'adaptive_fmts' in video_info:
if 'url_encoded_fmt_stream_map' in video_info:
video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
else:
video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
video_info['adaptive_fmts'] = [args['adaptive_fmts']]
except ValueError:
pass
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
video_url_list = [(None, video_info['conn'][0])]
elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
url_map = {}
for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
if 'itag' in url_data and 'url' in url_data:
url = url_data['url'][0]
@@ -1481,13 +1480,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = []
for format_param, video_real_url in video_url_list:
for itag, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
video_extension = self._video_extensions.get(itag, 'flv')
video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
self._video_dimensions.get(format_param, '???'),
' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
self._video_dimensions.get(itag, '???'),
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
results.append({
'id': video_id,
@@ -1498,17 +1497,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'title': video_title,
'ext': video_extension,
'format': video_format,
'format_id': itag,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
})
return results
class YoutubePlaylistIE(InfoExtractor):
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?:
(?:https?://)?
@@ -1524,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
_MAX_RESULTS = 50
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
IE_NAME = u'youtube:playlist'
@classmethod
@@ -1533,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _real_initialize(self):
self._login()
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1546,51 +1552,33 @@ class YoutubePlaylistIE(InfoExtractor):
video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
return self.url_result(video_id, 'Youtube', video_id=video_id)
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Download playlist videos from API
videos = []
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
start_index = self._MAX_RESULTS * (page_num - 1) + 1
if start_index >= 1000:
self._downloader.report_warning(u'Max number of results reached')
break
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
# The ids are duplicated
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
ids.extend(new_ids)
try:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
if 'feed' not in response:
raise ExtractorError(u'Got a malformed response from YouTube API')
playlist_title = response['feed']['title']['$t']
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
break
for entry in response['feed']['entry']:
index = entry['yt$position']['$t']
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
videos.append((
index,
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
))
playlist_title = self._og_search_title(page)
videos = [v[1] for v in sorted(videos)]
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
return [self.playlist_result(url_results, playlist_id, playlist_title)]
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeChannelIE(InfoExtractor):
IE_DESC = u'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = u'youtube:channel'
@@ -1611,36 +1599,37 @@ class YoutubeChannelIE(InfoExtractor):
# Download channel page
channel_id = mobj.group(1)
video_ids = []
pagenum = 1
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
autogenerated = True
else:
autogenerated = False
url = self._TEMPLATE_URL % (channel_id, pagenum)
page = self._download_webpage(url, channel_id,
u'Downloading page #%s' % pagenum)
# Extract video identifiers
ids_in_page = self.extract_videos_from_page(page)
video_ids.extend(ids_in_page)
# Download any subsequent channel pages using the json-based channel_ajax query
if self._MORE_PAGES_INDICATOR in page:
if autogenerated:
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
else:
# Download all channel pages using the json-based channel_ajax query
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_webpage(url, channel_id,
u'Downloading page #%s' % pagenum)
page = json.loads(page)
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
return [self.playlist_result(url_entries, channel_id)]
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
@@ -1704,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
return [self.playlist_result(url_results, playlist_title = username)]
url_results = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches'
@@ -1747,9 +1738,14 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n:
video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = u'YouTube.com searches, newest videos first'
class YoutubeShowIE(InfoExtractor):
IE_DESC = u'YouTube.com (multi-season) shows'
@@ -1803,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
if info['paging'] is None:
break
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

View File

@@ -53,7 +53,7 @@ class ZDFIE(InfoExtractor):
video_id,
u'Get stream URL')
MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
mobj = re.search(self._MEDIA_STREAM, media_link)

View File

@@ -2,11 +2,15 @@ import io
import json
import traceback
import hashlib
import os
import subprocess
import sys
from zipimport import zipimporter
from .utils import *
from .utils import (
compat_str,
compat_urllib_request,
)
from .version import __version__
def rsa_verify(message, signature, key):
@@ -37,6 +41,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False
return True
def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository"""
@@ -78,6 +83,13 @@ def update_self(to_screen, verbose):
return
version_id = versions_info['latest']
def version_tuple(version_str):
return tuple(map(int, version_str.split('.')))
if version_tuple(__version__) >= version_tuple(version_id):
to_screen(u'youtube-dl is up to date (%s)' % __version__)
return
to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id]
@@ -105,7 +117,7 @@ def update_self(to_screen, verbose):
urlh = compat_urllib_request.urlopen(version['exe'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@@ -118,7 +130,7 @@ def update_self(to_screen, verbose):
try:
with open(exe + '.new', 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to write the new version')
return
@@ -137,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return
@@ -148,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh = compat_urllib_request.urlopen(version['bin'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@@ -161,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
try:
with open(filename, 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return

View File

@@ -12,6 +12,7 @@ import os
import pipes
import platform
import re
import ssl
import socket
import sys
import traceback
@@ -535,13 +536,31 @@ def formatSeconds(secs):
else:
return '%d' % secs
def make_HTTPS_handler(opts):
if sys.version_info < (3,2):
# Python's 2.x handler is very simplistic
return compat_urllib_request.HTTPSHandler()
if sys.version_info < (3, 2):
import httplib
class HTTPSConnectionV3(httplib.HTTPSConnection):
def __init__(self, *args, **kwargs):
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
def connect(self):
sock = socket.create_connection((self.host, self.port), self.timeout)
if self._tunnel_host:
self.sock = sock
self._tunnel()
try:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
except ssl.SSLError as e:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
def https_open(self, req):
return self.do_open(HTTPSConnectionV3, req)
return HTTPSHandlerV3()
else:
import ssl
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
context.set_default_verify_paths()
context.verify_mode = (ssl.CERT_NONE
@@ -572,6 +591,11 @@ class ExtractorError(Exception):
return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
"""Error when a regex didn't match"""
pass
class DownloadError(Exception):
"""Download Error exception.
@@ -729,6 +753,8 @@ def unified_strdate(date_str):
'%Y/%m/%d %H:%M:%S',
'%d.%m.%Y %H:%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
]
for expression in format_expressions:
@@ -944,7 +970,16 @@ class locked_file(object):
def shell_quote(args):
return ' '.join(map(pipes.quote, args))
quoted_args = []
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
for a in args:
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
quoted_args.append(pipes.quote(a))
return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):

View File

@@ -1,2 +1,2 @@
__version__ = '2013.10.23'
__version__ = '2013.11.24'