Compare commits

..

80 Commits

Author SHA1 Message Date
Philipp Hagemeister
22686b91f0 release 2013.12.08.1 2013-12-08 07:32:25 +01:00
Philipp Hagemeister
31812a9e0e [youtube:channel] Fix automated channel detection 2013-12-08 07:30:42 +01:00
Philipp Hagemeister
11bf848191 [wimp] simplify 2013-12-08 07:22:19 +01:00
Philipp Hagemeister
d4df5ed14c release 2013.12.08 2013-12-08 06:54:52 +01:00
Philipp Hagemeister
303b479e0a Automatically load SSL certs on Windows 2013-12-08 06:54:39 +01:00
Philipp Hagemeister
4c52160646 [FileDownloader] Fix progress report on Windows (Fixes #1918) 2013-12-08 06:53:46 +01:00
Philipp Hagemeister
a213880aaf Simplify status reporting (#1918) 2013-12-08 05:49:35 +01:00
Jaime Marquínez Ferrándiz
42d3bf844a Merge pull request #1919 from rzhxeo/xhamster
[XHamsterIE] Fix HD video detection
2013-12-07 14:35:17 -08:00
rzhxeo
b860967ce4 [XHamsterIE] Fix md5 in second test 2013-12-07 22:17:13 +01:00
rzhxeo
8ca6b8fba1 [XHamsterIE] Fix HD video detection 2013-12-07 21:39:32 +01:00
Jaime Marquínez Ferrándiz
c4d9e6731a [pyvideo] add support for videos that don't come from Youtube 2013-12-07 11:19:59 +01:00
Jaime Marquínez Ferrándiz
0d9ec5d963 [pyvideo] Cleanup and fix test 2013-12-07 11:00:56 +01:00
Jaime Marquínez Ferrándiz
870fc4e578 Merge remote-tracking branch 'gekitsuu/master' (closes PR #1913) 2013-12-07 10:50:06 +01:00
Adam Glenn
f623530d6e removing bad VALID_URL 2013-12-06 21:12:10 -08:00
Adam Glenn
ca9e02dc00 Adding pyvideo support 2013-12-06 21:11:01 -08:00
Jaime Marquínez Ferrándiz
fb30ec22fd [vimeo] Add an extractor for groups 2013-12-06 22:01:41 +01:00
Jaime Marquínez Ferrándiz
5cc14c2fd7 [vimeo] Add an extractor for albums (closes #1911) 2013-12-06 21:48:44 +01:00
Jaime Marquínez Ferrándiz
d349cd2240 [imdb] Fix extraction
The paths to each format's page may have leading whitespace.
The height and the duration can't be extracted.
2013-12-06 20:26:55 +01:00
Jaime Marquínez Ferrándiz
0b6a9f639f [vevo] Update test video's duration 2013-12-06 20:14:29 +01:00
Jaime Marquínez Ferrándiz
715c8e7bdb [youtube:playlist] Recognize mix ids for direct use (fixes #1295) 2013-12-06 19:52:41 +01:00
Jaime Marquínez Ferrándiz
7d4afc557f [youtube:playlist] Support mix ids longer than 13 (#1295) 2013-12-06 19:48:54 +01:00
Jaime Marquínez Ferrándiz
563e405411 [dailymotion] Fix view count regex
In some languages they can be in the format '123,456' instead of '123.456'
2013-12-06 13:41:07 +01:00
Jaime Marquínez Ferrándiz
f53c966a73 [dailymotion] Extract view count (#1895) 2013-12-06 13:36:36 +01:00
Jaime Marquínez Ferrándiz
336c3a69bd [youtube] Extract like and dislike count (#1895) 2013-12-06 13:22:27 +01:00
Jaime Marquínez Ferrándiz
4e76179476 [vimeo] Extract views count, likes count and comments count (#1895) 2013-12-06 13:03:08 +01:00
Philipp Hagemeister
ef4fd84857 [wistia] Add extractor 2013-12-06 09:15:04 +01:00
Philipp Hagemeister
72135030d1 Merge remote-tracking branch 'origin/master' 2013-12-05 22:30:04 +01:00
Jaime Marquínez Ferrándiz
3514813d5b [francetv] Add support for urls in the format http://www.france3.fr/emissions/{program}/diffusions/{date} (fixes #1898) 2013-12-05 21:49:30 +01:00
Jaime Marquínez Ferrándiz
9e60602084 [francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)
Rename the France2IE extractor to FranceTVIE
2013-12-05 21:48:41 +01:00
Philipp Hagemeister
19e3dfc9f8 [9gag] Like/dislike count (#1895) 2013-12-05 18:29:07 +01:00
Philipp Hagemeister
a1ef7e85d6 Remove unused imports 2013-12-05 14:31:54 +01:00
Philipp Hagemeister
ef2fac6f4a Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-05 14:29:14 +01:00
Philipp Hagemeister
7fc3fa0545 [9gag] Add extractor 2013-12-05 14:29:08 +01:00
Jaime Marquínez Ferrándiz
673d1273ff [vevo] Support '/watch/{id}' urls 2013-12-05 12:41:58 +01:00
Jaime Marquínez Ferrándiz
b9a2c53833 [metacafe] Add support for cbs videos (fixes #1838)
They use theplatform.com
2013-12-04 23:43:50 +01:00
Jaime Marquínez Ferrándiz
e9bf7479d2 Add an extractor for theplatform.com 2013-12-04 23:41:22 +01:00
Jaime Marquínez Ferrándiz
bfb9f7bc4c [hotnewhiphop] Update test's title 2013-12-04 20:36:26 +01:00
Jaime Marquínez Ferrándiz
6a656a843a Update description value for the write_info_json test (required after 27dcce1904) 2013-12-04 20:35:00 +01:00
Philipp Hagemeister
29030c0a4c Merge remote-tracking branch 'dstftw/correct-valid-urls' 2013-12-04 19:56:05 +01:00
dst
c0ade33e16 Correct some extractor _VALID_URL regexes 2013-12-04 20:34:47 +07:00
Philipp Hagemeister
671c0f151d release 2013.12.04 2013-12-04 14:19:07 +01:00
Philipp Hagemeister
27dcce1904 [youtube] Resolve URLs in comments 2013-12-04 14:18:49 +01:00
Jaime Marquínez Ferrándiz
84db81815a Move common code for extractors based in MTV services to a new base class
Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
2013-12-03 14:58:24 +01:00
Jaime Marquínez Ferrándiz
fb7abb31af Remove the compatibility code used before the new format system was implemented 2013-12-03 14:31:20 +01:00
Philipp Hagemeister
ce93879a9b [daum] Fix real video ID extraction 2013-12-03 14:16:58 +01:00
Philipp Hagemeister
938384c587 [redtube] Fix search for title 2013-12-03 14:08:16 +01:00
Philipp Hagemeister
e9d8e302aa [xhamster] Change test checksum 2013-12-03 14:06:16 +01:00
Jaime Marquínez Ferrándiz
cb7fb54600 Change the ie_name of YoutubeSearchDateIE
It produced a duplicate entry when listing the extractors with '--list-extractors' and generates noise in the commit log when generating the supported sites webpage (like in 09f355f73b)
2013-12-03 13:55:25 +01:00
Philipp Hagemeister
cf6758d204 Document disabling proxy (#1882) 2013-12-03 13:33:07 +01:00
Philipp Hagemeister
731e3dde29 release 2013.12.03 2013-12-03 13:13:09 +01:00
Philipp Hagemeister
a0eaa341e1 [configuration] Undo code breakage 2013-12-03 13:11:20 +01:00
Philipp Hagemeister
fb27c2295e Correct configuration file locations 2013-12-03 13:09:48 +01:00
Philipp Hagemeister
1b753cb334 Add Windows configuration file locations (#1881) 2013-12-03 13:04:02 +01:00
Philipp Hagemeister
36a826a50d Clarify --download-archive help (#1757) 2013-12-03 11:54:52 +01:00
Philipp Hagemeister
8796857429 Credit @dstftw for smotri IE 2013-12-02 17:43:22 +01:00
Philipp Hagemeister
aaebed13a8 [smotri] Simplify 2013-12-02 17:08:17 +01:00
Philipp Hagemeister
25939ffe56 Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl 2013-12-02 15:56:35 +01:00
dst
5270d8cb13 Added extractors for smotri.com 2013-12-02 20:10:19 +07:00
Philipp Hagemeister
0037e02921 release 2013.12.02 2013-12-02 13:37:26 +01:00
Philipp Hagemeister
6ad14cab59 Add --socket-timeout option 2013-12-02 13:37:05 +01:00
Philipp Hagemeister
a9be0cc736 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-02 13:36:20 +01:00
Jaime Marquínez Ferrándiz
55a10eab48 [vimeo] Add an extractor for users (closes #1871) 2013-12-01 22:36:18 +01:00
Philipp Hagemeister
e344693b65 Make socket timeout configurable, and bump default to 10 minutes (#1862) 2013-12-01 11:42:02 +01:00
Philipp Hagemeister
355e4fd07e [generic] Find embedded dailymotion videos (Fixes #1848) 2013-12-01 01:21:33 +01:00
Philipp Hagemeister
5e09d6abbd [clipfish] Skip test on travis 2013-12-01 01:16:20 +01:00
Philipp Hagemeister
b138de72f2 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-30 00:42:56 +01:00
Philipp Hagemeister
06dcbb71d8 Clarify help of --write-pages (#1853) 2013-11-30 00:42:43 +01:00
Jaime Marquínez Ferrándiz
c5171c454b [yahoo] Force use of the http protocol for downloading the videos. 2013-11-29 22:06:17 +01:00
Philipp Hagemeister
323ec6ae56 Clarify --download-archive help 2013-11-29 15:57:43 +01:00
Jaime Marquínez Ferrándiz
befd88b786 [yahoo] Add an extractor for yahoo news (closes #1849) 2013-11-29 15:25:43 +01:00
Philipp Hagemeister
a3fb4675fb Do not mutate default arguments
In this case, it looks rather harmless (since the conditions for --restrict-filenames should not change while a process is running), but just to be sure.
This also simplifies the interface for callers, who can just pass in the idiomatic None for "I don't care, whatever is the default".
2013-11-29 15:25:11 +01:00
Philipp Hagemeister
5f077efcb1 Merge pull request #1850 from nikai3d/master
fix typo in help
2013-11-29 01:48:14 -08:00
Nicolas Kaiser
9986238ba9 fix typo in help 2013-11-29 09:48:38 +01:00
Nicolas Kaiser
e1f900d6a4 fix typo in README.md 2013-11-29 09:44:05 +01:00
Jaime Marquínez Ferrándiz
acf37ca151 [imdb] Fix the resolution values (fixes #1847)
We were using the size of the player, it was the same for all the formats
2013-11-29 07:56:14 +01:00
Philipp Hagemeister
17769d5a6c release 2013.11.29 2013-11-29 03:34:26 +01:00
Philipp Hagemeister
677c18092d [podomatic] Add extractor 2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz
3862402ff3 Add an extractor for Clipsyndicate (closes #1744) 2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz
b03d0d064c [imdb] Fix extraction in python 2.6
Using a regular expression because the html cannot be parsed.
2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz
d8d6148628 Add an extractor for Internet Movie Database trailers (closes #1832) 2013-11-28 13:32:49 +01:00
79 changed files with 1254 additions and 313 deletions

View File

@@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
would handle would handle
--extractor-descriptions Output descriptions of all supported extractors --extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
empty string (--proxy "") for direct connection
--no-check-certificate Suppress HTTPS certificate validation. --no-check-certificate Suppress HTTPS certificate validation.
--cache-dir DIR Location in the filesystem where youtube-dl can --cache-dir DIR Location in the filesystem where youtube-dl can
store downloaded information permanently. By store downloaded information permanently. By
@@ -55,8 +56,9 @@ which means you can modify it, redistribute it or use it however you like.
--dateafter DATE download only videos uploaded after this date --dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video --no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age --age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not present in the archive --download-archive FILE Download only videos not listed in the archive
file. Record all downloaded videos in it. file. Record the IDs of all downloaded videos in
it.
## Download Options: ## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
@@ -130,11 +132,11 @@ which means you can modify it, redistribute it or use it however you like.
-v, --verbose print various debugging information -v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very --dump-intermediate-pages print downloaded pages to debug problems(very
verbose) verbose)
--write-pages Write downloaded pages to files in the current --write-pages Write downloaded intermediary pages to files in
directory the current directory to debug problems
## Video Format Options: ## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of -f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4" preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported and "-f flv" are also supported
--all-formats download all available video formats --all-formats download all available video formats
@@ -182,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION # CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE # OUTPUT TEMPLATE

View File

@@ -39,5 +39,6 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false "listssubtitles": false,
"socket_timeout": 20
} }

View File

@@ -106,6 +106,10 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(':colbertreport', ['ComedyCentralShows']) self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows']) self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -15,13 +15,18 @@ from youtube_dl.extractor import (
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionUserIE, DailymotionUserIE,
VimeoChannelIE, VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
UstreamChannelIE, UstreamChannelIE,
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
LivestreamIE, LivestreamIE,
NHLVideocenterIE, NHLVideocenterIE,
BambuserChannelIE, BambuserChannelIE,
BandcampAlbumIE BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
) )
@@ -54,6 +59,30 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Vimeo Tributes') self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24) self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
dl = FakeYDL()
ie = VimeoUserIE(dl)
result = ie.extract('http://vimeo.com/nkistudio/videos')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nki')
self.assertTrue(len(result['entries']) > 65)
def test_vimeo_album(self):
dl = FakeYDL()
ie = VimeoAlbumIE(dl)
result = ie.extract('http://vimeo.com/album/2632481')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Staff Favorites: November 2013')
self.assertTrue(len(result['entries']) > 12)
def test_vimeo_groups(self):
dl = FakeYDL()
ie = VimeoGroupsIE(dl)
result = ie.extract('http://vimeo.com/groups/rolexawards')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
self.assertTrue(len(result['entries']) > 72)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = UstreamChannelIE(dl) ie = UstreamChannelIE(dl)
@@ -110,6 +139,24 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP') self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4) self.assertTrue(len(result['entries']) >= 4)
def test_smotri_community(self):
dl = FakeYDL()
ie = SmotriCommunityIE(dl)
result = ie.extract('http://smotri.com/community/video/kommuna')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'kommuna')
self.assertEqual(result['title'], u'КПРФ')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_user(self):
dl = FakeYDL()
ie = SmotriUserIE(dl)
result = ie.extract('http://smotri.com/user/inspector')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'inspector')
self.assertEqual(result['title'], u'Inspector')
self.assertTrue(len(result['entries']) >= 9)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -26,6 +26,7 @@ from youtube_dl.utils import (
unsmuggle_url, unsmuggle_url,
shell_quote, shell_quote,
encodeFilename, encodeFilename,
str_to_int,
) )
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
@@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.info.json' INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description' DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl. This is a test video for youtube-dl.

View File

@@ -204,11 +204,27 @@ class FileDownloader(object):
"""Report destination filename.""" """Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename) self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta): def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress.""" """Report download progress."""
if self.params.get('noprogress', False): if self.params.get('noprogress', False):
return return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
if eta is not None: if eta is not None:
eta_str = self.format_eta(eta) eta_str = self.format_eta(eta)
else: else:
@@ -218,14 +234,20 @@ class FileDownloader(object):
else: else:
percent_str = 'Unknown %' percent_str = 'Unknown %'
speed_str = self.format_speed(speed) speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' % msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str)) (percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else: else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % self._report_progress_status(
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) (u'100%% of %s in %s' %
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' % (data_len_str, self.format_seconds(tot_time))),
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) is_last_line=True)
def report_resuming_byte(self, resume_len): def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte.""" """Report attempt to resume at given byte."""
@@ -246,15 +268,6 @@ class FileDownloader(object):
"""Report it was impossible to resume download.""" """Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume') self.to_screen(u'[download] Unable to resume')
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
def run_rtmpdump(args): def run_rtmpdump(args):
start = time.time() start = time.time()

View File

@@ -132,6 +132,7 @@ class YoutubeDL(object):
cookiefile: File name where cookies should be read from and dumped to. cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:
@@ -146,7 +147,7 @@ class YoutubeDL(object):
_num_downloads = None _num_downloads = None
_screen_file = None _screen_file = None
def __init__(self, params={}): def __init__(self, params=None):
"""Create a FileDownloader object with the given options.""" """Create a FileDownloader object with the given options."""
self._ies = [] self._ies = []
self._ies_instances = {} self._ies_instances = {}
@@ -155,7 +156,7 @@ class YoutubeDL(object):
self._download_retcode = 0 self._download_retcode = 0
self._num_downloads = 0 self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = params self.params = {} if params is None else params
if (sys.version_info >= (3,) and sys.platform != 'win32' and if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -404,7 +405,8 @@ class YoutubeDL(object):
for key, value in extra_info.items(): for key, value in extra_info.items():
info_dict.setdefault(key, value) info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}): def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True):
''' '''
Returns a list with a dictionary for each video we find. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos. If 'download', also downloads the videos.
@@ -440,7 +442,10 @@ class YoutubeDL(object):
'webpage_url': url, 'webpage_url': url,
'extractor_key': ie.ie_key(), 'extractor_key': ie.ie_key(),
}) })
return self.process_ie_result(ie_result, download, extra_info) if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(de), de.format_traceback())
break break
@@ -473,8 +478,33 @@ class YoutubeDL(object):
download, download,
ie_key=ie_result.get('ie_key'), ie_key=ie_result.get('ie_key'),
extra_info=extra_info) extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'url_transparent':
# Use the information from the embedding page
info = self.extract_info(
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
def make_result(embedded_info):
new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
'entries', 'urlhandle', 'ie_key', 'duration',
'subtitles', 'annotations', 'format',
'thumbnail', 'thumbnails'):
if f in new_result:
del new_result[f]
if f in embedded_info:
new_result[f] = embedded_info[f]
return new_result
new_result = make_result(info)
assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -969,7 +999,10 @@ class YoutubeDL(object):
proxy_map.update(handler.proxies) proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self, timeout=20): def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
timeout = 600 if timeout_val is None else float(timeout_val)
opts_cookiefile = self.params.get('cookiefile') opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy') opts_proxy = self.params.get('proxy')

View File

@@ -36,6 +36,7 @@ __authors__ = (
'Marcin Cieślak', 'Marcin Cieślak',
'Anton Larionov', 'Anton Larionov',
'Takuya Tsuchida', 'Takuya Tsuchida',
'Sergey M.',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@@ -80,11 +81,11 @@ from .PostProcessor import (
def parseOpts(overrideArguments=None): def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes): def _readOptions(filename_bytes, default=[]):
try: try:
optionf = open(filename_bytes) optionf = open(filename_bytes)
except IOError: except IOError:
return [] # silently skip if file is not present return default # silently skip if file is not present
try: try:
res = [] res = []
for l in optionf: for l in optionf:
@@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None):
general.add_option('--extractor-descriptions', general.add_option('--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', action='store_true', dest='list_extractor_descriptions',
help='Output descriptions of all supported extractors', default=False) help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option(
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option( general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -198,6 +201,9 @@ def parseOpts(overrideArguments=None):
general.add_option( general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir', '--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching') help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=optparse.SUPPRESS_HELP)
selection.add_option('--playlist-start', selection.add_option('--playlist-start',
@@ -220,7 +226,7 @@ def parseOpts(overrideArguments=None):
default=None, type=int) default=None, type=int)
selection.add_option('--download-archive', metavar='FILE', selection.add_option('--download-archive', metavar='FILE',
dest='download_archive', dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.') help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@@ -235,7 +241,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format', video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best', action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats', video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all') action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats', video_format.add_option('--prefer-free-formats',
@@ -317,7 +323,7 @@ def parseOpts(overrideArguments=None):
help='print downloaded pages to debug problems(very verbose)') help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--write-pages', verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False, action='store_true', dest='write_pages', default=False,
help='Write downloaded pages to files in the current directory') help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option('--youtube-print-sig-code', verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False, action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
@@ -415,6 +421,8 @@ def parseOpts(overrideArguments=None):
if opts.verbose: if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else: else:
systemConf = _readOptions('/etc/youtube-dl.conf')
xdg_config_home = os.environ.get('XDG_CONFIG_HOME') xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home: if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -424,8 +432,31 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile): if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf') userConf = _readOptions(userConfFile, None)
userConf = _readOptions(userConfFile)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
commandLineConf = sys.argv[1:] commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv) opts, args = parser.parse_args(argv)
@@ -652,6 +683,7 @@ def _real_main(argv=None):
'cookiefile': opts.cookiefile, 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy, 'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:

View File

@@ -21,6 +21,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cnn import CNNIE from .cnn import CNNIE
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -55,7 +56,7 @@ from .flickr import FlickrIE
from .francetv import ( from .francetv import (
PluzzIE, PluzzIE,
FranceTvInfoIE, FranceTvInfoIE,
France2IE, FranceTVIE,
GenerationQuoiIE GenerationQuoiIE
) )
from .freesound import FreesoundIE from .freesound import FreesoundIE
@@ -71,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE
@@ -100,13 +102,16 @@ from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE from .pornhub import PornHubIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .pyvideo import PyvideoIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
@@ -118,6 +123,11 @@ from .rutube import RutubeIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE from .slashdot import SlashdotIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
)
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import ( from .southparkstudios import (
@@ -136,6 +146,7 @@ from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .toutv import TouTvIE from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
@@ -156,7 +167,13 @@ from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
)
from .vine import VineIE from .vine import VineIE
from .viki import VikiIE from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
@@ -164,12 +181,17 @@ from .wat import WatIE
from .websurg import WeBSurgIE from .websurg import WeBSurgIE
from .weibo import WeiboIE from .weibo import WeiboIE
from .wimp import WimpIE from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeIE from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE

View File

@@ -13,7 +13,7 @@ from ..utils import (
class AddAnimeIE(InfoExtractor): class AddAnimeIE(InfoExtractor):
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)' _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime' IE_NAME = u'AddAnime'
_TEST = { _TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',

View File

@@ -10,7 +10,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor): class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = { _TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [ u"playlist": [
@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
}) })
formats = sorted(formats, key=lambda f: (f['height'], f['width'])) formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = { playlist.append({
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
'upload_date': upload_date, 'upload_date': upload_date,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)', 'user_agent': 'QuickTime compatible (youtube-dl)',
} })
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['ext']
playlist.append(info)
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@@ -11,7 +11,7 @@ from ..utils import (
class ArchiveOrgIE(InfoExtractor): class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org' IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos' IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$' _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = { _TEST = {
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
for f in formats: for f in formats:
f['ext'] = determine_ext(f['url']) f['ext'] = determine_ext(f['url'])
info = { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'upload_date': upload_date, 'upload_date': upload_date,
'thumbnail': data.get('misc', {}).get('image'),
} }
thumbnail = data.get('misc', {}).get('image')
if thumbnail:
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -17,8 +17,8 @@ from ..utils import (
# add tests. # add tests.
class ArteTvIE(InfoExtractor): class ArteTvIE(InfoExtractor):
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv' IE_NAME = u'arte.tv'

View File

@@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
} }
} }
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@@ -54,7 +54,7 @@ class BambuserIE(InfoExtractor):
class BambuserChannelIE(InfoExtractor): class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel' IE_NAME = u'bambuser:channel'
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)' _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request # The maximum number we can get with each request
_STEP = 50 _STEP = 50

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor): class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html' _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_TEST = { _TEST = {
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',

View File

@@ -17,7 +17,8 @@ class ClipfishIE(InfoExtractor):
u'info_dict': { u'info_dict': {
u'title': u'FIFA 14 - E3 2013 Trailer', u'title': u'FIFA 14 - E3 2013 Trailer',
u'duration': 82, u'duration': 82,
} },
u'skip': 'Blocked in the US'
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -0,0 +1,52 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
)
class ClipsyndicateIE(InfoExtractor):
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
u'info_dict': {
u'id': u'4629301',
u'ext': u'mp4',
u'title': u'Brick Briscoe',
u'duration': 612,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
js_player = self._download_webpage(
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
video_id, u'Downlaoding player')
# it includes a required token
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
playlist_page = self._download_webpage(
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
video_id, u'Downloading video info')
# Fix broken xml
playlist_page = re.sub('&', '&amp;', playlist_page)
pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
track_doc = pdoc.find('trackList/track')
def find_param(name):
node = find_xpath_attr(track_doc, './/param', 'name', name)
if node is not None:
return node.attrib['value']
return {
'id': video_id,
'title': find_param('title'),
'url': track_doc.find('location').text,
'thumbnail': find_param('thumbnail'),
'duration': int(find_param('duration')),
}

View File

@@ -1,7 +1,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
from ..utils import ( from ..utils import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
@@ -11,8 +11,8 @@ from ..utils import (
) )
class ComedyCentralIE(MTVIE): class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/' _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = { _TEST = {
@@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
u'description': u'After a certain point, breastfeeding becomes c**kblocking.', u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
}, },
} }
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
}) })
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = { results.append({
'id': shortMediaId, 'id': shortMediaId,
'formats': formats, 'formats': formats,
'uploader': showId, 'uploader': showId,
@@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'title': effTitle, 'title': effTitle,
'thumbnail': None, 'thumbnail': None,
'description': compat_str(officialTitle), 'description': compat_str(officialTitle),
} })
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
results.append(info)
return results return results

View File

@@ -55,6 +55,9 @@ class InfoExtractor(object):
subtitles: The subtitle file contents as a dictionary in the format subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}. {language: subtitles}.
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file, urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years) age_limit: Age restriction for the video, as an integer (years)
@@ -364,7 +367,8 @@ class InfoExtractor(object):
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name), [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False) html, display_name, fatal=False)

View File

@@ -6,7 +6,7 @@ from ..utils import (
) )
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
_TEST = { _TEST = {
u'url': u'http://www.c-spanvideo.org/program/HolderonV', u'url': u'http://www.c-spanvideo.org/program/HolderonV',
u'file': u'315139.flv', u'file': u'315139.flv',

View File

@@ -11,6 +11,7 @@ from ..utils import (
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
orderedSet, orderedSet,
str_to_int,
ExtractorError, ExtractorError,
) )
@@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self._list_available_subtitles(video_id, webpage) self._list_available_subtitles(video_id, webpage)
return return
view_count = str_to_int(self._search_regex(
r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'subtitles': video_subtitles, 'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'], 'thumbnail': info['thumbnail_url'],
'age_limit': age_limit, 'age_limit': age_limit,
'view_count': view_count,
} }
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):

View File

@@ -28,7 +28,8 @@ class DaumIE(InfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id) webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
webpage, u'full id') webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id}) query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml( info = self._download_xml(
@@ -56,7 +57,7 @@ class DaumIE(InfoExtractor):
'format_id': profile, 'format_id': profile,
}) })
info = { return {
'id': video_id, 'id': video_id,
'title': info.find('TITLE').text, 'title': info.find('TITLE').text,
'formats': formats, 'formats': formats,
@@ -65,6 +66,3 @@ class DaumIE(InfoExtractor):
'duration': int(info.find('DURATION').text), 'duration': int(info.find('DURATION').text),
'upload_date': info.find('REGDTTM').text[:8], 'upload_date': info.find('REGDTTM').text[:8],
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -11,7 +11,7 @@ from ..utils import (
class DreiSatIE(InfoExtractor): class DreiSatIE(InfoExtractor):
IE_NAME = '3sat' IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = { _TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm', u'file': u'36983.webm',
@@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor):
return (qidx, prefer_http, format['video_bitrate']) return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey) formats.sort(key=_sortkey)
info = { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
@@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor):
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': upload_date, 'upload_date': upload_date,
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -10,7 +10,7 @@ from ..utils import (
class EightTracksIE(InfoExtractor): class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks' IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_TEST = { _TEST = {
u"name": u"EightTracks", u"name": u"EightTracks",
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",

View File

@@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
IE_NAME = u'exfm' IE_NAME = u'exfm'
IE_DESC = u'ex.fm' IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
_TESTS = [ _TESTS = [
{ {
u'url': u'http://ex.fm/song/eh359', u'url': u'http://ex.fm/song/eh359',

View File

@@ -9,7 +9,7 @@ from ..utils import (
class FazIE(InfoExtractor): class FazIE(InfoExtractor):
IE_NAME = u'faz.net' IE_NAME = u'faz.net'
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html' _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
_TEST = { _TEST = {
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
@@ -44,13 +44,10 @@ class FazIE(InfoExtractor):
}) })
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
info = { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'formats': formats, 'formats': formats,
'description': descr, 'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text, 'thumbnail': config.find('STILL/STILL_BIG').text,
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -12,7 +12,7 @@ from ..utils import (
class FKTVIE(InfoExtractor): class FKTVIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv' IE_NAME = u'fernsehkritik.tv'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
_TEST = { _TEST = {
u'url': u'http://fernsehkritik.tv/folge-1', u'url': u'http://fernsehkritik.tv/folge-1',
@@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):
class FKTVPosteckeIE(InfoExtractor): class FKTVPosteckeIE(InfoExtractor):
IE_NAME = u'fernsehkritik.tv:postecke' IE_NAME = u'fernsehkritik.tv:postecke'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = { _TEST = {
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
u'file': u'0120.flv', u'file': u'0120.flv',

View File

@@ -21,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
thumbnail_path = info.find('image').text thumbnail_path = info.find('image').text
return {'id': video_id, return {'id': video_id,
'ext': 'mp4', 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
'url': video_url, 'url': video_url,
'title': info.find('titre').text, 'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
@@ -45,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr' IE_NAME = u'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html' _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = { _TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -66,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self._extract_video(video_id) return self._extract_video(video_id)
class France2IE(FranceTVBaseInfoExtractor): class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = u'france2.fr' IE_NAME = u'francetv'
_VALID_URL = r'''(?x)https?://www\.france2\.fr/ IE_DESC = u'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?: (?:
emissions/.*?/videos/(?P<id>\d+) emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
| emission/(?P<key>[^/?]+) | (emissions?|jt)/(?P<key>[^/?]+)
)''' )'''
_TEST = { _TESTS = [
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', # france2
u'file': u'75540104.mp4', {
u'info_dict': { u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
u'title': u'13h15, le samedi...', u'file': u'75540104.mp4',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', u'info_dict': {
u'title': u'13h15, le samedi...',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
}, },
u'params': { # france3
u'skip_download': True, {
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
u'info_dict': {
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
u'ext': u'flv',
u'title': u'Le scandale du prix des médicaments',
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}, },
} # france4
{
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'info_dict': {
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'ext': u'flv',
u'title': u'Hero Corp Making of - Extrait 1',
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
},
u'params': {
# rtmp download
u'skip_download': True,
},
},
# france5
{
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
u'info_dict': {
u'id': u'92837968',
u'ext': u'mp4',
u'title': u'C à dire ?!',
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
},
# franceo
{
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
u'info_dict': {
u'id': u'92327925',
u'ext': u'mp4',
u'title': u'Infô-Afrique',
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
},
u'params': {
# m3u8 download
u'skip_download': True,
},
u'skip': u'The id changes frequently',
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj.group('key'): if mobj.group('key'):
webpage = self._download_webpage(url, mobj.group('key')) webpage = self._download_webpage(url, mobj.group('key'))
video_id = self._html_search_regex( id_res = [
r'''(?x)<div\s+class="video-player">\s* (r'''(?x)<div\s+class="video-player">\s*
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
class="francetv-video-player">''', class="francetv-video-player">'''),
webpage, u'video ID') (r'<a id="player_direct" href="http://info\.francetelevisions'
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
video_id = self._html_search_regex(id_res, webpage, u'video ID')
else: else:
video_id = mobj.group('id') video_id = mobj.group('id')
return self._extract_video(video_id) return self._extract_video(video_id)

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor): class GamekingsIE(InfoExtractor):
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = { _TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4', u'file': u'20130811.mp4',

View File

@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
'format_id': q, 'format_id': q,
}) })
info = { return {
'id': data_video['guid'], 'id': data_video['guid'],
'title': compat_urllib_parse.unquote(data_video['title']), 'title': compat_urllib_parse.unquote(data_video['title']),
'formats': formats, 'formats': formats,
'description': get_meta_content('description', webpage), 'description': get_meta_content('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -1,13 +1,10 @@
import re import re
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVIE):
""" class GametrailersIE(MTVServicesInfoExtractor):
Gametrailers use the same videos system as MTVIE, it just changes the feed _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
url, where the uri is and the method to get the thumbnails.
"""
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = { _TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4', u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
}, },
} }
# Overwrite MTVIE properties we don't want
_TESTS = []
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss' _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')

View File

@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title # Site Name | Video Title
# Video Title - Tagline | Site Name # Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical # and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>', video_title = self._html_search_regex(
webpage, u'video title', default=u'video', flags=re.DOTALL) r'(?s)<title>(.*?)</title>', webpage, u'video title',
default=u'video')
# video uploader is domain name
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_url = BrightcoveIE._extract_brightcove_url(webpage)
@@ -188,13 +193,35 @@ class GenericIE(InfoExtractor):
# Look for embedded YouTube player # Look for embedded YouTube player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
if matches: if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches] for tuppl in matches]
return self.playlist_result( return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title) urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Wistia player
match = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
return {
'_type': 'url_transparent',
'url': unescapeHTML(match.group('url')),
'ie_key': 'Wistia',
'uploader': video_uploader,
'title': video_title,
'id': video_id,
}
# Look for Bandcamp pages with custom domain # Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None: if mobj is not None:
@@ -238,14 +265,9 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you: # here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0] video_id = os.path.splitext(video_id)[0]
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': None,
'title': video_title, 'title': video_title,
} }

View File

@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
u'file': u'1435540.mp3', u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': { u'info_dict': {
u"title": u"Freddie Gibbs - Lay It Down" u"title": u'Freddie Gibbs "Lay It Down"'
} }
} }

View File

@@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE): class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system.""" """Extractor for 1up.com, it uses the ign videos system."""
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)' _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com' IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

View File

@@ -0,0 +1,57 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
u'info_dict': {
u'id': u'2524815897',
u'ext': u'mp4',
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url,video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
flags=re.MULTILINE)
formats = []
for f_id, f_path in available_formats:
f_path = f_path.strip()
format_page = self._download_webpage(
compat_urlparse.urljoin(url, f_path),
u'Downloading info for %s format' % f_id)
json_data = self._search_regex(
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
format_page, u'json data', flags=re.DOTALL)
info = json.loads(json_data)
format_info = info['videoPlayerObject']['video']
formats.append({
'format_id': f_id,
'url': format_info['url'],
})
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': format_info['slate'],
}

View File

@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
class InstagramIE(InfoExtractor): class InstagramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/' _VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
_TEST = { _TEST = {
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc', u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
u'file': u'aye83DjauH.mp4', u'file': u'aye83DjauH.mp4',

View File

@@ -8,7 +8,7 @@ from ..utils import (
) )
class JukeboxIE(InfoExtractor): class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html' _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>' _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"' _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>' _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'

View File

@@ -8,7 +8,7 @@ from ..utils import (
class LiveLeakIE(InfoExtractor): class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)' _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'liveleak' IE_NAME = u'liveleak'
_TEST = { _TEST = {
u'url': u'http://www.liveleak.com/view?i=757_1364311680', u'url': u'http://www.liveleak.com/view?i=757_1364311680',

View File

@@ -11,7 +11,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor): class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream' IE_NAME = u'livestream'
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = { _TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
u'file': u'4719370.mp4', u'file': u'4719370.mp4',

View File

@@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor):
u'age_limit': 18, u'age_limit': 18,
}, },
}, },
# cbs video
{
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
u'info_dict': {
u'id': u'0rOxMBabDXN6',
u'ext': u'flv',
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
u'duration': 129,
},
u'params': {
# rtmp download
u'skip_download': True,
},
},
] ]
@@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
# Check if video comes from YouTube # the video may come from an external site
mobj2 = re.match(r'^yt-(.*)$', video_id) m_external = re.match('^(\w{2})-(.*)$', video_id)
if mobj2 is not None: if m_external is not None:
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')] prefix, ext_id = m_external.groups()
# Check if video comes from YouTube
if prefix == 'yt':
return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
# CBS videos use theplatform.com
if prefix == 'cb':
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)

View File

@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, u'description', flags=re.DOTALL) webpage, u'description', flags=re.DOTALL)
info = { return {
'id': video_id, 'id': video_id,
'title': clip.find('title').text, 'title': clip.find('title').text,
'formats': formats, 'formats': formats,
'description': description, 'description': description,
'duration': int(clip.find('duration').text), 'duration': int(clip.find('duration').text),
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -10,35 +10,8 @@ from ..utils import (
def _media_xml_tag(tag): def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _id_from_uri(uri): def _id_from_uri(uri):
return uri.split(':')[-1] return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
return base + m.group('finalid') return base + m.group('finalid')
def _get_thumbnail_url(self, uri, itemdoc): def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml): def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml: if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
else: else:
description = None description = None
info = { return {
'title': itemdoc.find('title').text, 'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page), 'formats': self._extract_video_formats(mediagen_page),
'id': video_id, 'id': video_id,
@@ -101,11 +79,6 @@ class MTVIE(InfoExtractor):
'description': description, 'description': description,
} }
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_videos_info(self, uri): def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri}) data = compat_urllib_parse.urlencode({'uri': uri})
@@ -113,6 +86,39 @@ class MTVIE(InfoExtractor):
u'Downloading info') u'Downloading info')
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')

View File

@@ -9,7 +9,7 @@ from ..utils import (
class MuzuTVIE(InfoExtractor): class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)' _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = u'muzu.tv' IE_NAME = u'muzu.tv'
_TEST = { _TEST = {

View File

@@ -9,7 +9,7 @@ from ..utils import (
class MySpassIE(InfoExtractor): class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www.myspass.de/.*' _VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = { _TEST = {
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
u'file': u'11741.mp4', u'file': u'11741.mp4',

View File

@@ -56,7 +56,7 @@ class NaverIE(InfoExtractor):
'height': int(format_el.find('height').text), 'height': int(format_el.find('height').text),
}) })
info = { return {
'id': video_id, 'id': video_id,
'title': info.find('Subject').text, 'title': info.find('Subject').text,
'formats': formats, 'formats': formats,
@@ -65,6 +65,3 @@ class NaverIE(InfoExtractor):
'upload_date': info.find('WriteDate').text.replace('.', ''), 'upload_date': info.find('WriteDate').text.replace('.', ''),
'view_count': int(info.find('PlayCount').text), 'view_count': int(info.find('PlayCount').text),
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -0,0 +1,43 @@
import json
import re
from .common import InfoExtractor
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
_TEST = {
u"url": u"http://9gag.tv/v/1912",
u"file": u"1912.mp4",
u"info_dict": {
u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
},
u'add_ie': [u'Youtube']
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(r'''(?x)
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
data-video-meta="([^"]+)"''', webpage, u'video metadata')
data = json.loads(data_json)
return {
'_type': 'url_transparent',
'url': data['youtubeVideoId'],
'ie_key': 'Youtube',
'id': video_id,
'title': data['title'],
'description': data['description'],
'view_count': int(data['view_count']),
'like_count': int(data['statistic']['like']),
'dislike_count': int(data['statistic']['dislike']),
'thumbnail': data['thumbnail_url'],
}

View File

@@ -12,7 +12,7 @@ from ..utils import (
) )
class ORFIE(InfoExtractor): class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class PBSIE(InfoExtractor): class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?' _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
_TEST = { _TEST = {
u'url': u'http://video.pbs.org/video/2365006249/', u'url': u'http://video.pbs.org/video/2365006249/',

View File

@@ -0,0 +1,49 @@
import json
import re
from .common import InfoExtractor
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
_TEST = {
u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
u"file": u"2009-01-02T16_03_35-08_00.mp3",
u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
u"info_dict": {
u"uploader": u"Science Teaching Tips",
u"uploader_id": u"scienceteachingtips",
u"title": u"64. When the Moon Hits Your Eye",
u"duration": 446,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
channel = mobj.group('channel')
json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
'?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id))
data_json = self._download_webpage(
json_url, video_id, note=u'Downloading video info')
data = json.loads(data_json)
video_url = data['downloadLink']
uploader = data['podcast']
title = data['title']
thumbnail = data['imageLocation']
duration = int(data['length'] / 1000.0)
return {
'id': video_id,
'url': video_url,
'title': title,
'uploader': uploader,
'uploader_id': channel,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -0,0 +1,51 @@
import re
import os
from .common import InfoExtractor
class PyvideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
_TESTS = [{
u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
u'file': u'24_4WWkSmNo.mp4',
u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
u'info_dict': {
u"title": u"Become a logging expert in 30 minutes",
u"description": u"md5:9665350d466c67fb5b1598de379021f7",
u"upload_date": u"20130320",
u"uploader": u"NextDayVideo",
u"uploader_id": u"NextDayVideo",
},
u'add_ie': ['Youtube'],
},
{
u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
u'info_dict': {
u'id': u'2542',
u'ext': u'm4v',
u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube')
title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
webpage, u'title', flags=re.DOTALL)
video_url = self._search_regex([r'<source src="(.*?)"',
r'<dt>Download</dt>.*?<a href="(.+?)"'],
webpage, u'video url', flags=re.DOTALL)
return {
'id': video_id,
'title': os.path.splitext(title)[0],
'url': video_url,
}

View File

@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title') webpage, u'title')
# No self-labeling, but they describe themselves as # No self-labeling, but they describe themselves as

View File

@@ -11,7 +11,7 @@ from ..utils import (
class RutubeIE(InfoExtractor): class RutubeIE(InfoExtractor):
_VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)' _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
_TEST = { _TEST = {
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
class SlashdotIE(InfoExtractor): class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)' _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = { _TEST = {
u'add_ie': ['Ooyala'], u'add_ie': ['Ooyala'],

View File

@@ -0,0 +1,251 @@
# encoding: utf-8
import re
import json
import hashlib
from .common import InfoExtractor
from ..utils import (
ExtractorError
)
class SmotriIE(InfoExtractor):
IE_DESC = u'Smotri.com'
IE_NAME = u'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
_TESTS = [
# real video id 2610366
{
u'url': u'http://smotri.com/video/view/?id=v261036632ab',
u'file': u'v261036632ab.mp4',
u'md5': u'2a7b08249e6f5636557579c368040eb9',
u'info_dict': {
u'title': u'катастрофа с камер видеонаблюдения',
u'uploader': u'rbc2008',
u'uploader_id': u'rbc08',
u'upload_date': u'20131118',
u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
# real video id 57591
{
u'url': u'http://smotri.com/video/view/?id=v57591cb20',
u'file': u'v57591cb20.flv',
u'md5': u'830266dfc21f077eac5afd1883091bcd',
u'info_dict': {
u'title': u'test',
u'uploader': u'Support Photofile@photofile',
u'uploader_id': u'support-photofile',
u'upload_date': u'20070704',
u'description': u'test, видео test',
u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
# video-password
{
u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
u'file': u'v1390466a13c.mp4',
u'md5': u'f6331cef33cad65a0815ee482a54440b',
u'info_dict': {
u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
u'uploader': u'timoxa40',
u'uploader_id': u'timoxa40',
u'upload_date': u'20100404',
u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
u'params': {
u'videopassword': u'qwerty',
},
},
# age limit + video-password
{
u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
u'file': u'v15408898bcf.flv',
u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
u'info_dict': {
u'title': u'этот ролик не покажут по ТВ',
u'uploader': u'zzxxx',
u'uploader_id': u'ueggb',
u'upload_date': u'20101001',
u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
u'age_limit': 18,
u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
u'params': {
u'videopassword': u'333'
}
}
]
_SUCCESS = 0
_PASSWORD_NOT_VERIFIED = 1
_PASSWORD_DETECTED = 2
_VIDEO_NOT_FOUND = 3
def _search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
return self._html_search_regex(
r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
html, display_name, fatal=False)
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
real_video_id = mobj.group('realvideoid')
# Download video JSON data
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._VIDEO_NOT_FOUND:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
# video-password set
video_password = self._downloader.params.get('videopassword', None)
if not video_password:
raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._PASSWORD_NOT_VERIFIED:
raise ExtractorError(u'Video password is invalid', expected=True)
if status != self._SUCCESS:
raise ExtractorError(u'Unexpected status value %s' % status)
# Extract the URL of the video
video_url = video_json['file_data']
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
video_page, u'confirm string')
confirm_url = video_page_url + '&confirm=%s' % confirm_string
video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
video_title = video_url.rsplit('/', 1)[-1]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
if video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
if video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
duration_str = self._search_meta(u'duration', video_page)
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_uploader_id = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_view_count = self._html_search_regex(
u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'video_duration': video_duration,
'view_count': video_view_count,
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
}
class SmotriCommunityIE(InfoExtractor):
IE_DESC = u'Smotri.com community videos'
IE_NAME = u'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
community_id = mobj.group('communityid')
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
rss = self._download_xml(url, community_id, u'Downloading community RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
community_title = self._html_search_regex(
u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor):
IE_DESC = u'Smotri.com user videos'
IE_NAME = u'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user_id = mobj.group('userid')
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
rss = self._download_xml(url, user_id, u'Downloading user RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text
user_nickname = self._html_search_regex(
u'^Видео режиссера (.*)$', description_text,
u'user nickname')
return self.playlist_result(entries, user_id, user_nickname)

View File

@@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''^(?:https?://)? _VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<widget>w.soundcloud.com/player/?.*?url=.*) |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
) )
''' '''
IE_NAME = u'soundcloud' IE_NAME = u'soundcloud'
@@ -217,7 +217,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
IE_NAME = u'soundcloud:user' IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py # it's in tests/test_playlists.py

View File

@@ -1,15 +1,14 @@
import re import re
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVIE): class SouthParkStudiosIE(MTVServicesInfoExtractor):
IE_NAME = u'southparkstudios.com' IE_NAME = u'southparkstudios.com'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
# Overwrite MTVIE properties we don't want
_TESTS = [{ _TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
}, },
}] }]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url') url = u'http://www.' + mobj.group(u'url')

View File

@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor): class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html' _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = { _TEST = {
u'add_ie': ['Brightcove'], u'add_ie': ['Brightcove'],
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html', u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',

View File

@@ -18,7 +18,7 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor): class StanfordOpenClassroomIE(InfoExtractor):
IE_NAME = u'stanfordoc' IE_NAME = u'stanfordoc'
IE_DESC = u'Stanford Open ClassRoom' IE_DESC = u'Stanford Open ClassRoom'
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_TEST = { _TEST = {
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100', u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
u'file': u'PracticalUnix_intro-environment.mp4', u'file': u'PracticalUnix_intro-environment.mp4',

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor): class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player.""" """TF1 uses the wat.tv player."""
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html' _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
_TEST = { _TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4', u'file': u'10635995.mp4',

View File

@@ -0,0 +1,68 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
xpath_with_ns,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
class ThePlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
_TEST = {
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
u'info_dict': {
u'id': u'e9I_cZgTgIPd',
u'ext': u'flv',
u'title': u'Blackberry\'s big, bold Z30',
u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
u'duration': 247,
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _get_info(self, video_id):
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
base_url = head.find(_x('smil:meta')).attrib['base']
switch = body.find(_x('smil:switch'))
formats = []
for f in switch.findall(_x('smil:video')):
attr = f.attrib
formats.append({
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
'width': int(attr['width']),
'height': int(attr['height']),
'vbr': int(attr['system-bitrate']),
})
formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
return {
'id': video_id,
'title': info['title'],
'formats': formats,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': info['duration']//1000,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
return self._get_info(video_id)

View File

@@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor):
for fnode in format_doc.findall('./formats/format') for fnode in format_doc.findall('./formats/format')
] ]
info = { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@@ -3,7 +3,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
class UnistraIE(InfoExtractor): class UnistraIE(InfoExtractor):
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)' _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
_TEST = { _TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154', u'url': u'http://utv.unistra.fr/video.php?id_video=154',

View File

@@ -9,7 +9,7 @@ from ..utils import (
) )
class VeeHDIE(InfoExtractor): class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)' _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://veehd.com/video/4686958', u'url': u'http://veehd.com/video/4686958',

View File

@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
Accepts urls from vevo.com or in the format 'vevo:{id}' Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE) (currently used by MTVIE)
""" """
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
_TESTS = [{ _TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4', u'file': u'GB1101300280.mp4',
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
u"upload_date": u"20130624", u"upload_date": u"20130624",
u"uploader": u"Hurts", u"uploader": u"Hurts",
u"title": u"Somebody to Die For", u"title": u"Somebody to Die For",
u"duration": 230, u"duration": 230.12,
u"width": 1920, u"width": 1920,
u"height": 1080, u"height": 1080,
} }

View File

@@ -6,7 +6,7 @@ from ..utils import ExtractorError
class ViceIE(InfoExtractor): class ViceIE(InfoExtractor):
_VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)' _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
_TEST = { _TEST = {
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',

View File

@@ -2,13 +2,10 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class ViddlerIE(InfoExtractor): class ViddlerIE(InfoExtractor):
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)' _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TEST = { _TEST = {
u"url": u"http://www.viddler.com/v/43903784", u"url": u"http://www.viddler.com/v/43903784",
u'file': u'43903784.mp4', u'file': u'43903784.mp4',
@@ -47,7 +44,7 @@ class ViddlerIE(InfoExtractor):
r"thumbnail\s*:\s*'([^']*)'", r"thumbnail\s*:\s*'([^']*)'",
webpage, u'thumbnail', fatal=False) webpage, u'thumbnail', fatal=False)
info = { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -56,9 +53,3 @@ class ViddlerIE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
} }
# TODO: Remove when #980 has been merged
info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
info.update(info['formats'][-1])
return info

View File

@@ -7,7 +7,7 @@ from ..utils import (
) )
class VideofyMeIE(InfoExtractor): class VideofyMeIE(InfoExtractor):
_VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)' _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
IE_NAME = u'videofy.me' IE_NAME = u'videofy.me'
_TEST = { _TEST = {

View File

@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo' _NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo' IE_NAME = u'vimeo'
_TESTS = [ _TESTS = [
@@ -196,6 +196,16 @@ class VimeoIE(InfoExtractor):
if mobj is not None: if mobj is not None:
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count'))
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count'))
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count'))
except RegexNotFoundError:
# This info is only available in vimeo.com/{id} urls
view_count = None
like_count = None
comment_count = None
# Vimeo specific: extract request signature and timestamp # Vimeo specific: extract request signature and timestamp
sig = config['request']['signature'] sig = config['request']['signature']
timestamp = config['request']['timestamp'] timestamp = config['request']['timestamp']
@@ -242,6 +252,9 @@ class VimeoIE(InfoExtractor):
'description': video_description, 'description': video_description,
'formats': formats, 'formats': formats,
'webpage_url': url, 'webpage_url': url,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
} }
@@ -249,25 +262,77 @@ class VimeoChannelIE(InfoExtractor):
IE_NAME = u'vimeo:channel' IE_NAME = u'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"' _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
def _real_extract(self, url): def _page_url(self, base_url, pagenum):
mobj = re.match(self._VALID_URL, url) return '%s/videos/page:%d/' % (base_url, pagenum)
channel_id = mobj.group('id')
def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, u'list title')
def _extract_videos(self, list_id, base_url):
video_ids = [] video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum), webpage = self._download_webpage(
channel_id, u'Downloading page %s' % pagenum) self._page_url(base_url, pagenum) ,list_id,
u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids] for video_id in video_ids]
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
webpage, u'channel title')
return {'_type': 'playlist', return {'_type': 'playlist',
'id': channel_id, 'id': list_id,
'title': channel_title, 'title': self._extract_list_title(webpage),
'entries': entries, 'entries': entries,
} }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE):
IE_NAME = u'vimeo:user'
_VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
@classmethod
def suitable(cls, url):
if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
return False
return super(VimeoUserIE, cls).suitable(url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
return self._extract_videos(name, 'http://vimeo.com/%s' % name)
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = u'vimeo:album'
_VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P<id>\d+)'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
album_id = mobj.group('id')
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = u'vimeo:group'
_VALID_URL = r'(?:https?://)?vimeo.\com/groups/(?P<name>[^/]+)'
def _extract_list_title(self, webpage):
return self._og_search_title(webpage)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)

View File

@@ -11,7 +11,7 @@ from ..utils import (
class WatIE(InfoExtractor): class WatIE(InfoExtractor):
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html' _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
IE_NAME = 'wat.tv' IE_NAME = 'wat.tv'
_TEST = { _TEST = {
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',

View File

@@ -11,7 +11,8 @@ class WimpIE(InfoExtractor):
u'file': u'deerfence.flv', u'file': u'deerfence.flv',
u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5', u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
u'info_dict': { u'info_dict': {
u"title": u"Watch Till End: Herd of deer jump over a fence." u"title": u"Watch Till End: Herd of deer jump over a fence.",
u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
} }
} }
@@ -19,18 +20,15 @@ class WimpIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1) video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title') title = self._html_search_meta('description', webpage, u'video title')
thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url') googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
googleString = base64.b64decode(googleString).decode('ascii') googleString = base64.b64decode(googleString).decode('ascii')
final_url = self._search_regex('","(.*?)"', googleString,'final video url') final_url = self._search_regex('","(.*?)"', googleString,'final video url')
ext = final_url.rpartition(u'.')[2]
return [{
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
}]
return {
'id': video_id,
'url': final_url,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}

View File

@@ -0,0 +1,55 @@
import json
import re
from .common import InfoExtractor
class WistiaIE(InfoExtractor):
_VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
u"file": u"sh7fpupwlt.mov",
u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
u"info_dict": {
u"title": u"cfh_resourceful_zdkh_final_1"
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(
r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
data = json.loads(data_json)
formats = []
thumbnails = []
for atype, a in data['assets'].items():
if atype == 'still':
thumbnails.append({
'url': a['url'],
'resolution': '%dx%d' % (a['width'], a['height']),
})
continue
if atype == 'preview':
continue
formats.append({
'format_id': atype,
'url': a['url'],
'width': a['width'],
'height': a['height'],
'filesize': a['size'],
'ext': a['ext'],
})
formats.sort(key=lambda a: a['filesize'])
return {
'id': video_id,
'title': data['name'],
'formats': formats,
'thumbnails': thumbnails,
}

View File

@@ -46,7 +46,7 @@ class XHamsterIE(InfoExtractor):
return mobj.group('server')+'/key='+mobj.group('file') return mobj.group('server')+'/key='+mobj.group('file')
def is_hd(webpage): def is_hd(webpage):
return webpage.find('<div class=\'icon iconHD\'>') != -1 return webpage.find('<div class=\'icon iconHD\'') != -1
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@@ -17,27 +17,21 @@ class YahooIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv', u'file': u'214727115.mp4',
u'md5': u'4962b075c08be8690a922ee026d05e69',
u'info_dict': { u'info_dict': {
u'title': u'Julian Smith & Travis Legg Watch Julian Smith', u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith', u'description': u'Julian and Travis watch Julian Smith',
}, },
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
}, },
{ {
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.flv', u'file': u'103000935.mp4',
u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
u'info_dict': { u'info_dict': {
u'title': u'Codefellas - The Cougar Lies with Spanish Moss', u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
}, },
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
}, },
] ]
@@ -53,8 +47,12 @@ class YahooIE(InfoExtractor):
# The 'meta' field is not always in the video webpage, we request it # The 'meta' field is not always in the video webpage, we request it
# from another page # from another page
long_id = info['id'] long_id = info['id']
return self._get_info(long_id, video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id) ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
' AND protocol="http"' % long_id)
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'q': query, 'q': query,
'env': 'prod', 'env': 'prod',
@@ -100,6 +98,32 @@ class YahooIE(InfoExtractor):
} }
class YahooNewsIE(YahooIE):
IE_NAME = 'yahoo:news'
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
_TEST = {
u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
u'info_dict': {
u'id': u'104538833',
u'ext': u'mp4',
u'title': u'China Moses Is Crazy About the Blues',
u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
},
}
# Overwrite YahooIE properties we don't want
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
return self._get_info(long_id, video_id)
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search' IE_DESC = u'Yahoo screen search'
_MAX_RESULTS = 1000 _MAX_RESULTS = 1000

View File

@@ -7,7 +7,7 @@ from ..utils import (
class YouJizzIE(InfoExtractor): class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = { _TEST = {
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
u'file': u'2189178.flv', u'file': u'2189178.flv',

View File

@@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader": u"Philipp Hagemeister", u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag", u"uploader_id": u"phihag",
u"upload_date": u"20121002", u"upload_date": u"20121002",
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
} }
}, },
{ {
@@ -388,10 +388,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
super(YoutubeIE, self).__init__(*args, **kwargs) super(YoutubeIE, self).__init__(*args, **kwargs)
self._player_cache = {} self._player_cache = {}
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self.to_screen(u'%s: Downloading video webpage' % video_id)
def report_video_info_webpage_download(self, video_id): def report_video_info_webpage_download(self, video_id):
"""Report attempt to download video info webpage.""" """Report attempt to download video info webpage."""
self.to_screen(u'%s: Downloading video info webpage' % video_id) self.to_screen(u'%s: Downloading video info webpage' % video_id)
@@ -1258,15 +1254,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = self._extract_id(url) video_id = self._extract_id(url)
# Get video webpage # Get video webpage
self.report_video_webpage_download(video_id)
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
request = compat_urllib_request.Request(url) video_webpage = self._download_webpage(url, video_id)
try:
video_webpage_bytes = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
# Attempt to extract SWF player URL # Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1366,6 +1355,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# description # description
video_description = get_element_by_id("eow-description", video_webpage) video_description = get_element_by_id("eow-description", video_webpage)
if video_description: if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description) video_description = clean_html(video_description)
else: else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1374,6 +1372,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
video_description = u'' video_description = u''
def _extract_count(klass):
count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
if count is not None:
return int(count.replace(',', ''))
return None
like_count = _extract_count(u'likes-count')
dislike_count = _extract_count(u'dislikes-count')
# subtitles # subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage) video_subtitles = self.extract_subtitles(video_id, video_webpage)
@@ -1506,6 +1512,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'annotations': video_annotations, 'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count, 'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
}) })
return results return results
@@ -1520,10 +1528,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
\? (?:.*?&)*? (?:p|a|list)= \? (?:.*?&)*? (?:p|a|list)=
| p/ | p/
) )
((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,}) ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
.* .*
| |
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)""" )"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"' _MORE_PAGES_INDICATOR = r'data-link-type="next"'
@@ -1545,7 +1553,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _extract_mix(self, playlist_id): def _extract_mix(self, playlist_id):
# The mixes are generated from a a single video # The mixes are generated from a a single video
# the id of the playlist is just 'RD' + video_id # the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id) url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
get_element_by_attribute('class', 'title ', webpage)) get_element_by_attribute('class', 'title ', webpage))
@@ -1573,7 +1581,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else: else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id if playlist_id.startswith('RD'):
# Mixes require a custom extraction process # Mixes require a custom extraction process
return self._extract_mix(playlist_id) return self._extract_mix(playlist_id)
@@ -1623,10 +1631,11 @@ class YoutubeChannelIE(InfoExtractor):
video_ids = [] video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id) channel_page = self._download_webpage(url, channel_id)
if re.search(r'channel-header-autogenerated-label', channel_page) is not None: autogenerated = re.search(r'''(?x)
autogenerated = True class="[^"]*?(?:
else: channel-header-autogenerated-label|
autogenerated = False yt-channel-title-autogenerated
)[^"]*"''', channel_page) is not None
if autogenerated: if autogenerated:
# The videos are contained in a single page # The videos are contained in a single page
@@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
return self.playlist_result(videos, query) return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate' _SEARCH_KEY = 'ytsearchdate'
IE_DESC = u'YouTube.com searches, newest videos first' IE_DESC = u'YouTube.com searches, newest videos first'

View File

@@ -17,7 +17,6 @@ import ssl
import socket import socket
import sys import sys
import traceback import traceback
import xml.etree.ElementTree
import zlib import zlib
try: try:
@@ -562,11 +561,14 @@ def make_HTTPS_handler(opts_no_check_certificate):
return HTTPSHandlerV3() return HTTPSHandlerV3()
else: else:
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3) context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
context.set_default_verify_paths()
context.verify_mode = (ssl.CERT_NONE context.verify_mode = (ssl.CERT_NONE
if opts_no_check_certificate if opts_no_check_certificate
else ssl.CERT_REQUIRED) else ssl.CERT_REQUIRED)
context.set_default_verify_paths()
try:
context.load_default_certs()
except AttributeError:
pass # Python < 3.4
return compat_urllib_request.HTTPSHandler(context=context) return compat_urllib_request.HTTPSHandler(context=context)
class ExtractorError(Exception): class ExtractorError(Exception):
@@ -1021,3 +1023,7 @@ def format_bytes(bytes):
suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent] suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent) converted = float(bytes) / float(1024 ** exponent)
return u'%.2f%s' % (converted, suffix) return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str)

View File

@@ -1,2 +1,2 @@
__version__ = '2013.11.28.1' __version__ = '2013.12.08.1'