Compare commits
64 Commits
2013.10.15
...
2013.10.23
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1cf64ee468 | ||
|
|
cdec0190c4 | ||
|
|
2450bcb28b | ||
|
|
3126050c0f | ||
|
|
93b22c7828 | ||
|
|
0a89b2852e | ||
|
|
55b3e45bba | ||
|
|
365bcf6d97 | ||
|
|
71907db3ba | ||
|
|
6803655ced | ||
|
|
df1c39ec5c | ||
|
|
80f55a9511 | ||
|
|
7853cc5ae1 | ||
|
|
586a91b67f | ||
|
|
b028e96144 | ||
|
|
ce68b5907c | ||
|
|
fe7e0c9825 | ||
|
|
12893efe01 | ||
|
|
a6387bfd3c | ||
|
|
f6a54188c2 | ||
|
|
cbbd9a9c69 | ||
|
|
685a9cd2f1 | ||
|
|
182a107877 | ||
|
|
8c51aa6506 | ||
|
|
3fd39e37f2 | ||
|
|
49e86983e7 | ||
|
|
a9c58ad945 | ||
|
|
f8b45beacc | ||
|
|
9d92015d43 | ||
|
|
50a6150ed9 | ||
|
|
284acd57d6 | ||
|
|
8ed6b34477 | ||
|
|
f6f1fc9286 | ||
|
|
8e590a117f | ||
|
|
d5594202aa | ||
|
|
b186d949cf | ||
|
|
3d2986063c | ||
|
|
41fd7c7e60 | ||
|
|
fdefe96bf2 | ||
|
|
16f36a6fc9 | ||
|
|
cce722b79c | ||
|
|
82697fb2ab | ||
|
|
53c1d3ef49 | ||
|
|
8e55e9abfc | ||
|
|
7c58ef3275 | ||
|
|
416a5efce7 | ||
|
|
f4d96df0f1 | ||
|
|
5d254f776a | ||
|
|
1c1218fefc | ||
|
|
d21ab29200 | ||
|
|
54ed626cf8 | ||
|
|
a733eb6c53 | ||
|
|
591454798d | ||
|
|
38604f1a4f | ||
|
|
2d0efe70a6 | ||
|
|
bfd14b1b2f | ||
|
|
76965512da | ||
|
|
996d1c3242 | ||
|
|
8016c92297 | ||
|
|
e028d0d1e3 | ||
|
|
79819f58f2 | ||
|
|
6ff000b888 | ||
|
|
99e206d508 | ||
|
|
dd82ffea0c |
13
Makefile
13
Makefile
@@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python
|
|||||||
|
|
||||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||||
ifeq ($(PREFIX),/usr)
|
ifeq ($(PREFIX),/usr)
|
||||||
SYSCONFDIR=/etc
|
SYSCONFDIR=/etc
|
||||||
else
|
else
|
||||||
ifeq ($(PREFIX),/usr/local)
|
ifeq ($(PREFIX),/usr/local)
|
||||||
SYSCONFDIR=/etc
|
SYSCONFDIR=/etc
|
||||||
else
|
else
|
||||||
SYSCONFDIR=$(PREFIX)/etc
|
SYSCONFDIR=$(PREFIX)/etc
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||||
@@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
--exclude '*~' \
|
--exclude '*~' \
|
||||||
--exclude '__pycache' \
|
--exclude '__pycache' \
|
||||||
--exclude '.git' \
|
--exclude '.git' \
|
||||||
|
--exclude 'testdata' \
|
||||||
-- \
|
-- \
|
||||||
bin devscripts test youtube_dl \
|
bin devscripts test youtube_dl \
|
||||||
CHANGELOG LICENSE README.md README.txt \
|
CHANGELOG LICENSE README.md README.txt \
|
||||||
|
|||||||
24
README.md
24
README.md
@@ -21,6 +21,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
sudo if needed)
|
sudo if needed)
|
||||||
-i, --ignore-errors continue on download errors, for example to
|
-i, --ignore-errors continue on download errors, for example to
|
||||||
skip unavailable videos in a playlist
|
skip unavailable videos in a playlist
|
||||||
|
--abort-on-error Abort downloading of further videos (in the
|
||||||
|
playlist or the command line) if an error occurs
|
||||||
--dump-user-agent display the current browser identification
|
--dump-user-agent display the current browser identification
|
||||||
--user-agent UA specify a custom user agent
|
--user-agent UA specify a custom user agent
|
||||||
--referer REF specify a custom referer, use if the video access
|
--referer REF specify a custom referer, use if the video access
|
||||||
@@ -30,7 +32,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--extractor-descriptions Output descriptions of all supported extractors
|
--extractor-descriptions Output descriptions of all supported extractors
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
--proxy URL Use the specified HTTP/HTTPS proxy
|
||||||
--no-check-certificate Suppress HTTPS certificate validation.
|
--no-check-certificate Suppress HTTPS certificate validation.
|
||||||
--cache-dir None Location in the filesystem where youtube-dl can
|
--cache-dir DIR Location in the filesystem where youtube-dl can
|
||||||
store downloaded information permanently. By
|
store downloaded information permanently. By
|
||||||
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
||||||
/youtube-dl .
|
/youtube-dl .
|
||||||
@@ -76,15 +78,17 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
%(uploader_id)s for the uploader nickname if
|
%(uploader_id)s for the uploader nickname if
|
||||||
different, %(autonumber)s to get an automatically
|
different, %(autonumber)s to get an automatically
|
||||||
incremented number, %(ext)s for the filename
|
incremented number, %(ext)s for the filename
|
||||||
extension, %(upload_date)s for the upload date
|
extension, %(format)s for the format description
|
||||||
(YYYYMMDD), %(extractor)s for the provider
|
(like "22 - 1280x720" or "HD"), %(upload_date)s for
|
||||||
(youtube, metacafe, etc), %(id)s for the video id
|
the upload date (YYYYMMDD), %(extractor)s for the
|
||||||
, %(playlist)s for the playlist the video is in,
|
provider (youtube, metacafe, etc), %(id)s for the
|
||||||
%(playlist_index)s for the position in the
|
video id , %(playlist)s for the playlist the
|
||||||
playlist and %% for a literal percent. Use - to
|
video is in, %(playlist_index)s for the position
|
||||||
output to stdout. Can also be used to download to
|
in the playlist and %% for a literal percent. Use
|
||||||
a different directory, for example with -o '/my/d
|
- to output to stdout. Can also be used to
|
||||||
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
download to a different directory, for example
|
||||||
|
with -o '/my/downloads/%(uploader)s/%(title)s-%(i
|
||||||
|
d)s.%(ext)s' .
|
||||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
||||||
when it is present in output filename template or
|
when it is present in output filename template or
|
||||||
--autonumber option is given
|
--autonumber option is given
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
__youtube-dl()
|
__youtube_dl()
|
||||||
{
|
{
|
||||||
local cur prev opts
|
local cur prev opts
|
||||||
COMPREPLY=()
|
COMPREPLY=()
|
||||||
@@ -15,4 +15,4 @@ __youtube-dl()
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
complete -F __youtube-dl youtube-dl
|
complete -F __youtube_dl youtube-dl
|
||||||
|
|||||||
@@ -88,10 +88,6 @@ ROOT=$(pwd)
|
|||||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
"$ROOT/devscripts/gh-pages/update-sites.py"
|
||||||
git add *.html *.html.in update
|
git add *.html *.html.in update
|
||||||
git commit -m "release $version"
|
git commit -m "release $version"
|
||||||
git show HEAD
|
|
||||||
read -p "Is it good, can I push? (y/n) " -n 1
|
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
|
||||||
echo
|
|
||||||
git push "$ROOT" gh-pages
|
git push "$ROOT" gh-pages
|
||||||
git push "$ORIGIN_URL" gh-pages
|
git push "$ORIGIN_URL" gh-pages
|
||||||
)
|
)
|
||||||
|
|||||||
1
setup.py
1
setup.py
@@ -63,6 +63,7 @@ setup(
|
|||||||
' YouTube.com and other video sites.',
|
' YouTube.com and other video sites.',
|
||||||
url='https://github.com/rg3/youtube-dl',
|
url='https://github.com/rg3/youtube-dl',
|
||||||
author='Ricardo Garcia',
|
author='Ricardo Garcia',
|
||||||
|
author_email='ytdl@yt-dl.org',
|
||||||
maintainer='Philipp Hagemeister',
|
maintainer='Philipp Hagemeister',
|
||||||
maintainer_email='phihag@phihag.de',
|
maintainer_email='phihag@phihag.de',
|
||||||
packages=['youtube_dl', 'youtube_dl.extractor'],
|
packages=['youtube_dl', 'youtube_dl.extractor'],
|
||||||
|
|||||||
@@ -34,10 +34,10 @@ def try_rm(filename):
|
|||||||
|
|
||||||
|
|
||||||
class FakeYDL(YoutubeDL):
|
class FakeYDL(YoutubeDL):
|
||||||
def __init__(self):
|
def __init__(self, override=None):
|
||||||
# Different instances of the downloader can't share the same dictionary
|
# Different instances of the downloader can't share the same dictionary
|
||||||
# some tests set the "sublang" parameter, which would break the md5 checks.
|
# some tests set the "sublang" parameter, which would break the md5 checks.
|
||||||
params = get_params()
|
params = get_params(override=override)
|
||||||
super(FakeYDL, self).__init__(params)
|
super(FakeYDL, self).__init__(params)
|
||||||
self.result = []
|
self.result = []
|
||||||
|
|
||||||
|
|||||||
133
test/test_YoutubeDL.py
Normal file
133
test/test_YoutubeDL.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import FakeYDL
|
||||||
|
|
||||||
|
|
||||||
|
class YDL(FakeYDL):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(YDL, self).__init__(*args, **kwargs)
|
||||||
|
self.downloaded_info_dicts = []
|
||||||
|
self.msgs = []
|
||||||
|
|
||||||
|
def process_info(self, info_dict):
|
||||||
|
self.downloaded_info_dicts.append(info_dict)
|
||||||
|
|
||||||
|
def to_screen(self, msg):
|
||||||
|
self.msgs.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatSelection(unittest.TestCase):
|
||||||
|
def test_prefer_free_formats(self):
|
||||||
|
# Same resolution => download webm
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.params['prefer_free_formats'] = True
|
||||||
|
formats = [
|
||||||
|
{u'ext': u'webm', u'height': 460},
|
||||||
|
{u'ext': u'mp4', u'height': 460},
|
||||||
|
]
|
||||||
|
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'ext'], u'webm')
|
||||||
|
|
||||||
|
# Different resolution => download best quality (mp4)
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.params['prefer_free_formats'] = True
|
||||||
|
formats = [
|
||||||
|
{u'ext': u'webm', u'height': 720},
|
||||||
|
{u'ext': u'mp4', u'height': 1080},
|
||||||
|
]
|
||||||
|
info_dict[u'formats'] = formats
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||||
|
|
||||||
|
# No prefer_free_formats => keep original formats order
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.params['prefer_free_formats'] = False
|
||||||
|
formats = [
|
||||||
|
{u'ext': u'webm', u'height': 720},
|
||||||
|
{u'ext': u'flv', u'height': 720},
|
||||||
|
]
|
||||||
|
info_dict[u'formats'] = formats
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'ext'], u'flv')
|
||||||
|
|
||||||
|
def test_format_limit(self):
|
||||||
|
formats = [
|
||||||
|
{u'format_id': u'meh'},
|
||||||
|
{u'format_id': u'good'},
|
||||||
|
{u'format_id': u'great'},
|
||||||
|
{u'format_id': u'excellent'},
|
||||||
|
]
|
||||||
|
info_dict = {
|
||||||
|
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||||
|
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||||
|
|
||||||
|
ydl = YDL({'format_limit': 'good'})
|
||||||
|
assert ydl.params['format_limit'] == 'good'
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'format_id'], u'good')
|
||||||
|
|
||||||
|
ydl = YDL({'format_limit': 'great', 'format': 'all'})
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
|
||||||
|
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
|
||||||
|
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
|
||||||
|
self.assertTrue('3' in ydl.msgs[0])
|
||||||
|
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.params['format_limit'] = 'excellent'
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||||
|
|
||||||
|
def test_format_selection(self):
|
||||||
|
formats = [
|
||||||
|
{u'format_id': u'35', u'ext': u'mp4'},
|
||||||
|
{u'format_id': u'45', u'ext': u'webm'},
|
||||||
|
{u'format_id': u'47', u'ext': u'webm'},
|
||||||
|
{u'format_id': u'2', u'ext': u'flv'},
|
||||||
|
]
|
||||||
|
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||||
|
|
||||||
|
ydl = YDL({'format': u'20/47'})
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], u'47')
|
||||||
|
|
||||||
|
ydl = YDL({'format': u'20/71/worst'})
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], u'35')
|
||||||
|
|
||||||
|
ydl = YDL()
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], u'2')
|
||||||
|
|
||||||
|
ydl = YDL({'format': u'webm/mp4'})
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], u'47')
|
||||||
|
|
||||||
|
ydl = YDL({'format': u'3gp/40/mp4'})
|
||||||
|
ydl.process_ie_result(info_dict)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], u'35')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
@@ -216,10 +216,10 @@ class YoutubeDL(object):
|
|||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
if sys.stderr.isatty() and os.name != 'nt':
|
if sys.stderr.isatty() and os.name != 'nt':
|
||||||
_msg_header=u'\033[0;33mWARNING:\033[0m'
|
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header=u'WARNING:'
|
_msg_header = u'WARNING:'
|
||||||
warning_message=u'%s %s' % (_msg_header,message)
|
warning_message = u'%s %s' % (_msg_header, message)
|
||||||
self.to_stderr(warning_message)
|
self.to_stderr(warning_message)
|
||||||
|
|
||||||
def report_error(self, message, tb=None):
|
def report_error(self, message, tb=None):
|
||||||
@@ -234,19 +234,6 @@ class YoutubeDL(object):
|
|||||||
error_message = u'%s %s' % (_msg_header, message)
|
error_message = u'%s %s' % (_msg_header, message)
|
||||||
self.trouble(error_message, tb)
|
self.trouble(error_message, tb)
|
||||||
|
|
||||||
def slow_down(self, start_time, byte_counter):
|
|
||||||
"""Sleep if the download speed is over the rate limit."""
|
|
||||||
rate_limit = self.params.get('ratelimit', None)
|
|
||||||
if rate_limit is None or byte_counter == 0:
|
|
||||||
return
|
|
||||||
now = time.time()
|
|
||||||
elapsed = now - start_time
|
|
||||||
if elapsed <= 0.0:
|
|
||||||
return
|
|
||||||
speed = float(byte_counter) / elapsed
|
|
||||||
if speed > rate_limit:
|
|
||||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
|
||||||
|
|
||||||
def report_writedescription(self, descfn):
|
def report_writedescription(self, descfn):
|
||||||
""" Report that the description file is being written """
|
""" Report that the description file is being written """
|
||||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||||
@@ -288,13 +275,15 @@ class YoutubeDL(object):
|
|||||||
if template_dict['playlist_index'] is not None:
|
if template_dict['playlist_index'] is not None:
|
||||||
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
||||||
|
|
||||||
sanitize = lambda k,v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
u'NA' if v is None else compat_str(v),
|
u'NA' if v is None else compat_str(v),
|
||||||
restricted=self.params.get('restrictfilenames'),
|
restricted=self.params.get('restrictfilenames'),
|
||||||
is_id=(k==u'id'))
|
is_id=(k == u'id'))
|
||||||
template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
|
template_dict = dict((k, sanitize(k, v))
|
||||||
|
for k, v in template_dict.items())
|
||||||
|
|
||||||
filename = self.params['outtmpl'] % template_dict
|
tmpl = os.path.expanduser(self.params['outtmpl'])
|
||||||
|
filename = tmpl % template_dict
|
||||||
return filename
|
return filename
|
||||||
except KeyError as err:
|
except KeyError as err:
|
||||||
self.report_error(u'Erroneous output template')
|
self.report_error(u'Erroneous output template')
|
||||||
@@ -390,13 +379,7 @@ class YoutubeDL(object):
|
|||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
ie_result.update(extra_info)
|
ie_result.update(extra_info)
|
||||||
if 'playlist' not in ie_result:
|
return self.process_video_result(ie_result)
|
||||||
# It isn't part of a playlist
|
|
||||||
ie_result['playlist'] = None
|
|
||||||
ie_result['playlist_index'] = None
|
|
||||||
if download:
|
|
||||||
self.process_info(ie_result)
|
|
||||||
return ie_result
|
|
||||||
elif result_type == 'url':
|
elif result_type == 'url':
|
||||||
# We have to add extra_info to the results because it may be
|
# We have to add extra_info to the results because it may be
|
||||||
# contained in a playlist
|
# contained in a playlist
|
||||||
@@ -407,7 +390,7 @@ class YoutubeDL(object):
|
|||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist':
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||||
|
|
||||||
playlist_results = []
|
playlist_results = []
|
||||||
|
|
||||||
@@ -425,12 +408,12 @@ class YoutubeDL(object):
|
|||||||
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||||
|
|
||||||
for i,entry in enumerate(entries,1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
|
self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
|
||||||
extra = {
|
extra = {
|
||||||
'playlist': playlist,
|
'playlist': playlist,
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': i + playliststart,
|
||||||
}
|
}
|
||||||
if not 'extractor' in entry:
|
if not 'extractor' in entry:
|
||||||
# We set the extractor, if it's an url it will be set then to
|
# We set the extractor, if it's an url it will be set then to
|
||||||
# the new extractor, but if it's already a video we must make
|
# the new extractor, but if it's already a video we must make
|
||||||
@@ -454,6 +437,103 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
def select_format(self, format_spec, available_formats):
|
||||||
|
if format_spec == 'best' or format_spec is None:
|
||||||
|
return available_formats[-1]
|
||||||
|
elif format_spec == 'worst':
|
||||||
|
return available_formats[0]
|
||||||
|
else:
|
||||||
|
extensions = [u'mp4', u'flv', u'webm', u'3gp']
|
||||||
|
if format_spec in extensions:
|
||||||
|
filter_f = lambda f: f['ext'] == format_spec
|
||||||
|
else:
|
||||||
|
filter_f = lambda f: f['format_id'] == format_spec
|
||||||
|
matches = list(filter(filter_f, available_formats))
|
||||||
|
if matches:
|
||||||
|
return matches[-1]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def process_video_result(self, info_dict, download=True):
|
||||||
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
||||||
|
if 'playlist' not in info_dict:
|
||||||
|
# It isn't part of a playlist
|
||||||
|
info_dict['playlist'] = None
|
||||||
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
|
# These extractors handle format selection themselves
|
||||||
|
if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
|
||||||
|
if download:
|
||||||
|
self.process_info(info_dict)
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
# We now pick which formats have to be downloaded
|
||||||
|
if info_dict.get('formats') is None:
|
||||||
|
# There's only one format available
|
||||||
|
formats = [info_dict]
|
||||||
|
else:
|
||||||
|
formats = info_dict['formats']
|
||||||
|
|
||||||
|
# We check that all the formats have the format and format_id fields
|
||||||
|
for (i, format) in enumerate(formats):
|
||||||
|
if format.get('format_id') is None:
|
||||||
|
format['format_id'] = compat_str(i)
|
||||||
|
if format.get('format') is None:
|
||||||
|
format['format'] = u'{id} - {res}{note}'.format(
|
||||||
|
id=format['format_id'],
|
||||||
|
res=self.format_resolution(format),
|
||||||
|
note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.params.get('listformats', None):
|
||||||
|
self.list_formats(info_dict)
|
||||||
|
return
|
||||||
|
|
||||||
|
format_limit = self.params.get('format_limit', None)
|
||||||
|
if format_limit:
|
||||||
|
formats = list(takewhile_inclusive(
|
||||||
|
lambda f: f['format_id'] != format_limit, formats
|
||||||
|
))
|
||||||
|
if self.params.get('prefer_free_formats'):
|
||||||
|
def _free_formats_key(f):
|
||||||
|
try:
|
||||||
|
ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
|
||||||
|
except ValueError:
|
||||||
|
ext_ord = -1
|
||||||
|
# We only compare the extension if they have the same height and width
|
||||||
|
return (f.get('height'), f.get('width'), ext_ord)
|
||||||
|
formats = sorted(formats, key=_free_formats_key)
|
||||||
|
|
||||||
|
req_format = self.params.get('format', 'best')
|
||||||
|
if req_format is None:
|
||||||
|
req_format = 'best'
|
||||||
|
formats_to_download = []
|
||||||
|
# The -1 is for supporting YoutubeIE
|
||||||
|
if req_format in ('-1', 'all'):
|
||||||
|
formats_to_download = formats
|
||||||
|
else:
|
||||||
|
# We can accept formats requested in the format: 34/5/best, we pick
|
||||||
|
# the first that is available, starting from left
|
||||||
|
req_formats = req_format.split('/')
|
||||||
|
for rf in req_formats:
|
||||||
|
selected_format = self.select_format(rf, formats)
|
||||||
|
if selected_format is not None:
|
||||||
|
formats_to_download = [selected_format]
|
||||||
|
break
|
||||||
|
if not formats_to_download:
|
||||||
|
raise ExtractorError(u'requested format not available')
|
||||||
|
|
||||||
|
if download:
|
||||||
|
if len(formats_to_download) > 1:
|
||||||
|
self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
|
||||||
|
for format in formats_to_download:
|
||||||
|
new_info = dict(info_dict)
|
||||||
|
new_info.update(format)
|
||||||
|
self.process_info(new_info)
|
||||||
|
# We update the info dict with the best quality format (backwards compatibility)
|
||||||
|
info_dict.update(formats_to_download[-1])
|
||||||
|
return info_dict
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@@ -529,20 +609,20 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if self.params.get('writeannotations', False):
|
if self.params.get('writeannotations', False):
|
||||||
try:
|
try:
|
||||||
annofn = filename + u'.annotations.xml'
|
annofn = filename + u'.annotations.xml'
|
||||||
self.report_writeannotations(annofn)
|
self.report_writeannotations(annofn)
|
||||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||||
annofile.write(info_dict['annotations'])
|
annofile.write(info_dict['annotations'])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
self.report_warning(u'There are no annotations to write.')
|
self.report_warning(u'There are no annotations to write.')
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'Cannot write annotations file: ' + annofn)
|
self.report_error(u'Cannot write annotations file: ' + annofn)
|
||||||
return
|
return
|
||||||
|
|
||||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||||
self.params.get('writeautomaticsub')])
|
self.params.get('writeautomaticsub')])
|
||||||
|
|
||||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['subtitles']
|
subtitles = info_dict['subtitles']
|
||||||
@@ -564,7 +644,7 @@ class YoutubeDL(object):
|
|||||||
infofn = filename + u'.info.json'
|
infofn = filename + u'.info.json'
|
||||||
self.report_writeinfojson(infofn)
|
self.report_writeinfojson(infofn)
|
||||||
try:
|
try:
|
||||||
json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
|
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
||||||
@@ -634,7 +714,7 @@ class YoutubeDL(object):
|
|||||||
keep_video = None
|
keep_video = None
|
||||||
for pp in self._pps:
|
for pp in self._pps:
|
||||||
try:
|
try:
|
||||||
keep_video_wish,new_info = pp.run(info)
|
keep_video_wish, new_info = pp.run(info)
|
||||||
if keep_video_wish is not None:
|
if keep_video_wish is not None:
|
||||||
if keep_video_wish:
|
if keep_video_wish:
|
||||||
keep_video = keep_video_wish
|
keep_video = keep_video_wish
|
||||||
@@ -672,3 +752,32 @@ class YoutubeDL(object):
|
|||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
||||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||||
archive_file.write(vid_id + u'\n')
|
archive_file.write(vid_id + u'\n')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def format_resolution(format):
|
||||||
|
if format.get('height') is not None:
|
||||||
|
if format.get('width') is not None:
|
||||||
|
res = u'%sx%s' % (format['width'], format['height'])
|
||||||
|
else:
|
||||||
|
res = u'%sp' % format['height']
|
||||||
|
else:
|
||||||
|
res = '???'
|
||||||
|
return res
|
||||||
|
|
||||||
|
def list_formats(self, info_dict):
|
||||||
|
formats_s = []
|
||||||
|
for format in info_dict.get('formats', [info_dict]):
|
||||||
|
formats_s.append(u'%-15s: %-5s %-15s[%s]' % (
|
||||||
|
format['format_id'],
|
||||||
|
format['ext'],
|
||||||
|
format.get('format_note') or '-',
|
||||||
|
self.format_resolution(format),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if len(formats_s) != 1:
|
||||||
|
formats_s[0] += ' (worst)'
|
||||||
|
formats_s[-1] += ' (best)'
|
||||||
|
formats_s = "\n".join(formats_s)
|
||||||
|
self.to_screen(u'[info] Available formats for %s:\n'
|
||||||
|
u'format code extension note resolution\n%s' % (
|
||||||
|
info_dict['id'], formats_s))
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
def _hide_login_info(opts):
|
def _hide_login_info(opts):
|
||||||
opts = list(opts)
|
opts = list(opts)
|
||||||
for private_opt in ['-p', '--password', '-u', '--username']:
|
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
|
||||||
try:
|
try:
|
||||||
i = opts.index(private_opt)
|
i = opts.index(private_opt)
|
||||||
opts[i+1] = '<PRIVATE>'
|
opts[i+1] = '<PRIVATE>'
|
||||||
@@ -179,6 +179,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
||||||
general.add_option('-i', '--ignore-errors',
|
general.add_option('-i', '--ignore-errors',
|
||||||
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
|
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
|
||||||
|
general.add_option('--abort-on-error',
|
||||||
|
action='store_false', dest='ignoreerrors',
|
||||||
|
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
|
||||||
general.add_option('--dump-user-agent',
|
general.add_option('--dump-user-agent',
|
||||||
action='store_true', dest='dump_user_agent',
|
action='store_true', dest='dump_user_agent',
|
||||||
help='display the current browser identification', default=False)
|
help='display the current browser identification', default=False)
|
||||||
@@ -196,7 +199,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--cache-dir', dest='cachedir', default=get_cachedir(),
|
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||||
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||||
@@ -235,7 +238,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
|
|
||||||
video_format.add_option('-f', '--format',
|
video_format.add_option('-f', '--format',
|
||||||
action='store', dest='format', metavar='FORMAT',
|
action='store', dest='format', metavar='FORMAT', default='best',
|
||||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||||
video_format.add_option('--all-formats',
|
video_format.add_option('--all-formats',
|
||||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||||
@@ -332,7 +335,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
help=('output filename template. Use %(title)s to get the title, '
|
help=('output filename template. Use %(title)s to get the title, '
|
||||||
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
||||||
'%(autonumber)s to get an automatically incremented number, '
|
'%(autonumber)s to get an automatically incremented number, '
|
||||||
'%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), '
|
'%(ext)s for the filename extension, '
|
||||||
|
'%(format)s for the format description (like "22 - 1280x720" or "HD")'
|
||||||
|
'%(upload_date)s for the upload date (YYYYMMDD), '
|
||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
||||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||||
|
|||||||
@@ -102,6 +102,7 @@ from .ro220 import Ro220IE
|
|||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
|
from .rutube import RutubeIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
@@ -114,6 +115,7 @@ from .statigram import StatigramIE
|
|||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
|||||||
@@ -174,12 +174,27 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
# Some formats use the m3u8 protocol
|
# Some formats use the m3u8 protocol
|
||||||
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
|
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
|
||||||
# We order the formats by quality
|
# We order the formats by quality
|
||||||
formats = sorted(formats, key=lambda f: int(f.get('height',-1)))
|
formats = list(formats) # in python3 filter returns an iterator
|
||||||
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||||
|
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||||
|
else:
|
||||||
|
sort_key = lambda f: int(f.get('height',-1))
|
||||||
|
formats = sorted(formats, key=sort_key)
|
||||||
# Prefer videos without subtitles in the same language
|
# Prefer videos without subtitles in the same language
|
||||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
|
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
|
||||||
# Pick the best quality
|
# Pick the best quality
|
||||||
def _format(format_info):
|
def _format(format_info):
|
||||||
|
quality = format_info['quality']
|
||||||
|
m_quality = re.match(r'\w*? - (\d*)p', quality)
|
||||||
|
if m_quality is not None:
|
||||||
|
quality = m_quality.group(1)
|
||||||
|
if format_info.get('versionCode') is not None:
|
||||||
|
format_id = u'%s-%s' % (quality, format_info['versionCode'])
|
||||||
|
else:
|
||||||
|
format_id = quality
|
||||||
info = {
|
info = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'format_note': format_info.get('versionLibelle'),
|
||||||
'width': format_info.get('width'),
|
'width': format_info.get('width'),
|
||||||
'height': format_info.get('height'),
|
'height': format_info.get('height'),
|
||||||
}
|
}
|
||||||
@@ -192,8 +207,6 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
info['ext'] = determine_ext(info['url'])
|
info['ext'] = determine_ext(info['url'])
|
||||||
return info
|
return info
|
||||||
info_dict['formats'] = [_format(f) for f in formats]
|
info_dict['formats'] = [_format(f) for f in formats]
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info_dict.update(info_dict['formats'][-1])
|
|
||||||
|
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
@@ -207,7 +220,7 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
|||||||
u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||||
u'file': u'050489-002.mp4',
|
u'file': u'050489-002.mp4',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'Agentur Amateur #2 - Corporate Design',
|
u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
||||||
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
|
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
|
||||||
lambda m: m.group(1) + '/>', object_str)
|
lambda m: m.group(1) + '/>', object_str)
|
||||||
|
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
||||||
|
object_str = object_str.replace(u'<--', u'<!--')
|
||||||
|
|
||||||
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
||||||
assert u'BrightcoveExperience' in object_doc.attrib['class']
|
assert u'BrightcoveExperience' in object_doc.attrib['class']
|
||||||
@@ -96,7 +98,10 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
|
playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
|
||||||
player_key, u'Downloading playlist information')
|
player_key, u'Downloading playlist information')
|
||||||
|
|
||||||
playlist_info = json.loads(playlist_info)['videoList']
|
json_data = json.loads(playlist_info)
|
||||||
|
if 'videoList' not in json_data:
|
||||||
|
raise ExtractorError(u'Empty playlist')
|
||||||
|
playlist_info = json_data['videoList']
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
RegexNotFoundError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -61,9 +62,12 @@ class InfoExtractor(object):
|
|||||||
* ext Will be calculated from url if missing
|
* ext Will be calculated from url if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
Calculated from width and height if missing.
|
Calculated from the format_id, width, height
|
||||||
|
and format_note fields if missing.
|
||||||
* format_id A short description of the format
|
* format_id A short description of the format
|
||||||
("mp4_h264_opus" or "19")
|
("mp4_h264_opus" or "19")
|
||||||
|
* format_note Additional info about the format
|
||||||
|
("3D" or "DASH video")
|
||||||
* width Width of the video, if known
|
* width Width of the video, if known
|
||||||
* height Height of the video, if known
|
* height Height of the video, if known
|
||||||
|
|
||||||
@@ -228,7 +232,7 @@ class InfoExtractor(object):
|
|||||||
Perform a regex search on the given string, using a single or a list of
|
Perform a regex search on the given string, using a single or a list of
|
||||||
patterns returning the first matching group.
|
patterns returning the first matching group.
|
||||||
In case of failure return a default value or raise a WARNING or a
|
In case of failure return a default value or raise a WARNING or a
|
||||||
ExtractorError, depending on fatal, specifying the field name.
|
RegexNotFoundError, depending on fatal, specifying the field name.
|
||||||
"""
|
"""
|
||||||
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
||||||
mobj = re.search(pattern, string, flags)
|
mobj = re.search(pattern, string, flags)
|
||||||
@@ -248,7 +252,7 @@ class InfoExtractor(object):
|
|||||||
elif default is not None:
|
elif default is not None:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
raise ExtractorError(u'Unable to extract %s' % _name)
|
raise RegexNotFoundError(u'Unable to extract %s' % _name)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(u'unable to extract %s; '
|
self._downloader.report_warning(u'unable to extract %s; '
|
||||||
u'please report this issue on http://yt-dl.org/bug' % _name)
|
u'please report this issue on http://yt-dl.org/bug' % _name)
|
||||||
@@ -365,7 +369,7 @@ class SearchInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
raise NotImplementedError("This method must be implemented by sublclasses")
|
raise NotImplementedError("This method must be implemented by subclasses")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def SEARCH_KEY(self):
|
def SEARCH_KEY(self):
|
||||||
|
|||||||
@@ -28,6 +28,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
||||||
IE_NAME = u'dailymotion'
|
IE_NAME = u'dailymotion'
|
||||||
|
|
||||||
|
_FORMATS = [
|
||||||
|
(u'stream_h264_ld_url', u'ld'),
|
||||||
|
(u'stream_h264_url', u'standard'),
|
||||||
|
(u'stream_h264_hq_url', u'hq'),
|
||||||
|
(u'stream_h264_hd_url', u'hd'),
|
||||||
|
(u'stream_h264_hd1080_url', u'hd180'),
|
||||||
|
]
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
||||||
@@ -60,7 +69,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
||||||
|
|
||||||
video_extension = 'mp4'
|
|
||||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
@@ -99,18 +107,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
# TODO: support choosing qualities
|
formats = []
|
||||||
|
for (key, format_id) in self._FORMATS:
|
||||||
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
video_url = info.get(key)
|
||||||
'stream_h264_hq_url','stream_h264_url',
|
if video_url is not None:
|
||||||
'stream_h264_ld_url']:
|
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||||
if info.get(key):#key in info and info[key]:
|
if m_size is not None:
|
||||||
max_quality = key
|
width, height = m_size.group(1), m_size.group(2)
|
||||||
self.to_screen(u'Using %s' % key)
|
else:
|
||||||
break
|
width, height = None, None
|
||||||
else:
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': format_id,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
raise ExtractorError(u'Unable to extract video URL')
|
raise ExtractorError(u'Unable to extract video URL')
|
||||||
video_url = info[max_quality]
|
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id)
|
video_subtitles = self.extract_subtitles(video_id)
|
||||||
@@ -120,11 +134,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'ext': video_extension,
|
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'thumbnail': info['thumbnail_url']
|
'thumbnail': info['thumbnail_url']
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -142,12 +142,19 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded Vimeo player
|
# Look for embedded Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage)
|
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
player_url = unescapeHTML(mobj.group(1))
|
player_url = unescapeHTML(mobj.group(1))
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
|
# Look for embedded YouTube player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
|
||||||
|
if mobj:
|
||||||
|
surl = unescapeHTML(mobj.group(1))
|
||||||
|
return self.url_result(surl, 'Youtube')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|||||||
@@ -41,9 +41,9 @@ class GooglePlusIE(InfoExtractor):
|
|||||||
|
|
||||||
# Extract update date
|
# Extract update date
|
||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
|
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||||
webpage, u'upload date', fatal=False)
|
webpage, u'upload date', fatal=False, flags=re.VERBOSE)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
# Convert timestring to a format suitable for filename
|
# Convert timestring to a format suitable for filename
|
||||||
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'SKYFALL',
|
u'title': u'SKYFALL',
|
||||||
u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||||
u'duration': 156,
|
u'duration': 153,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,7 +74,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
formats = sorted(formats, key=lambda f: f['bitrate'])
|
formats = sorted(formats, key=lambda f: f['bitrate'])
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': item.find('title').text,
|
'title': item.find('title').text,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -82,6 +82,3 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
'description': item.find('description').text,
|
'description': item.find('description').text,
|
||||||
'duration': int(attr['duration']),
|
'duration': int(attr['duration']),
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
|||||||
@@ -90,8 +90,8 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
|
|||||||
r'{statusIndex:0,index:0,.*?id:(.*?),'],
|
r'{statusIndex:0,index:0,.*?id:(.*?),'],
|
||||||
webpage, u'category id')
|
webpage, u'category id')
|
||||||
playlist_title = self._html_search_regex(
|
playlist_title = self._html_search_regex(
|
||||||
r'\?catid=%s">(.*?)</a>' % cat_id,
|
r'tab0"[^>]*?>(.*?)</td>',
|
||||||
webpage, u'playlist title', flags=re.DOTALL)
|
webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
|
||||||
|
|
||||||
data = compat_urllib_parse.urlencode({
|
data = compat_urllib_parse.urlencode({
|
||||||
'cid': cat_id,
|
'cid': cat_id,
|
||||||
|
|||||||
@@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage_url = 'http://www.nowvideo.ch/video/' + video_id
|
webpage_url = 'http://www.nowvideo.ch/video/' + video_id
|
||||||
|
embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
|
||||||
webpage = self._download_webpage(webpage_url, video_id)
|
webpage = self._download_webpage(webpage_url, video_id)
|
||||||
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
|
u'Downloading embed page')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
@@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor):
|
|||||||
webpage, u'video title')
|
webpage, u'video title')
|
||||||
|
|
||||||
video_key = self._search_regex(r'var fkzd="(.*)";',
|
video_key = self._search_regex(r'var fkzd="(.*)";',
|
||||||
webpage, u'video key')
|
embed_page, u'video key')
|
||||||
|
|
||||||
api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
|
api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
|
||||||
api_response = self._download_webpage(api_call, video_id,
|
api_response = self._download_webpage(api_call, video_id,
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ class RedTubeIE(InfoExtractor):
|
|||||||
u'file': u'66418.mp4',
|
u'file': u'66418.mp4',
|
||||||
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Sucked on a toilet"
|
u"title": u"Sucked on a toilet",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
58
youtube_dl/extractor/rutube.py
Normal file
58
youtube_dl/extractor/rutube.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeIE(InfoExtractor):
    """Information extractor for rutube.ru video pages.

    The id in the page URL (the "long id") is only used to download the
    page.  The API is queried with a second id (the "short id") taken from
    the path of the page's og:video URL.
    """

    # The dot of the host name is escaped so that only the literal
    # "rutube.ru" matches, not any character in that position.
    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'

    _TEST = {
        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
        u'info_dict': {
            u'title': u'Раненный кенгуру забежал в аптеку',
            u'uploader': u'NTDRussian',
            u'uploader_id': u'29790',
        },
        u'params': {
            # It requires ffmpeg (m3u8 download)
            u'skip_download': True,
        },
    }

    def _get_api_response(self, short_id, subpath):
        """Download and decode one JSON document of the play API.

        short_id -- id used by the API (also used as the id shown in the
                    download progress message)
        subpath  -- API endpoint name, inserted into the URL before the id

        Returns the decoded JSON object.
        """
        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
        response_json = self._download_webpage(
            api_url, short_id, u'Downloading %s json' % subpath)
        return json.loads(response_json)

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Raises ExtractorError if the API response carries no m3u8 manifest
        URL.
        """
        mobj = re.match(self._VALID_URL, url)
        long_id = mobj.group('long_id')
        webpage = self._download_webpage(url, long_id)

        # The API id is the path component of the og:video URL, without the
        # leading slash.
        og_video = self._og_search_video_url(webpage)
        short_id = compat_urlparse.urlparse(og_video).path[1:]

        options = self._get_api_response(short_id, 'options')
        trackinfo = self._get_api_response(short_id, 'trackinfo')

        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        # An author entry without an id must not abort the extraction, the
        # uploader id is optional metadata.
        uploader_id = author.get('id')
        if uploader_id is not None:
            uploader_id = compat_str(uploader_id)

        m3u8_url = trackinfo['video_balancer'].get('m3u8')
        if m3u8_url is None:
            raise ExtractorError(u'Couldn\'t find m3u8 manifest url')

        return {
            'id': trackinfo['id'],
            'title': trackinfo['title'],
            'url': m3u8_url,
            'ext': 'mp4',
            'thumbnail': options['thumbnail_url'],
            'uploader': author.get('name'),
            'uploader_id': uploader_id,
        }
|
||||||
65
youtube_dl/extractor/techtalks.py
Normal file
65
youtube_dl/extractor/techtalks.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
get_element_by_attribute,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TechTalksIE(InfoExtractor):
    """Information extractor for talk pages on techtalks.tv.

    A talk always yields the presenter stream; when the page also links a
    slides stream, a second entry is returned for it.
    """

    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
        u'playlist': [
            {
                u'file': u'57758.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
                u'file': u'57758-slides.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract the stream(s) of the talk at *url*.

        Returns a single info dict when the page has no slides link,
        otherwise a list holding the presenter entry followed by the slides
        entry.  Both entries share the same rtmp URL and title and differ
        only in id and play path.
        """
        talk_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, talk_id)

        rtmp_url = self._search_regex(
            r'netConnectionUrl: \'(.*?)\'', webpage, u'rtmp url')
        presenter_path = self._search_regex(
            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
            webpage, u'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))

        def build_entry(entry_id, play_path):
            # Everything except the id and the play path is common to the
            # presenter and the slides streams.
            return {
                'id': entry_id,
                'title': title,
                'url': rtmp_url,
                'play_path': play_path,
                'ext': 'flv',
            }

        presenter_entry = build_entry(talk_id, presenter_path)

        slides_match = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if slides_match is None:
            return presenter_entry

        # The slides video
        slides_entry = build_entry(talk_id + '-slides', slides_match.group(1))
        return [presenter_entry, slides_entry]
|
||||||
@@ -48,7 +48,8 @@ class TudouIE(InfoExtractor):
|
|||||||
'ie_key': 'Youku'
|
'ie_key': 'Youku'
|
||||||
}
|
}
|
||||||
|
|
||||||
title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title')
|
title = self._search_regex(
|
||||||
|
r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
|
||||||
thumbnail_url = self._search_regex(
|
thumbnail_url = self._search_regex(
|
||||||
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor):
|
|||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'KICK-ASS 2',
|
u'title': u'KICK-ASS 2',
|
||||||
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
|
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
|
||||||
u'duration': 138,
|
u'duration': 135,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# encoding: utf-8
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import itertools
|
import itertools
|
||||||
@@ -10,6 +11,7 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
RegexNotFoundError,
|
||||||
std_headers,
|
std_headers,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
@@ -25,7 +27,7 @@ class VimeoIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
u'url': u'http://vimeo.com/56015672',
|
u'url': u'http://vimeo.com/56015672',
|
||||||
u'file': u'56015672.mp4',
|
u'file': u'56015672.mp4',
|
||||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
u'md5': u'ae7a1d8b183758a0506b0622f37dfa14',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20121220",
|
u"upload_date": u"20121220",
|
||||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||||
@@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor):
|
|||||||
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
||||||
u'uploader': u'The BLN & Business of Software',
|
u'uploader': u'The BLN & Business of Software',
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://vimeo.com/68375962',
|
||||||
|
u'file': u'68375962.mp4',
|
||||||
|
u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7',
|
||||||
|
u'note': u'Video protected with password',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'youtube-dl password protected test video',
|
||||||
|
u'upload_date': u'20130614',
|
||||||
|
u'uploader_id': u'user18948128',
|
||||||
|
u'uploader': u'Jaime Marquínez Ferrándiz',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
u'videopassword': u'youtube-dl',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -129,18 +146,26 @@ class VimeoIE(InfoExtractor):
|
|||||||
|
|
||||||
# Extract the config JSON
|
# Extract the config JSON
|
||||||
try:
|
try:
|
||||||
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
|
try:
|
||||||
webpage, u'info section', flags=re.DOTALL)
|
config_url = self._html_search_regex(
|
||||||
config = json.loads(config)
|
r' data-config-url="(.+?)"', webpage, u'config URL')
|
||||||
except:
|
config_json = self._download_webpage(config_url, video_id)
|
||||||
|
config = json.loads(config_json)
|
||||||
|
except RegexNotFoundError:
|
||||||
|
# For pro videos or player.vimeo.com urls
|
||||||
|
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
|
||||||
|
webpage, u'info section', flags=re.DOTALL)
|
||||||
|
config = json.loads(config)
|
||||||
|
except Exception as e:
|
||||||
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
||||||
raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
|
raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
|
||||||
|
|
||||||
if re.search('If so please provide the correct password.', webpage):
|
if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
|
||||||
self._verify_video_password(url, video_id, webpage)
|
self._verify_video_password(url, video_id, webpage)
|
||||||
return self._real_extract(url)
|
return self._real_extract(url)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Unable to extract info section')
|
raise ExtractorError(u'Unable to extract info section',
|
||||||
|
cause=e)
|
||||||
|
|
||||||
# Extract title
|
# Extract title
|
||||||
video_title = config["video"]["title"]
|
video_title = config["video"]["title"]
|
||||||
@@ -179,46 +204,45 @@ class VimeoIE(InfoExtractor):
|
|||||||
|
|
||||||
# Vimeo specific: extract video codec and quality information
|
# Vimeo specific: extract video codec and quality information
|
||||||
# First consider quality, then codecs, then take everything
|
# First consider quality, then codecs, then take everything
|
||||||
# TODO bind to format param
|
codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
|
||||||
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
|
|
||||||
files = { 'hd': [], 'sd': [], 'other': []}
|
files = { 'hd': [], 'sd': [], 'other': []}
|
||||||
config_files = config["video"].get("files") or config["request"].get("files")
|
config_files = config["video"].get("files") or config["request"].get("files")
|
||||||
for codec_name, codec_extension in codecs:
|
for codec_name, codec_extension in codecs:
|
||||||
if codec_name in config_files:
|
for quality in config_files.get(codec_name, []):
|
||||||
if 'hd' in config_files[codec_name]:
|
format_id = '-'.join((codec_name, quality)).lower()
|
||||||
files['hd'].append((codec_name, codec_extension, 'hd'))
|
key = quality if quality in files else 'other'
|
||||||
elif 'sd' in config_files[codec_name]:
|
video_url = None
|
||||||
files['sd'].append((codec_name, codec_extension, 'sd'))
|
if isinstance(config_files[codec_name], dict):
|
||||||
|
file_info = config_files[codec_name][quality]
|
||||||
|
video_url = file_info.get('url')
|
||||||
else:
|
else:
|
||||||
files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
|
file_info = {}
|
||||||
|
if video_url is None:
|
||||||
|
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
|
||||||
|
%(video_id, sig, timestamp, quality, codec_name.upper())
|
||||||
|
|
||||||
for quality in ('hd', 'sd', 'other'):
|
files[key].append({
|
||||||
if len(files[quality]) > 0:
|
'ext': codec_extension,
|
||||||
video_quality = files[quality][0][2]
|
'url': video_url,
|
||||||
video_codec = files[quality][0][0]
|
'format_id': format_id,
|
||||||
video_extension = files[quality][0][1]
|
'width': file_info.get('width'),
|
||||||
self.to_screen(u'%s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
|
'height': file_info.get('height'),
|
||||||
break
|
})
|
||||||
else:
|
formats = []
|
||||||
|
for key in ('other', 'sd', 'hd'):
|
||||||
|
formats += files[key]
|
||||||
|
if len(formats) == 0:
|
||||||
raise ExtractorError(u'No known codec found')
|
raise ExtractorError(u'No known codec found')
|
||||||
|
|
||||||
video_url = None
|
|
||||||
if isinstance(config_files[video_codec], dict):
|
|
||||||
video_url = config_files[video_codec][video_quality].get("url")
|
|
||||||
if video_url is None:
|
|
||||||
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
|
|
||||||
%(video_id, sig, timestamp, video_quality, video_codec.upper())
|
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
|
'formats': formats,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ class XHamsterIE(InfoExtractor):
|
|||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20121014",
|
u"upload_date": u"20121014",
|
||||||
u"uploader_id": u"Ruseful2011",
|
u"uploader_id": u"Ruseful2011",
|
||||||
u"title": u"FemaleAgent Shy beauty takes the bait"
|
u"title": u"FemaleAgent Shy beauty takes the bait",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -29,7 +30,8 @@ class XHamsterIE(InfoExtractor):
|
|||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20130914",
|
u"upload_date": u"20130914",
|
||||||
u"uploader_id": u"jojo747400",
|
u"uploader_id": u"jojo747400",
|
||||||
u"title": u"Britney Spears Sexy Booty"
|
u"title": u"Britney Spears Sexy Booty",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -72,6 +74,8 @@ class XHamsterIE(InfoExtractor):
|
|||||||
video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
|
video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
|
||||||
webpage, u'thumbnail', fatal=False)
|
webpage, u'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@@ -80,5 +84,6 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'description': video_description,
|
'description': video_description,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'thumbnail': video_thumbnail
|
'thumbnail': video_thumbnail,
|
||||||
|
'age_limit': age_limit,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -18,7 +18,8 @@ class XNXXIE(InfoExtractor):
|
|||||||
u'file': u'1135332.flv',
|
u'file': u'1135332.flv',
|
||||||
u'md5': u'0831677e2b4761795f68d417e0b7b445',
|
u'md5': u'0831677e2b4761795f68d417e0b7b445',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"lida \u00bb Naked Funny Actress (5)"
|
u"title": u"lida \u00bb Naked Funny Actress (5)",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,4 +51,5 @@ class XNXXIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': None,
|
'description': None,
|
||||||
|
'age_limit': 18,
|
||||||
}]
|
}]
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ class XVideosIE(InfoExtractor):
|
|||||||
u'file': u'939581.flv',
|
u'file': u'939581.flv',
|
||||||
u'md5': u'1d0c835822f0a71a7bf011855db929d0',
|
u'md5': u'1d0c835822f0a71a7bf011855db929d0',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Funny Porns By >>>>S<<<<<< -1"
|
u"title": u"Funny Porns By >>>>S<<<<<< -1",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,6 +47,7 @@ class XVideosIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': None,
|
'description': None,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
return [info]
|
return [info]
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ class YouPornIE(InfoExtractor):
|
|||||||
u"upload_date": u"20101221",
|
u"upload_date": u"20101221",
|
||||||
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
||||||
u"uploader": u"Ask Dan And Jennifer",
|
u"uploader": u"Ask Dan And Jennifer",
|
||||||
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?"
|
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
|
||||||
|
u"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -236,11 +236,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'136': 'mp4',
|
'136': 'mp4',
|
||||||
'137': 'mp4',
|
'137': 'mp4',
|
||||||
'138': 'mp4',
|
'138': 'mp4',
|
||||||
'139': 'mp4',
|
|
||||||
'140': 'mp4',
|
|
||||||
'141': 'mp4',
|
|
||||||
'160': 'mp4',
|
'160': 'mp4',
|
||||||
|
|
||||||
|
# Dash mp4 audio
|
||||||
|
'139': 'm4a',
|
||||||
|
'140': 'm4a',
|
||||||
|
'141': 'm4a',
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'171': 'webm',
|
'171': 'webm',
|
||||||
'172': 'webm',
|
'172': 'webm',
|
||||||
@@ -1150,7 +1152,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
list_page = self._download_webpage(list_url, video_id)
|
list_page = self._download_webpage(list_url, video_id)
|
||||||
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
|
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node.attrib.get('kind') != 'asr' :
|
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||||
return {}
|
return {}
|
||||||
original_lang = original_lang_node.attrib['lang_code']
|
original_lang = original_lang_node.attrib['lang_code']
|
||||||
|
|||||||
@@ -572,6 +572,11 @@ class ExtractorError(Exception):
|
|||||||
return u''.join(traceback.format_tb(self.traceback))
|
return u''.join(traceback.format_tb(self.traceback))
|
||||||
|
|
||||||
|
|
||||||
|
class RegexNotFoundError(ExtractorError):
|
||||||
|
"""Error when a regex didn't match"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DownloadError(Exception):
|
class DownloadError(Exception):
|
||||||
"""Download Error exception.
|
"""Download Error exception.
|
||||||
|
|
||||||
@@ -947,6 +952,15 @@ def shell_quote(args):
|
|||||||
return ' '.join(map(pipes.quote, args))
|
return ' '.join(map(pipes.quote, args))
|
||||||
|
|
||||||
|
|
||||||
|
def takewhile_inclusive(pred, seq):
|
||||||
|
""" Like itertools.takewhile, but include the latest evaluated element
|
||||||
|
(the first element so that Not pred(e)) """
|
||||||
|
for e in seq:
|
||||||
|
yield e
|
||||||
|
if not pred(e):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def smuggle_url(url, data):
|
def smuggle_url(url, data):
|
||||||
""" Pass additional data in a URL for internal use. """
|
""" Pass additional data in a URL for internal use. """
|
||||||
|
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.10.15'
|
__version__ = '2013.10.23.2'
|
||||||
|
|||||||
Reference in New Issue
Block a user