Compare commits
47 Commits
2013.12.11
...
2013.12.16
Author | SHA1 | Date | |
---|---|---|---|
![]() |
24050dd11c | ||
![]() |
8c8e3eec79 | ||
![]() |
7ebc9dee69 | ||
![]() |
ee3e63e477 | ||
![]() |
e9c424c144 | ||
![]() |
0a9ce268ba | ||
![]() |
4b2da48ea7 | ||
![]() |
e64eaaa97d | ||
![]() |
780603027f | ||
![]() |
00902cd601 | ||
![]() |
d67b0b1596 | ||
![]() |
d7dda16888 | ||
![]() |
a19fd00cc4 | ||
![]() |
d66152a898 | ||
![]() |
8c5f0c9fbc | ||
![]() |
6888a874a1 | ||
![]() |
09dacfa57f | ||
![]() |
b2ae513586 | ||
![]() |
e4a0489f6e | ||
![]() |
b83be81d27 | ||
![]() |
6f5dcd4eee | ||
![]() |
1bb2fc98e0 | ||
![]() |
e3946f989e | ||
![]() |
8863d0de91 | ||
![]() |
7b6fefc9d4 | ||
![]() |
525ef9227f | ||
![]() |
c0ba0f4859 | ||
![]() |
b466b7029d | ||
![]() |
fa3ae234e0 | ||
![]() |
48462108f3 | ||
![]() |
f8b56e95b8 | ||
![]() |
5fe18bdbde | ||
![]() |
dca02c80bc | ||
![]() |
9ee859b683 | ||
![]() |
8e05c870b4 | ||
![]() |
5d574e143f | ||
![]() |
2a203a6cda | ||
![]() |
dadb8184e4 | ||
![]() |
7a563df90a | ||
![]() |
24b173fa5c | ||
![]() |
9b17ba0fa5 | ||
![]() |
211f555d4c | ||
![]() |
4d2ebb6bd7 | ||
![]() |
df53747436 | ||
![]() |
f2c36ee43e | ||
![]() |
00381b4ccb | ||
![]() |
df1d7da2af |
@@ -56,6 +56,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--date DATE download only videos uploaded in this date
|
--date DATE download only videos uploaded in this date
|
||||||
--datebefore DATE download only videos uploaded before this date
|
--datebefore DATE download only videos uploaded before this date
|
||||||
--dateafter DATE download only videos uploaded after this date
|
--dateafter DATE download only videos uploaded after this date
|
||||||
|
--min-views COUNT Do not download any videos with less than COUNT
|
||||||
|
views
|
||||||
|
--max-views COUNT Do not download any videos with more than COUNT
|
||||||
|
views
|
||||||
--no-playlist download only the currently playing video
|
--no-playlist download only the currently playing video
|
||||||
--age-limit YEARS download only videos suitable for the given age
|
--age-limit YEARS download only videos suitable for the given age
|
||||||
--download-archive FILE Download only videos not listed in the archive
|
--download-archive FILE Download only videos not listed in the archive
|
||||||
@@ -127,6 +131,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-id simulate, quiet but print id
|
--get-id simulate, quiet but print id
|
||||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||||
--get-description simulate, quiet but print video description
|
--get-description simulate, quiet but print video description
|
||||||
|
--get-duration simulate, quiet but print video length
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
-j, --dump-json simulate, quiet but print JSON information
|
-j, --dump-json simulate, quiet but print JSON information
|
||||||
|
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import get_testcases
|
from test.helper import get_testcases
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
JustinTVIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
|
def test_facebook_matching(self):
|
||||||
|
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
for tc in get_testcases():
|
for tc in get_testcases():
|
||||||
url = tc['url']
|
url = tc['url']
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
|
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||||
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
||||||
else:
|
else:
|
||||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||||
|
@@ -34,6 +34,7 @@ from .utils import (
|
|||||||
encodeFilename,
|
encodeFilename,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
formatSeconds,
|
||||||
get_term_width,
|
get_term_width,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
@@ -94,6 +95,7 @@ class YoutubeDL(object):
|
|||||||
forcethumbnail: Force printing thumbnail URL.
|
forcethumbnail: Force printing thumbnail URL.
|
||||||
forcedescription: Force printing description.
|
forcedescription: Force printing description.
|
||||||
forcefilename: Force printing final filename.
|
forcefilename: Force printing final filename.
|
||||||
|
forceduration: Force printing duration.
|
||||||
forcejson: Force printing info_dict as JSON.
|
forcejson: Force printing info_dict as JSON.
|
||||||
simulate: Do not download the video files.
|
simulate: Do not download the video files.
|
||||||
format: Video format code.
|
format: Video format code.
|
||||||
@@ -127,7 +129,16 @@ class YoutubeDL(object):
|
|||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
age_limit: An integer representing the user's age in years.
|
age_limit: An integer representing the user's age in years.
|
||||||
Unsuitable videos for the given age are skipped.
|
Unsuitable videos for the given age are skipped.
|
||||||
download_archive: File name of a file where all downloads are recorded.
|
min_views: An integer representing the minimum view count the video
|
||||||
|
must have in order to not be skipped.
|
||||||
|
Videos without view count information are always
|
||||||
|
downloaded. None for no limit.
|
||||||
|
max_views: An integer representing the maximum view count.
|
||||||
|
Videos that are more popular than that are not
|
||||||
|
downloaded.
|
||||||
|
Videos without view count information are always
|
||||||
|
downloaded. None for no limit.
|
||||||
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
cookiefile: File name where cookies should be read from and dumped to.
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
@@ -355,22 +366,6 @@ class YoutubeDL(object):
|
|||||||
error_message = u'%s %s' % (_msg_header, message)
|
error_message = u'%s %s' % (_msg_header, message)
|
||||||
self.trouble(error_message, tb)
|
self.trouble(error_message, tb)
|
||||||
|
|
||||||
def report_writedescription(self, descfn):
|
|
||||||
""" Report that the description file is being written """
|
|
||||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
|
||||||
|
|
||||||
def report_writesubtitles(self, sub_filename):
|
|
||||||
""" Report that the subtitles file is being written """
|
|
||||||
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
|
|
||||||
|
|
||||||
def report_writeinfojson(self, infofn):
|
|
||||||
""" Report that the metadata file has been written """
|
|
||||||
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
|
|
||||||
|
|
||||||
def report_writeannotations(self, annofn):
|
|
||||||
""" Report that the annotations file has been written. """
|
|
||||||
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
try:
|
try:
|
||||||
@@ -415,13 +410,14 @@ class YoutubeDL(object):
|
|||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
|
video_title = info_dict.get('title', info_dict.get('id', u'video'))
|
||||||
if 'title' in info_dict:
|
if 'title' in info_dict:
|
||||||
# This can happen when we're just evaluating the playlist
|
# This can happen when we're just evaluating the playlist
|
||||||
title = info_dict['title']
|
title = info_dict['title']
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
if matchtitle:
|
if matchtitle:
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
if rejecttitle:
|
if rejecttitle:
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
@@ -430,14 +426,21 @@ class YoutubeDL(object):
|
|||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
if date not in dateRange:
|
if date not in dateRange:
|
||||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||||
|
view_count = info_dict.get('view_count', None)
|
||||||
|
if view_count is not None:
|
||||||
|
min_views = self.params.get('min_views')
|
||||||
|
if min_views is not None and view_count < min_views:
|
||||||
|
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
|
||||||
|
max_views = self.params.get('max_views')
|
||||||
|
if max_views is not None and view_count > max_views:
|
||||||
|
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
age_limit = self.params.get('age_limit')
|
age_limit = self.params.get('age_limit')
|
||||||
if age_limit is not None:
|
if age_limit is not None:
|
||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%s has already been recorded in archive'
|
return u'%s has already been recorded in archive' % video_title
|
||||||
% info_dict.get('title', info_dict.get('id', u'video')))
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -554,16 +557,16 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
n_all_entries = len(ie_result['entries'])
|
n_all_entries = len(ie_result['entries'])
|
||||||
playliststart = self.params.get('playliststart', 1) - 1
|
playliststart = self.params.get('playliststart', 1) - 1
|
||||||
playlistend = self.params.get('playlistend', -1)
|
playlistend = self.params.get('playlistend', None)
|
||||||
|
# For backwards compatibility, interpret -1 as whole list
|
||||||
if playlistend == -1:
|
if playlistend == -1:
|
||||||
entries = ie_result['entries'][playliststart:]
|
playlistend = None
|
||||||
else:
|
|
||||||
entries = ie_result['entries'][playliststart:playlistend]
|
|
||||||
|
|
||||||
|
entries = ie_result['entries'][playliststart:playlistend]
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
|
|
||||||
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
self.to_screen(
|
||||||
|
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||||
|
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
@@ -748,6 +751,8 @@ class YoutubeDL(object):
|
|||||||
self.to_stdout(info_dict['description'])
|
self.to_stdout(info_dict['description'])
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
self.to_stdout(filename)
|
self.to_stdout(filename)
|
||||||
|
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||||
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
if self.params.get('forceformat', False):
|
if self.params.get('forceformat', False):
|
||||||
self.to_stdout(info_dict['format'])
|
self.to_stdout(info_dict['format'])
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
@@ -770,28 +775,34 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writedescription', False):
|
if self.params.get('writedescription', False):
|
||||||
try:
|
descfn = filename + u'.description'
|
||||||
descfn = filename + u'.description'
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||||
self.report_writedescription(descfn)
|
self.to_screen(u'[info] Video description is already present')
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
else:
|
||||||
descfile.write(info_dict['description'])
|
try:
|
||||||
except (KeyError, TypeError):
|
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||||
self.report_warning(u'There\'s no description to write.')
|
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
except (OSError, IOError):
|
descfile.write(info_dict['description'])
|
||||||
self.report_error(u'Cannot write description file ' + descfn)
|
except (KeyError, TypeError):
|
||||||
return
|
self.report_warning(u'There\'s no description to write.')
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write description file ' + descfn)
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('writeannotations', False):
|
if self.params.get('writeannotations', False):
|
||||||
try:
|
annofn = filename + u'.annotations.xml'
|
||||||
annofn = filename + u'.annotations.xml'
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||||
self.report_writeannotations(annofn)
|
self.to_screen(u'[info] Video annotations are already present')
|
||||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
else:
|
||||||
annofile.write(info_dict['annotations'])
|
try:
|
||||||
except (KeyError, TypeError):
|
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
||||||
self.report_warning(u'There are no annotations to write.')
|
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||||
except (OSError, IOError):
|
annofile.write(info_dict['annotations'])
|
||||||
self.report_error(u'Cannot write annotations file: ' + annofn)
|
except (KeyError, TypeError):
|
||||||
return
|
self.report_warning(u'There are no annotations to write.')
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write annotations file: ' + annofn)
|
||||||
|
return
|
||||||
|
|
||||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||||
self.params.get('writeautomaticsub')])
|
self.params.get('writeautomaticsub')])
|
||||||
@@ -807,38 +818,48 @@ class YoutubeDL(object):
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||||
self.report_writesubtitles(sub_filename)
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||||
subfile.write(sub)
|
else:
|
||||||
|
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
|
||||||
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||||
|
subfile.write(sub)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||||
self.report_writeinfojson(infofn)
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
||||||
try:
|
self.to_screen(u'[info] Video description metadata is already present')
|
||||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
else:
|
||||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
|
||||||
except (OSError, IOError):
|
try:
|
||||||
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||||
return
|
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
if info_dict.get('thumbnail') is not None:
|
if info_dict.get('thumbnail') is not None:
|
||||||
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
||||||
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
|
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
|
||||||
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||||
(info_dict['extractor'], info_dict['id']))
|
self.to_screen(u'[%s] %s: Thumbnail is already present' %
|
||||||
try:
|
(info_dict['extractor'], info_dict['id']))
|
||||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
else:
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
||||||
shutil.copyfileobj(uf, thumbf)
|
(info_dict['extractor'], info_dict['id']))
|
||||||
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
|
try:
|
||||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
with open(thumb_filename, 'wb') as thumbf:
|
||||||
self.report_warning(u'Unable to download thumbnail "%s": %s' %
|
shutil.copyfileobj(uf, thumbf)
|
||||||
(info_dict['thumbnail'], compat_str(err)))
|
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
|
||||||
|
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||||
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
self.report_warning(u'Unable to download thumbnail "%s": %s' %
|
||||||
|
(info_dict['thumbnail'], compat_str(err)))
|
||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
||||||
|
@@ -37,6 +37,7 @@ __authors__ = (
|
|||||||
'Anton Larionov',
|
'Anton Larionov',
|
||||||
'Takuya Tsuchida',
|
'Takuya Tsuchida',
|
||||||
'Sergey M.',
|
'Sergey M.',
|
||||||
|
'Michael Orlitzky',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -62,6 +63,7 @@ from .utils import (
|
|||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
@@ -196,10 +198,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option('--playlist-start',
|
selection.add_option(
|
||||||
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
|
'--playlist-start',
|
||||||
selection.add_option('--playlist-end',
|
dest='playliststart', metavar='NUMBER', default=1, type=int,
|
||||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
help='playlist video to start at (default is %default)')
|
||||||
|
selection.add_option(
|
||||||
|
'--playlist-end',
|
||||||
|
dest='playlistend', metavar='NUMBER', default=None, type=int,
|
||||||
|
help='playlist video to end at (default is last)')
|
||||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER',
|
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||||
@@ -210,6 +216,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||||
|
selection.add_option(
|
||||||
|
'--min-views', metavar='COUNT', dest='min_views',
|
||||||
|
default=None, type=int,
|
||||||
|
help="Do not download any videos with less than COUNT views",)
|
||||||
|
selection.add_option(
|
||||||
|
'--max-views', metavar='COUNT', dest='max_views',
|
||||||
|
default=None, type=int,
|
||||||
|
help="Do not download any videos with more than COUNT views",)
|
||||||
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
||||||
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
|
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
|
||||||
help='download only videos suitable for the given age',
|
help='download only videos suitable for the given age',
|
||||||
@@ -290,6 +304,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--get-description',
|
verbosity.add_option('--get-description',
|
||||||
action='store_true', dest='getdescription',
|
action='store_true', dest='getdescription',
|
||||||
help='simulate, quiet but print video description', default=False)
|
help='simulate, quiet but print video description', default=False)
|
||||||
|
verbosity.add_option('--get-duration',
|
||||||
|
action='store_true', dest='getduration',
|
||||||
|
help='simulate, quiet but print video length', default=False)
|
||||||
verbosity.add_option('--get-filename',
|
verbosity.add_option('--get-filename',
|
||||||
action='store_true', dest='getfilename',
|
action='store_true', dest='getfilename',
|
||||||
help='simulate, quiet but print output filename', default=False)
|
help='simulate, quiet but print output filename', default=False)
|
||||||
@@ -460,12 +477,15 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
return parser, opts, args
|
return parser, opts, args
|
||||||
|
|
||||||
|
|
||||||
def _real_main(argv=None):
|
def _real_main(argv=None):
|
||||||
# Compatibility fixes for Windows
|
# Compatibility fixes for Windows
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
# https://github.com/rg3/youtube-dl/issues/820
|
# https://github.com/rg3/youtube-dl/issues/820
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
||||||
|
|
||||||
|
setproctitle(u'youtube-dl')
|
||||||
|
|
||||||
parser, opts, args = parseOpts(argv)
|
parser, opts, args = parseOpts(argv)
|
||||||
|
|
||||||
# Set user agent
|
# Set user agent
|
||||||
@@ -560,18 +580,10 @@ def _real_main(argv=None):
|
|||||||
if numeric_buffersize is None:
|
if numeric_buffersize is None:
|
||||||
parser.error(u'invalid buffer size specified')
|
parser.error(u'invalid buffer size specified')
|
||||||
opts.buffersize = numeric_buffersize
|
opts.buffersize = numeric_buffersize
|
||||||
try:
|
if opts.playliststart <= 0:
|
||||||
opts.playliststart = int(opts.playliststart)
|
raise ValueError(u'Playlist start must be positive')
|
||||||
if opts.playliststart <= 0:
|
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
|
||||||
raise ValueError(u'Playlist start must be positive')
|
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||||
except (TypeError, ValueError):
|
|
||||||
parser.error(u'invalid playlist start number specified')
|
|
||||||
try:
|
|
||||||
opts.playlistend = int(opts.playlistend)
|
|
||||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
|
||||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
parser.error(u'invalid playlist end number specified')
|
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
parser.error(u'invalid audio format specified')
|
parser.error(u'invalid audio format specified')
|
||||||
@@ -604,27 +616,30 @@ def _real_main(argv=None):
|
|||||||
or (opts.useid and u'%(id)s.%(ext)s')
|
or (opts.useid and u'%(id)s.%(ext)s')
|
||||||
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
||||||
or u'%(title)s-%(id)s.%(ext)s')
|
or u'%(title)s-%(id)s.%(ext)s')
|
||||||
if '%(ext)s' not in outtmpl and opts.extractaudio:
|
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||||
parser.error(u'Cannot download a video and extract audio into the same'
|
parser.error(u'Cannot download a video and extract audio into the same'
|
||||||
u' file! Use "%%(ext)s" instead of %r' %
|
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
determine_ext(outtmpl, u''))
|
u' template'.format(outtmpl))
|
||||||
|
|
||||||
|
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
'username': opts.username,
|
'username': opts.username,
|
||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
'quiet': (opts.quiet or any_printing),
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
'forceid': opts.getid,
|
'forceid': opts.getid,
|
||||||
'forcethumbnail': opts.getthumbnail,
|
'forcethumbnail': opts.getthumbnail,
|
||||||
'forcedescription': opts.getdescription,
|
'forcedescription': opts.getdescription,
|
||||||
|
'forceduration': opts.getduration,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
'forcejson': opts.dumpjson,
|
'forcejson': opts.dumpjson,
|
||||||
'simulate': opts.simulate,
|
'simulate': opts.simulate,
|
||||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
'skip_download': (opts.skip_download or opts.simulate or any_printing),
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'format_limit': opts.format_limit,
|
'format_limit': opts.format_limit,
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
@@ -668,6 +683,8 @@ def _real_main(argv=None):
|
|||||||
'keepvideo': opts.keepvideo,
|
'keepvideo': opts.keepvideo,
|
||||||
'min_filesize': opts.min_filesize,
|
'min_filesize': opts.min_filesize,
|
||||||
'max_filesize': opts.max_filesize,
|
'max_filesize': opts.max_filesize,
|
||||||
|
'min_views': opts.min_views,
|
||||||
|
'max_views': opts.max_views,
|
||||||
'daterange': date,
|
'daterange': date,
|
||||||
'cachedir': opts.cachedir,
|
'cachedir': opts.cachedir,
|
||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
|
@@ -13,6 +13,7 @@ from .arte import (
|
|||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
@@ -20,6 +21,8 @@ from .brightcove import BrightcoveIE
|
|||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
|
from .cbs import CBSIE
|
||||||
|
from .channel9 import Channel9IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
@@ -87,6 +90,7 @@ from .kickstarter import KickStarterIE
|
|||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE
|
||||||
@@ -111,6 +115,7 @@ from .orf import ORFIE
|
|||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
|
86
youtube_dl/extractor/blinkx.py
Normal file
86
youtube_dl/extractor/blinkx.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BlinkxIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
|
||||||
|
_IE_NAME = u'blinkx'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
||||||
|
u'file': u'8aQUy7GV.mp4',
|
||||||
|
u'md5': u'2e9a07364af40163a908edbf10bb2492',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Police Car Rolls Away",
|
||||||
|
u"uploader": u"stupidvideos.com",
|
||||||
|
u"upload_date": u"20131215",
|
||||||
|
u"description": u"A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
|
||||||
|
u"duration": 14.886,
|
||||||
|
u"thumbnails": [{
|
||||||
|
"width": 100,
|
||||||
|
"height": 76,
|
||||||
|
"url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('id')
|
||||||
|
display_id = video_id[:8]
|
||||||
|
|
||||||
|
api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||||
|
u'video=%s' % video_id)
|
||||||
|
data_json = self._download_webpage(api_url, display_id)
|
||||||
|
data = json.loads(data_json)['api']['results'][0]
|
||||||
|
dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
|
||||||
|
upload_date = dt.strftime('%Y%m%d')
|
||||||
|
|
||||||
|
duration = None
|
||||||
|
thumbnails = []
|
||||||
|
formats = []
|
||||||
|
for m in data['media']:
|
||||||
|
if m['type'] == 'jpg':
|
||||||
|
thumbnails.append({
|
||||||
|
'url': m['link'],
|
||||||
|
'width': int(m['w']),
|
||||||
|
'height': int(m['h']),
|
||||||
|
})
|
||||||
|
elif m['type'] == 'original':
|
||||||
|
duration = m['d']
|
||||||
|
elif m['type'] in ('flv', 'mp4'):
|
||||||
|
vcodec = remove_start(m['vcodec'], 'ff')
|
||||||
|
acodec = remove_start(m['acodec'], 'ff')
|
||||||
|
format_id = (u'%s-%sk-%s' %
|
||||||
|
(vcodec,
|
||||||
|
(int(m['vbr']) + int(m['abr'])) // 1000,
|
||||||
|
m['w']))
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': m['link'],
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'acodec': acodec,
|
||||||
|
'abr': int(m['abr']) // 1000,
|
||||||
|
'vbr': int(m['vbr']) // 1000,
|
||||||
|
'width': int(m['w']),
|
||||||
|
'height': int(m['h']),
|
||||||
|
})
|
||||||
|
formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': display_id,
|
||||||
|
'fullid': video_id,
|
||||||
|
'title': data['title'],
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': data['channel_name'],
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'description': data.get('description'),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
30
youtube_dl/extractor/cbs.py
Normal file
30
youtube_dl/extractor/cbs.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
|
u'file': u'4JUVEwq3wUT7.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Connect Chat feat. Garth Brooks',
|
||||||
|
u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||||
|
u'duration': 1495,
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
real_id = self._search_regex(
|
||||||
|
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||||
|
webpage, u'real video ID')
|
||||||
|
return self.url_result(u'theplatform:%s' % real_id)
|
267
youtube_dl/extractor/channel9.py
Normal file
267
youtube_dl/extractor/channel9.py
Normal file
@@ -0,0 +1,267 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
class Channel9IE(InfoExtractor):
|
||||||
|
'''
|
||||||
|
Common extractor for channel9.msdn.com.
|
||||||
|
|
||||||
|
The type of provided URL (video or playlist) is determined according to
|
||||||
|
meta Search.PageType from web page HTML rather than URL itself, as it is
|
||||||
|
not always possible to do.
|
||||||
|
'''
|
||||||
|
IE_DESC = u'Channel 9'
|
||||||
|
IE_NAME = u'channel9'
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
u'url': u'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
|
u'file': u'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||||
|
u'md5': u'bbd75296ba47916b754e73c3a4bbdf10',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Developer Kick-Off Session: Stuff We Love',
|
||||||
|
u'description': u'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||||
|
u'duration': 4576,
|
||||||
|
u'thumbnail': u'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
|
||||||
|
u'session_code': u'KOS002',
|
||||||
|
u'session_day': u'Day 1',
|
||||||
|
u'session_room': u'Arena 1A',
|
||||||
|
u'session_speakers': [ u'Ed Blankenship', u'Andrew Coates', u'Brady Gaster', u'Patrick Klug', u'Mads Kristensen' ],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
|
u'file': u'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||||
|
u'md5': u'b43ee4529d111bc37ba7ee4f34813e68',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Self-service BI with Power BI - nuclear testing',
|
||||||
|
u'description': u'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||||
|
u'duration': 1540,
|
||||||
|
u'thumbnail': u'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
|
||||||
|
u'authors': [ u'Mike Wilmot' ],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||||
|
|
||||||
|
# Sorted by quality
|
||||||
|
_known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
|
||||||
|
|
||||||
|
def _restore_bytes(self, formatted_size):
|
||||||
|
if not formatted_size:
|
||||||
|
return 0
|
||||||
|
m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
|
||||||
|
if not m:
|
||||||
|
return 0
|
||||||
|
units = m.group('units')
|
||||||
|
try:
|
||||||
|
exponent = [u'B', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB'].index(units.upper())
|
||||||
|
except ValueError:
|
||||||
|
return 0
|
||||||
|
size = float(m.group('size'))
|
||||||
|
return int(size * (1024 ** exponent))
|
||||||
|
|
||||||
|
def _formats_from_html(self, html):
|
||||||
|
FORMAT_REGEX = r'''
|
||||||
|
(?x)
|
||||||
|
<a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
|
||||||
|
<span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
|
||||||
|
(?:<div\s+class="popup\s+rounded">\s*
|
||||||
|
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
|
||||||
|
</div>)? # File size part may be missing
|
||||||
|
'''
|
||||||
|
# Extract known formats
|
||||||
|
formats = [{'url': x.group('url'),
|
||||||
|
'format_id': x.group('quality'),
|
||||||
|
'format_note': x.group('note'),
|
||||||
|
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||||
|
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||||
|
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
||||||
|
# Sort according to known formats list
|
||||||
|
formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_title(self, html):
|
||||||
|
title = self._html_search_meta(u'title', html, u'title')
|
||||||
|
if title is None:
|
||||||
|
title = self._og_search_title(html)
|
||||||
|
TITLE_SUFFIX = u' (Channel 9)'
|
||||||
|
if title is not None and title.endswith(TITLE_SUFFIX):
|
||||||
|
title = title[:-len(TITLE_SUFFIX)]
|
||||||
|
return title
|
||||||
|
|
||||||
|
def _extract_description(self, html):
|
||||||
|
DESCRIPTION_REGEX = r'''(?sx)
|
||||||
|
<div\s+class="entry-content">\s*
|
||||||
|
<div\s+id="entry-body">\s*
|
||||||
|
(?P<description>.+?)\s*
|
||||||
|
</div>\s*
|
||||||
|
</div>
|
||||||
|
'''
|
||||||
|
m = re.search(DESCRIPTION_REGEX, html)
|
||||||
|
if m is not None:
|
||||||
|
return m.group('description')
|
||||||
|
return self._html_search_meta(u'description', html, u'description')
|
||||||
|
|
||||||
|
def _extract_duration(self, html):
|
||||||
|
m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
|
||||||
|
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
|
||||||
|
|
||||||
|
def _extract_slides(self, html):
|
||||||
|
m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
|
||||||
|
return m.group('slidesurl') if m is not None else None
|
||||||
|
|
||||||
|
def _extract_zip(self, html):
|
||||||
|
m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
|
||||||
|
return m.group('zipurl') if m is not None else None
|
||||||
|
|
||||||
|
def _extract_avg_rating(self, html):
|
||||||
|
m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
|
||||||
|
return float(m.group('avgrating')) if m is not None else 0
|
||||||
|
|
||||||
|
def _extract_rating_count(self, html):
|
||||||
|
m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
|
||||||
|
return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
|
||||||
|
|
||||||
|
def _extract_view_count(self, html):
|
||||||
|
m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
|
||||||
|
return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
|
||||||
|
|
||||||
|
def _extract_comment_count(self, html):
|
||||||
|
m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
|
||||||
|
return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
|
||||||
|
|
||||||
|
def _fix_count(self, count):
|
||||||
|
return int(str(count).replace(',', '')) if count is not None else None
|
||||||
|
|
||||||
|
def _extract_authors(self, html):
|
||||||
|
m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
|
||||||
|
if m is None:
|
||||||
|
return None
|
||||||
|
return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
|
||||||
|
|
||||||
|
def _extract_session_code(self, html):
|
||||||
|
m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
|
||||||
|
return m.group('code') if m is not None else None
|
||||||
|
|
||||||
|
def _extract_session_day(self, html):
|
||||||
|
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
|
||||||
|
return m.group('day') if m is not None else None
|
||||||
|
|
||||||
|
def _extract_session_room(self, html):
|
||||||
|
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
|
||||||
|
return m.group('room') if m is not None else None
|
||||||
|
|
||||||
|
def _extract_session_speakers(self, html):
|
||||||
|
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
|
||||||
|
|
||||||
|
def _extract_content(self, html, content_path):
|
||||||
|
# Look for downloadable content
|
||||||
|
formats = self._formats_from_html(html)
|
||||||
|
slides = self._extract_slides(html)
|
||||||
|
zip_ = self._extract_zip(html)
|
||||||
|
|
||||||
|
# Nothing to download
|
||||||
|
if len(formats) == 0 and slides is None and zip_ is None:
|
||||||
|
self._downloader.report_warning(u'None of recording, slides or zip are available for %s' % content_path)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Extract meta
|
||||||
|
title = self._extract_title(html)
|
||||||
|
description = self._extract_description(html)
|
||||||
|
thumbnail = self._og_search_thumbnail(html)
|
||||||
|
duration = self._extract_duration(html)
|
||||||
|
avg_rating = self._extract_avg_rating(html)
|
||||||
|
rating_count = self._extract_rating_count(html)
|
||||||
|
view_count = self._extract_view_count(html)
|
||||||
|
comment_count = self._extract_comment_count(html)
|
||||||
|
|
||||||
|
common = {'_type': 'video',
|
||||||
|
'id': content_path,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'avg_rating': avg_rating,
|
||||||
|
'rating_count': rating_count,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
result = []
|
||||||
|
|
||||||
|
if slides is not None:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({ 'title': title + '-Slides', 'url': slides })
|
||||||
|
result.append(d)
|
||||||
|
|
||||||
|
if zip_ is not None:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({ 'title': title + '-Zip', 'url': zip_ })
|
||||||
|
result.append(d)
|
||||||
|
|
||||||
|
if len(formats) > 0:
|
||||||
|
d = common.copy()
|
||||||
|
d.update({ 'title': title, 'formats': formats })
|
||||||
|
result.append(d)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _extract_entry_item(self, html, content_path):
|
||||||
|
contents = self._extract_content(html, content_path)
|
||||||
|
if contents is None:
|
||||||
|
return contents
|
||||||
|
|
||||||
|
authors = self._extract_authors(html)
|
||||||
|
|
||||||
|
for content in contents:
|
||||||
|
content['authors'] = authors
|
||||||
|
|
||||||
|
return contents
|
||||||
|
|
||||||
|
def _extract_session(self, html, content_path):
|
||||||
|
contents = self._extract_content(html, content_path)
|
||||||
|
if contents is None:
|
||||||
|
return contents
|
||||||
|
|
||||||
|
session_meta = {'session_code': self._extract_session_code(html),
|
||||||
|
'session_day': self._extract_session_day(html),
|
||||||
|
'session_room': self._extract_session_room(html),
|
||||||
|
'session_speakers': self._extract_session_speakers(html),
|
||||||
|
}
|
||||||
|
|
||||||
|
for content in contents:
|
||||||
|
content.update(session_meta)
|
||||||
|
|
||||||
|
return contents
|
||||||
|
|
||||||
|
def _extract_list(self, content_path):
|
||||||
|
rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
|
||||||
|
entries = [self.url_result(session_url.text, 'Channel9')
|
||||||
|
for session_url in rss.findall('./channel/item/link')]
|
||||||
|
title_text = rss.find('./channel/title').text
|
||||||
|
return self.playlist_result(entries, content_path, title_text)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
content_path = mobj.group('contentpath')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, content_path, u'Downloading web page')
|
||||||
|
|
||||||
|
page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
|
||||||
|
if page_type_m is None:
|
||||||
|
raise ExtractorError(u'Search.PageType not found, don\'t know how to process this page', expected=True)
|
||||||
|
|
||||||
|
page_type = page_type_m.group('pagetype')
|
||||||
|
if page_type == 'List': # List page, may contain list of 'item'-like objects
|
||||||
|
return self._extract_list(content_path)
|
||||||
|
elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
|
||||||
|
return self._extract_entry_item(webpage, content_path)
|
||||||
|
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||||
|
return self._extract_session(webpage, content_path)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'Unexpected Search.PageType %s' % page_type, expected=True)
|
@@ -34,15 +34,39 @@ class InfoExtractor(object):
|
|||||||
The dictionaries must include the following fields:
|
The dictionaries must include the following fields:
|
||||||
|
|
||||||
id: Video identifier.
|
id: Video identifier.
|
||||||
url: Final video URL.
|
|
||||||
title: Video title, unescaped.
|
title: Video title, unescaped.
|
||||||
ext: Video filename extension.
|
|
||||||
|
|
||||||
Instead of url and ext, formats can also specified.
|
Additionally, it must contain either a formats entry or url and ext:
|
||||||
|
|
||||||
|
formats: A list of dictionaries for each format available, it must
|
||||||
|
be ordered from worst to best quality. Potential fields:
|
||||||
|
* url Mandatory. The URL of the video file
|
||||||
|
* ext Will be calculated from url if missing
|
||||||
|
* format A human-readable description of the format
|
||||||
|
("mp4 container with h264/opus").
|
||||||
|
Calculated from the format_id, width, height.
|
||||||
|
and format_note fields if missing.
|
||||||
|
* format_id A short description of the format
|
||||||
|
("mp4_h264_opus" or "19")
|
||||||
|
* format_note Additional info about the format
|
||||||
|
("3D" or "DASH video")
|
||||||
|
* width Width of the video, if known
|
||||||
|
* height Height of the video, if known
|
||||||
|
* abr Average audio bitrate in KBit/s
|
||||||
|
* acodec Name of the audio codec in use
|
||||||
|
* vbr Average video bitrate in KBit/s
|
||||||
|
* vcodec Name of the video codec in use
|
||||||
|
* filesize The number of bytes, if known in advance
|
||||||
|
* player_url SWF Player URL (used for rtmpdump).
|
||||||
|
url: Final video URL.
|
||||||
|
ext: Video filename extension.
|
||||||
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
|
player_url: SWF Player URL (used for rtmpdump).
|
||||||
|
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||||
|
like returned by urllib.request.urlopen
|
||||||
|
|
||||||
The following fields are optional:
|
The following fields are optional:
|
||||||
|
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
|
||||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||||
"url") for the varying thumbnails
|
"url") for the varying thumbnails
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
@@ -51,35 +75,14 @@ class InfoExtractor(object):
|
|||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
location: Physical location of the video.
|
location: Physical location of the video.
|
||||||
player_url: SWF Player URL (used for rtmpdump).
|
|
||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The subtitle file contents as a dictionary in the format
|
||||||
{language: subtitles}.
|
{language: subtitles}.
|
||||||
|
duration: Length of the video in seconds, as an integer.
|
||||||
view_count: How many users have watched the video on the platform.
|
view_count: How many users have watched the video on the platform.
|
||||||
like_count: Number of positive ratings of the video
|
like_count: Number of positive ratings of the video
|
||||||
dislike_count: Number of negative ratings of the video
|
dislike_count: Number of negative ratings of the video
|
||||||
comment_count: Number of comments on the video
|
comment_count: Number of comments on the video
|
||||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
|
||||||
like returned by urllib.request.urlopen
|
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
formats: A list of dictionaries for each format available, it must
|
|
||||||
be ordered from worst to best quality. Potential fields:
|
|
||||||
* url Mandatory. The URL of the video file
|
|
||||||
* ext Will be calculated from url if missing
|
|
||||||
* format A human-readable description of the format
|
|
||||||
("mp4 container with h264/opus").
|
|
||||||
Calculated from the format_id, width, height.
|
|
||||||
and format_note fields if missing.
|
|
||||||
* format_id A short description of the format
|
|
||||||
("mp4_h264_opus" or "19")
|
|
||||||
* format_note Additional info about the format
|
|
||||||
("3D" or "DASH video")
|
|
||||||
* width Width of the video, if known
|
|
||||||
* height Height of the video, if known
|
|
||||||
* abr Average audio bitrate in KBit/s
|
|
||||||
* acodec Name of the audio codec in use
|
|
||||||
* vbr Average video bitrate in KBit/s
|
|
||||||
* vcodec Name of the video codec in use
|
|
||||||
* filesize The number of bytes, if known in advance
|
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
|
@@ -28,7 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
"""Information Extractor for Dailymotion"""
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
IE_NAME = u'dailymotion'
|
IE_NAME = u'dailymotion'
|
||||||
|
|
||||||
_FORMATS = [
|
_FORMATS = [
|
||||||
@@ -81,7 +81,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||||
|
|
||||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = u'daum.net'
|
IE_NAME = u'daum.net'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
"""Information Extractor for Facebook"""
|
"""Information Extractor for Facebook"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
||||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
u'file': u'120708114770723.mp4',
|
u'file': u'120708114770723.mp4',
|
||||||
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"duration": 279,
|
u"duration": 279,
|
||||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
u"title": u"PEOPLE ARE AWESOME 2013"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -222,6 +222,18 @@ class GenericIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Look for embedded blip.tv player
|
||||||
|
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||||
|
if mobj:
|
||||||
|
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
|
||||||
|
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
|
||||||
|
if mobj:
|
||||||
|
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
|
||||||
|
player_page = self._download_webpage(player_url, mobj.group(1))
|
||||||
|
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
|
||||||
|
if blip_video_id:
|
||||||
|
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
|
@@ -44,7 +44,7 @@ class IGNIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
|
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
|
||||||
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
78
youtube_dl/extractor/mdr.py
Normal file
78
youtube_dl/extractor/mdr.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MDRIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
|
||||||
|
u'file': u'165624.mp4',
|
||||||
|
u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
|
||||||
|
u'file': u'718370.mp3',
|
||||||
|
u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('video_id')
|
||||||
|
domain = m.group('domain')
|
||||||
|
|
||||||
|
# determine title and media streams from webpage
|
||||||
|
html = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
|
||||||
|
xmlurl = self._search_regex(
|
||||||
|
r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
|
||||||
|
|
||||||
|
doc = self._download_xml(domain + xmlurl, video_id)
|
||||||
|
formats = []
|
||||||
|
for a in doc.findall('./assets/asset'):
|
||||||
|
url_el = a.find('.//progressiveDownloadUrl')
|
||||||
|
if url_el is None:
|
||||||
|
continue
|
||||||
|
abr = int(a.find('bitrateAudio').text) // 1000
|
||||||
|
media_type = a.find('mediaType').text
|
||||||
|
format = {
|
||||||
|
'abr': abr,
|
||||||
|
'filesize': int(a.find('fileSize').text),
|
||||||
|
'url': url_el.text,
|
||||||
|
}
|
||||||
|
|
||||||
|
vbr_el = a.find('bitrateVideo')
|
||||||
|
if vbr_el is None:
|
||||||
|
format.update({
|
||||||
|
'vcodec': 'none',
|
||||||
|
'format_id': u'%s-%d' % (media_type, abr),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
vbr = int(vbr_el.text) // 1000
|
||||||
|
format.update({
|
||||||
|
'vbr': vbr,
|
||||||
|
'width': int(a.find('frameWidth').text),
|
||||||
|
'height': int(a.find('frameHeight').text),
|
||||||
|
'format_id': u'%s-%d' % (media_type, vbr),
|
||||||
|
})
|
||||||
|
formats.append(format)
|
||||||
|
formats.sort(key=lambda f: (f.get('vbr'), f['abr']))
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError(u'Could not find any valid formats')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NaverIE(InfoExtractor):
|
class NaverIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://tvcast.naver.com/v/81652',
|
u'url': u'http://tvcast.naver.com/v/81652',
|
||||||
|
@@ -1,6 +1,4 @@
|
|||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import month_by_name
|
from ..utils import month_by_name
|
||||||
|
38
youtube_dl/extractor/pornhd.py
Normal file
38
youtube_dl/extractor/pornhd.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
|
class PornHdIE(InfoExtractor):
    """Extractor for pornhd.com video pages."""
    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
    _TEST = {
        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
        u'file': u'1962.flv',
        u'md5': u'35272469887dca97abd30abecc6cdf75',
        u'info_dict': {
            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        title = m.group('video_title')

        webpage = self._download_webpage(url, video_id)

        # The HD stream URL is embedded, percent-encoded, inside an
        # "&hd=...&" fragment of the player parameters.
        encoded_url = self._html_search_regex(
            r'&hd=(http.+?)&', webpage, u'video URL')

        return {
            'id': video_id,
            'url': compat_urllib_parse.unquote(encoded_url),
            'ext': 'flv',
            'title': title,
            'age_limit': 18,
        }
|
@@ -12,7 +12,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
u'file': u'648719015.mp4',
|
u'file': u'648719015.mp4',
|
||||||
|
@@ -3,6 +3,7 @@ import json
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,6 +33,17 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||||
'format=smil&mbr=true'.format(video_id))
|
'format=smil&mbr=true'.format(video_id))
|
||||||
meta = self._download_xml(smil_url, video_id)
|
meta = self._download_xml(smil_url, video_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
error_msg = next(
|
||||||
|
n.attrib['abstract']
|
||||||
|
for n in meta.findall(_x('.//smil:ref'))
|
||||||
|
if n.attrib.get('title') == u'Geographic Restriction')
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ExtractorError(error_msg, expected=True)
|
||||||
|
|
||||||
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
||||||
info_json = self._download_webpage(info_url, video_id)
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
|
@@ -15,6 +15,7 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
u'params': {
|
u'params': {
|
||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
},
|
},
|
||||||
|
u'skip': u'Test file has been deleted.',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -1377,9 +1377,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
if 'length_seconds' not in video_info:
|
if 'length_seconds' not in video_info:
|
||||||
self._downloader.report_warning(u'unable to extract video duration')
|
self._downloader.report_warning(u'unable to extract video duration')
|
||||||
video_duration = ''
|
video_duration = None
|
||||||
else:
|
else:
|
||||||
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
|
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
||||||
|
|
||||||
# annotations
|
# annotations
|
||||||
video_annotations = None
|
video_annotations = None
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
import errno
|
import errno
|
||||||
@@ -1051,7 +1052,7 @@ def month_by_name(name):
|
|||||||
""" Return the number of a month by (locale-independently) English name """
|
""" Return the number of a month by (locale-independently) English name """
|
||||||
|
|
||||||
ENGLISH_NAMES = [
|
ENGLISH_NAMES = [
|
||||||
u'Januar', u'February', u'March', u'April', u'May', u'June',
|
u'January', u'February', u'March', u'April', u'May', u'June',
|
||||||
u'July', u'August', u'September', u'October', u'November', u'December']
|
u'July', u'August', u'September', u'October', u'November', u'December']
|
||||||
try:
|
try:
|
||||||
return ENGLISH_NAMES.index(name) + 1
|
return ENGLISH_NAMES.index(name) + 1
|
||||||
@@ -1062,3 +1063,24 @@ def month_by_name(name):
|
|||||||
def fix_xml_all_ampersand(xml_str):
    """Escape every '&' in xml_str as '&amp;' so the result parses as XML.

    NOTE: this re-escapes ampersands even if they are already part of an
    entity; it is intended for feeds that emit raw, unescaped '&'.
    """
    # As scraped, the replacement target had lost its entity escaping
    # (replace(u'&', u'&')), which is a no-op and contradicts the
    # docstring; restore the '&amp;' escape.
    return xml_str.replace(u'&', u'&amp;')
|
||||||
|
|
||||||
|
|
||||||
|
def setproctitle(title):
    """Set the process title (as shown by ps/top) via prctl(PR_SET_NAME).

    Best-effort: silently does nothing when libc cannot be loaded or
    does not expose prctl (i.e. on non-Linux systems).
    """
    assert isinstance(title, type(u''))
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # Size the buffer by the *encoded* byte length, not the character
    # count: a UTF-8 title containing non-ASCII characters needs more
    # bytes than characters, and a too-small buffer makes the
    # buf.value assignment raise ValueError.
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
    buf.value = title_bytes
    try:
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
|
||||||
|
|
||||||
|
|
||||||
|
def remove_start(s, start):
    """Return s with the prefix start stripped; s unchanged if absent."""
    return s[len(start):] if s.startswith(start) else s
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.12.11.1'
|
__version__ = '2013.12.16.4'
|
||||||
|
Reference in New Issue
Block a user