Compare commits
27 Commits
2014.04.01 ... 2014.04.03
| SHA1 |
| --- |
| a9f304031b |
| 9271bc8355 |
| 968ed2a777 |
| 24de5d2556 |
| d26e981df4 |
| e45d40b171 |
| 4a419b8851 |
| bec1fad223 |
| 177fed41bc |
| b900e7cba4 |
| 14cb4979f0 |
| 69e61e30fe |
| cce929eaac |
| b6cfde99b7 |
| 1be99f052d |
| 2410c43d83 |
| aea6e7fc3c |
| 91a76c40c0 |
| d2b194607c |
| f6177462db |
| 9ddaf4ef8c |
| 97b5573848 |
| 18c95c1ab0 |
| 0479c625a4 |
| f659951e22 |
| 5853a7316e |
| a612753db9 |
MANIFEST.in
@@ -3,5 +3,4 @@ include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
 include youtube-dl.1
-recursive-include docs *
-prune docs/_build
+recursive-include docs Makefile conf.py *.rst
test/test_all_urls.py
@@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(
             'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
             ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+            ['ComedyCentralShows'])

 if __name__ == '__main__':
     unittest.main()
test/test_playlists.py
@@ -42,6 +42,7 @@ from youtube_dl.extractor import (
     ToypicsUserIE,
     XTubeUserIE,
     InstagramUserIE,
+    CSpanIE,
 )

test/test_playlists.py
@@ -314,6 +315,19 @@ class TestPlaylists(unittest.TestCase):
         }
         expect_info_dict(self, EXPECTED, test_video)

+    def test_CSpan_playlist(self):
+        dl = FakeYDL()
+        ie = CSpanIE(dl)
+        result = ie.extract(
+            'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '342759')
+        self.assertEqual(
+            result['title'], 'General Motors Ignition Switch Recall')
+        self.assertEqual(len(result['entries']), 9)
+        whole_duration = sum(e['duration'] for e in result['entries'])
+        self.assertEqual(whole_duration, 14855)
+

 if __name__ == '__main__':
     unittest.main()
youtube_dl/YoutubeDL.py
@@ -702,6 +702,11 @@ class YoutubeDL(object):
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'

+        if 'id' not in info_dict:
+            raise ExtractorError('Missing "id" field in extractor result')
+        if 'title' not in info_dict:
+            raise ExtractorError('Missing "title" field in extractor result')
+
         if 'playlist' not in info_dict:
             # It isn't part of a playlist
             info_dict['playlist'] = None
youtube_dl/YoutubeDL.py
@@ -733,6 +738,9 @@ class YoutubeDL(object):

         # We check that all the formats have the format and format_id fields
         for i, format in enumerate(formats):
+            if 'url' not in format:
+                raise ExtractorError('Missing "url" key in result (index %d)' % i)
+
             if format.get('format_id') is None:
                 format['format_id'] = compat_str(i)
             if format.get('format') is None:
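Taken together, the checks added in the two hunks above mean an extractor result must carry 'id' and 'title', and every entry in its formats list must carry 'url', before any downloading starts. A minimal sketch of a result that passes (all values invented for illustration):

# Invented example of the smallest info_dict accepted by process_video_result()
# after this change; omitting 'id' or 'title', or dropping 'url' from a format,
# now raises ExtractorError up front instead of failing later.
info_dict = {
    'id': 'example-id',        # required
    'title': 'Example title',  # required
    'formats': [
        # 'url' is required per format; 'format_id' and 'ext' are filled in
        # automatically by the surrounding code when missing.
        {'url': 'http://example.com/video.mp4'},
    ],
}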
youtube_dl/YoutubeDL.py
@@ -743,7 +751,7 @@ class YoutubeDL(object):
             )
             # Automatically determine file extension if missing
             if 'ext' not in format:
-                format['ext'] = determine_ext(format['url'])
+                format['ext'] = determine_ext(format['url']).lower()

         format_limit = self.params.get('format_limit', None)
         if format_limit:
youtube_dl/YoutubeDL.py
@@ -868,7 +876,7 @@ class YoutubeDL(object):

         try:
             dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn):
+            if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
             self.report_error('unable to create directory ' + compat_str(err))
youtube_dl/downloader/f4m.py
@@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
                 break
             frags_filenames.append(frag_filename)

+        dest_stream.close()
         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

         self.try_rename(tmpfilename, filename)
youtube_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
+from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
youtube_dl/extractor/__init__.py
@@ -83,6 +84,7 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .fourtube import FourTubeIE
+from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
     PluzzIE,
youtube_dl/extractor/__init__.py
@@ -156,6 +158,7 @@ from .mtv import (
     MTVIE,
     MTVIggyIE,
 )
+from .musicplayon import MusicPlayOnIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
youtube_dl/extractor/brightcove.py
@@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
         object_str = object_str.replace('<--', '<!--')
         object_str = fix_xml_ampersands(object_str)

-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))

         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
youtube_dl/extractor/c56.py
@@ -2,39 +2,46 @@
 from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor


 class C56IE(InfoExtractor):
-    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
     IE_NAME = '56.com'
     _TEST = {
         'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        'file': '93440716.flv',
         'md5': 'e59995ac63d0457783ea05f93f12a866',
         'info_dict': {
+            'id': '93440716',
+            'ext': 'flv',
             'title': '网事知多少 第32期:车怒',
+            'duration': 283.813,
         },
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         text_id = mobj.group('textid')
-        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
-                                           text_id, 'Downloading video info')
-        info = json.loads(info_page)['info']
-        formats = [{
-            'format_id': f['type'],
-            'filesize': int(f['filesize']),
-            'url': f['url']
-        } for f in info['rfiles']]
+
+        page = self._download_json(
+            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+        info = page['info']
+
+        formats = [
+            {
+                'format_id': f['type'],
+                'filesize': int(f['filesize']),
+                'url': f['url']
+            } for f in info['rfiles']
+        ]
         self._sort_formats(formats)

         return {
             'id': info['vid'],
             'title': info['Subject'],
+            'duration': int(info['duration']) / 1000.0,
             'formats': formats,
             'thumbnail': info.get('bimg') or info.get('img'),
         }
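For orientation, a sketch of the JSON the rewritten extractor reads from vxml.56.com; 'vid', 'Subject' and the duration follow the test values above, while the 'rfiles' entry is invented:

# Hypothetical shape of the response consumed by the new _download_json() call.
page = {
    'info': {
        'vid': '93440716',
        'Subject': '网事知多少 第32期:车怒',
        'duration': '283813',  # divided by 1000.0 -> 283.813 in the result
        'rfiles': [
            {'type': 'normal', 'filesize': '10000000',
             'url': 'http://example.com/93440716.flv'},
        ],
    },
}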
youtube_dl/extractor/cnet.py (new file, 70 lines)
@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
)


class CNETIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)
        vdata = data['video']

        video_id = vdata['id']
        title = vdata['headline']
        description = vdata.get('dek')
        thumbnail = vdata.get('image', {}).get('path')
        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('email')
        else:
            uploader = None
            uploader_id = None

        formats = [{
            'format_id': '%s-%s-%s' % (
                f['type'], f['format'],
                int_or_none(f.get('bitrate'), 1000, default='')),
            'url': f['uri'],
            'tbr': int_or_none(f.get('bitrate'), 1000),
        } for f in vdata['files']['data']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }
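The new extractor is also a first user of the default argument that the utils change further down adds to int_or_none(); a hedged sketch of why default='' is handy when composing format_id (the 'hds'/'mp4' values are invented):

from youtube_dl.utils import int_or_none

# Assumes the int_or_none(default=...) change from this comparison is in place.
# With a bitrate the id reads like 'hds-mp4-1000'; without one the last
# component collapses to '' instead of the literal string 'None'.
print('%s-%s-%s' % ('hds', 'mp4', int_or_none('1000000', 1000, default='')))  # hds-mp4-1000
print('%s-%s-%s' % ('hds', 'mp4', int_or_none(None, 1000, default='')))       # hds-mp4-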
youtube_dl/extractor/comedycentral.py
@@ -43,7 +43,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                        (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                        (full-episodes/(?P<episode>.*)|
                         (?P<clip>
-                            (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
+                            (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
                             |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                             |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                         )|
youtube_dl/extractor/comedycentral.py
@@ -59,7 +59,7 @@ class ComedyCentralShowsIE(InfoExtractor):
             'upload_date': '20121213',
             'description': 'Kristen Stewart learns to let loose in "On the Road."',
             'uploader': 'thedailyshow',
-            'title': 'thedailyshow-kristen-stewart part 1',
+            'title': 'thedailyshow kristen-stewart part 1',
         }
     }

youtube_dl/extractor/comedycentral.py
@@ -165,7 +165,7 @@ class ComedyCentralShowsIE(InfoExtractor):
             content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
             duration = float_or_none(content.attrib.get('duration'))
             mediagen_url = content.attrib['url']
-            guid = itemEl.find('.//guid').text.rpartition(':')[-1]
+            guid = itemEl.find('./guid').text.rpartition(':')[-1]

             cdoc = self._download_xml(
                 mediagen_url, epTitle,
youtube_dl/extractor/common.py
@@ -252,6 +252,17 @@ class InfoExtractor(object):
                 outf.write(webpage_bytes)

         content = webpage_bytes.decode(encoding, 'replace')
+
+        if (u'<title>Access to this site is blocked</title>' in content and
+                u'Websense' in content[:512]):
+            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
+            blocked_iframe = self._html_search_regex(
+                r'<iframe src="([^"]+)"', content,
+                u'Websense information URL', default=None)
+            if blocked_iframe:
+                msg += u' Visit %s for more details' % blocked_iframe
+            raise ExtractorError(msg, expected=True)
+
         return (content, urlh)

     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
youtube_dl/extractor/cspan.py
@@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    int_or_none,
     unescapeHTML,
     find_xpath_attr,
 )
youtube_dl/extractor/cspan.py
@@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
         info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
         data = self._download_json(info_url, video_id)

-        url = unescapeHTML(data['video']['files'][0]['path']['#text'])
-
-        doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
+        doc = self._download_xml(
+            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
             video_id)

-        def find_string(s):
-            return find_xpath_attr(doc, './/string', 'name', s).text
+        title = find_xpath_attr(doc, './/string', 'name', 'title').text
+        thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
+
+        files = data['video']['files']
+
+        entries = [{
+            'id': '%s_%d' % (video_id, partnum + 1),
+            'title': (
+                title if len(files) == 1 else
+                '%s part %d' % (title, partnum + 1)),
+            'url': unescapeHTML(f['path']['#text']),
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': int_or_none(f.get('length', {}).get('#text')),
+        } for partnum, f in enumerate(files)]

         return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': title,
             'id': video_id,
-            'title': find_string('title'),
-            'url': url,
-            'description': description,
-            'thumbnail': find_string('poster'),
         }
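The extractor now returns a playlist instead of a single video. A sketch of the resulting shape for a two-part program, reusing the id and title from the test added earlier in this comparison (entry URLs invented):

# Illustrative only: the playlist dict built by the reworked _real_extract().
result = {
    '_type': 'playlist',
    'id': '342759',
    'title': 'General Motors Ignition Switch Recall',
    'entries': [
        {'id': '342759_1',
         'title': 'General Motors Ignition Switch Recall part 1',
         'url': 'http://example.invalid/part1.mp4'},
        {'id': '342759_2',
         'title': 'General Motors Ignition Switch Recall part 2',
         'url': 'http://example.invalid/part2.mp4'},
    ],
}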
youtube_dl/extractor/franceculture.py (new file, 77 lines)
@@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_parse_qs,
    compat_urlparse,
)


class FranceCultureIE(InfoExtractor):
    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
        'info_dict': {
            'id': '4795174',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'vcodec': 'none',
            'uploader': 'Colette Fellous',
            'upload_date': '20140301',
            'duration': 3601,
            'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        baseurl = mobj.group('baseurl')

        webpage = self._download_webpage(url, video_id)
        params_code = self._search_regex(
            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
            webpage, 'parameter code')
        params = compat_parse_qs(params_code)
        video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])

        title = self._html_search_regex(
            r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
        uploader = self._html_search_regex(
            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
            webpage, 'uploader', fatal=False)
        thumbnail_part = self._html_search_regex(
            r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
            'thumbnail', fatal=False)
        if thumbnail_part is None:
            thumbnail = None
        else:
            thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
        description = self._html_search_regex(
            r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')

        info = json.loads(params['infoData'][0])[0]
        duration = info.get('media_length')
        upload_date_candidate = info.get('media_section5')
        upload_date = (
            upload_date_candidate
            if (upload_date_candidate is not None and
                re.match(r'[0-9]{8}$', upload_date_candidate))
            else None)

        return {
            'id': video_id,
            'url': video_url,
            'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }
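One detail in the new extractor worth spelling out: upload_date is kept only when media_section5 already looks like an eight-digit YYYYMMDD string. A small stand-alone sketch of that guard (the second input is invented):

import re

def keep_if_yyyymmdd(candidate):
    # Mirrors the upload_date expression above: keep 8-digit dates, drop the rest.
    if candidate is not None and re.match(r'[0-9]{8}$', candidate):
        return candidate
    return None

print(keep_if_yyyymmdd('20140301'))    # '20140301', the value in the test above
print(keep_if_yyyymmdd('01/03/2014'))  # None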
youtube_dl/extractor/generic.py
@@ -82,6 +82,17 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Brightcove'],
         },
+        {
+            'url': 'http://www.championat.com/video/football/v/87/87499.html',
+            'md5': 'fb973ecf6e4a78a67453647444222983',
+            'info_dict': {
+                'id': '3414141473001',
+                'ext': 'mp4',
+                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+                'uploader': 'Championat',
+            },
+        },
         # Direct link to a video
         {
             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
youtube_dl/extractor/musicplayon.py (new file, 75 lines)
@@ -0,0 +1,75 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import int_or_none


class MusicPlayOnIE(InfoExtractor):
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'

    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by <a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        for entry in manifest.split('#')[1:]:
            if entry.startswith('EXT-X-STREAM-INF:'):
                meta, url, _ = entry.split('\n')
                params = dict(param.split('=') for param in meta.split(',')[1:])
                formats.append({
                    'url': url,
                    'ext': 'mp4',
                    'tbr': int(params['BANDWIDTH']),
                    'width': int(params['RESOLUTION'].split('x')[1]),
                    'height': int(params['RESOLUTION'].split('x')[-1]),
                    'format_note': params['NAME'].replace('"', '').strip(),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }
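The extractor parses the variant playlist by hand rather than through a helper. A self-contained sketch of that loop on an invented single-variant manifest:

# Invented m3u8 text shaped like what the code above expects.
manifest = (
    '#EXTM3U\n'
    '#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1000000,RESOLUTION=854x480,NAME="480p"\n'
    'http://example.com/variant-480p.m3u8\n'
)

for entry in manifest.split('#')[1:]:
    if entry.startswith('EXT-X-STREAM-INF:'):
        meta, url, _ = entry.split('\n')
        params = dict(param.split('=') for param in meta.split(',')[1:])
        # params == {'BANDWIDTH': '1000000', 'RESOLUTION': '854x480', 'NAME': '"480p"'}
        print(url, int(params['BANDWIDTH']), params['NAME'].replace('"', '').strip())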
youtube_dl/extractor/rutube.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json
 import itertools

 from .common import InfoExtractor
youtube_dl/extractor/rutube.py
@@ -39,17 +38,15 @@ class RutubeIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')

-        api_response = self._download_webpage(
+        video = self._download_json(
             'http://rutube.ru/api/video/%s/?format=json' % video_id,
             video_id, 'Downloading video JSON')
-        video = json.loads(api_response)

-        api_response = self._download_webpage(
+        trackinfo = self._download_json(
             'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
             video_id, 'Downloading trackinfo JSON')
-        trackinfo = json.loads(api_response)

         # Some videos don't have the author field
         author = trackinfo.get('author') or {}
         m3u8_url = trackinfo['video_balancer'].get('m3u8')
youtube_dl/extractor/rutube.py
@@ -82,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
     def _extract_videos(self, channel_id, channel_title=None):
         entries = []
         for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                 self._PAGE_TEMPLATE % (channel_id, pagenum),
                 channel_id, 'Downloading page %s' % pagenum)
-            page = json.loads(api_response)
             results = page['results']
             if not results:
                 break
youtube_dl/extractor/rutube.py
@@ -111,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
             self._MOVIE_TEMPLATE % movie_id, movie_id,
             'Downloading movie JSON')
-        movie = json.loads(api_response)
         movie_name = movie['name']
         return self._extract_videos(movie_id, movie_name)

youtube_dl/extractor/vk.py
@@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'

     _TESTS = [
youtube_dl/extractor/youtube.py
@@ -1738,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         feed_entries = []
         paging = 0
         for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                           u'%s feed' % self._FEED_NAME,
                                           u'Downloading page %s' % i)
-            info = json.loads(info)
-            feed_html = info['feed_html']
+            feed_html = info.get('feed_html') or info.get('content_html')
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
             feed_entries.extend(
youtube_dl/extractor/youtube.py
@@ -1754,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
     _FEED_NAME = 'subscriptions'
     _PLAYLIST_TITLE = u'Youtube Subscriptions'
youtube_dl/utils.py
@@ -1176,12 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"


-def int_or_none(v, scale=1):
-    return v if v is None else (int(v) // scale)
+def int_or_none(v, scale=1, default=None):
+    return default if v is None else (int(v) // scale)


-def float_or_none(v, scale=1):
-    return v if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, default=None):
+    return default if v is None else (float(v) / scale)


 def parse_duration(s):
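A usage sketch of the widened helpers (inputs invented); the only behavioural change is the optional fallback for None:

from youtube_dl.utils import int_or_none, float_or_none

print(int_or_none('48000', scale=1000))     # 48
print(int_or_none(None))                    # None, as before
print(int_or_none(None, default=0))         # 0, the new fallback
print(float_or_none('283813', scale=1000))  # 283.813
print(float_or_none(None, default=0.0))     # 0.0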
youtube_dl/utils.py
@@ -1264,8 +1264,8 @@ class PagedList(object):

 def uppercase_escape(s):
     return re.sub(
-        r'\\U([0-9a-fA-F]{8})',
-        lambda m: compat_chr(int(m.group(1), base=16)), s)
+        r'\\U[0-9a-fA-F]{8}',
+        lambda m: m.group(0).decode('unicode-escape'), s)

 try:
     struct.pack(u'!I', 0)
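The intended behaviour of uppercase_escape() is unchanged, only the mechanism differs: a literal \UXXXXXXXX escape in the input decodes to the corresponding character. An illustrative sketch, assuming the Python 2 interpreters youtube-dl targeted at the time (the new lambda relies on str.decode):

from youtube_dl.utils import uppercase_escape

print(uppercase_escape('no escape here'))  # unchanged: 'no escape here'
print(uppercase_escape('\\U0001d550'))     # the single character U+1D550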
youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.04.01.2'
+__version__ = '2014.04.03.3'