Compare commits

..

38 Commits

Author SHA1 Message Date
Philipp Hagemeister
4c89bbd22c release 2014.03.27.1 2014-03-27 02:52:06 +01:00
Philipp Hagemeister
e2b06e76c1 [urort] Add extractor (Fixes #2634) 2014-03-27 02:51:50 +01:00
Philipp Hagemeister
e9c076c317 [clipsyndicate] Modernize 2014-03-27 02:30:00 +01:00
Philipp Hagemeister
6c072e7d25 release 2014.03.27 2014-03-27 02:22:57 +01:00
Philipp Hagemeister
ac6c104871 [ted] Add support for watch/ URLs (Fixes #2637) 2014-03-27 02:22:40 +01:00
Philipp Hagemeister
69c01a9f68 [comedycentral] Add a testcase for extended-interviews URLs (#2636) 2014-03-27 02:02:48 +01:00
Philipp Hagemeister
e55213ce35 Merge remote-tracking branch 'malept/tds-extended-interviews' 2014-03-27 02:02:18 +01:00
Mark Lee
24a2aac445 [comedycentral] fix TDS extended interviews
The new website broke the URL format.
Added "playlist" as a valid ID keyword.
2014-03-26 10:51:02 -07:00
Jaime Marquínez Ferrándiz
98acdc895b Merge remote-tracking branch 'dstftw/download-referer-header' (closes #2628) 2014-03-26 15:20:11 +01:00
Jaime Marquínez Ferrándiz
bd3b5b8b10 [slashdot] Remove extractor
The generic ooyala detection works fine.
2014-03-26 15:09:14 +01:00
Jaime Marquínez Ferrándiz
9a90636805 [vice] Remove extractor
The generic ooyala detection works fine.
2014-03-26 15:03:34 +01:00
Sergey M․
6a66ae96ed [cspan] Roll back unfinished rtmp support 2014-03-26 19:51:54 +07:00
Jaime Marquínez Ferrándiz
2c8a4ba6b5 Makefile: include the docs in the tarball 2014-03-26 12:01:08 +01:00
Philipp Hagemeister
ad8915b729 Add --no-warnings option (Fixes #2630) 2014-03-26 00:43:46 +01:00
Philipp Hagemeister
34cbc7ee8d [comedycentral] Better titles 2014-03-25 23:46:51 +01:00
Sergey M․
a59e40a1ea Replace 'referer' with 'http_referer' 2014-03-25 21:53:26 +07:00
Sergey M․
ad0a75db6b [auengine] Add referer 2014-03-25 21:22:41 +07:00
Sergey M․
1d0e49e1c7 Use explicitly set Referer header for downloading 2014-03-25 21:22:27 +07:00
Sergey M․
b4461b6ebe [auengine] Modernize 2014-03-25 21:16:10 +07:00
Philipp Hagemeister
80959224fe release 2014.03.25.1 2014-03-25 14:27:40 +01:00
Philipp Hagemeister
865cbf4fc5 [comedycentral] Correct uri (Fixes #2627) 2014-03-25 14:27:23 +01:00
Philipp Hagemeister
196f061cac release 2014.03.25 2014-03-25 04:01:13 +01:00
Philipp Hagemeister
99b380c33b [comedycentral] Fix thedailyshow / thecolbertreport (Fixes #2600, #2596) 2014-03-25 04:00:57 +01:00
Philipp Hagemeister
02e4482e22 release 2014.03.24.5 2014-03-24 23:23:38 +01:00
Philipp Hagemeister
b8a792de80 Merge remote-tracking branch 'origin/master' into HEAD
Conflicts:
	youtube_dl/extractor/arte.py
2014-03-24 23:23:17 +01:00
Philipp Hagemeister
fac55558ad [washingtonpost] Add extractor (Fixes #2622) 2014-03-24 23:21:20 +01:00
Jaime Marquínez Ferrándiz
b2799ff96d [arte] Fix videos.arte.tv extraction 2014-03-24 22:38:51 +01:00
Philipp Hagemeister
7a249480b4 [arte] Fix video.arte.tv extractor 2014-03-24 22:34:03 +01:00
Philipp Hagemeister
f605128d13 [rts] Add thumbnail support 2014-03-24 22:32:04 +01:00
Philipp Hagemeister
ba40a74666 [clipfish] Modernize 2014-03-24 22:30:32 +01:00
Philipp Hagemeister
fb8ae2d438 release 2014.03.24.4 2014-03-24 22:03:51 +01:00
Philipp Hagemeister
893f8832b5 [arte] Add support for embedded videos (Fixes #2620) 2014-03-24 22:01:47 +01:00
Philipp Hagemeister
878d11ec29 [arte] Add support for multiple formats 2014-03-24 21:36:26 +01:00
Philipp Hagemeister
515bbe4b5b [arte] Remove liveweb support
liveweb.arte.tv is no longer functional, everything has moved to concert
2014-03-24 21:31:19 +01:00
Philipp Hagemeister
75f2e25ba9 [downloader/hls] Encode filename (Fixes #2609) 2014-03-24 21:23:05 +01:00
Philipp Hagemeister
0d466d34a3 release 2014.03.24.3 2014-03-24 17:12:42 +01:00
Philipp Hagemeister
6949d81095 [byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00
Philipp Hagemeister
f847ca02d3 [addanime] Modernize 2014-03-24 16:39:53 +01:00
26 changed files with 479 additions and 281 deletions

View File

@@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '__pycache' \
--exclude '.git' \
--exclude 'testdata' \
--exclude 'docs/_build' \
-- \
bin devscripts test youtube_dl \
bin devscripts test youtube_dl docs \
CHANGELOG LICENSE README.md README.txt \
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
youtube-dl

View File

@@ -169,6 +169,7 @@ which means you can modify it, redistribute it or use it however you like.
## Verbosity / Simulation Options:
-q, --quiet activates quiet mode
--no-warnings Ignore warnings
-s, --simulate do not download the video and do not write
anything to disk
--skip-download do not download the video

View File

@@ -143,5 +143,8 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
def test_ComedyCentralShows(self):
self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
if __name__ == '__main__':
unittest.main()

View File

@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
import io
import json
import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
@@ -36,6 +37,7 @@ from youtube_dl.utils import (
urlencode_postdata,
xpath_with_ns,
parse_iso8601,
strip_jsonp,
)
if sys.version_info < (3, 0):
@@ -272,5 +274,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
def test_strip_jsonp(self):
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}])
if __name__ == '__main__':
unittest.main()

View File

@@ -94,6 +94,7 @@ class YoutubeDL(object):
usenetrc: Use netrc for authentication instead.
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
forceurl: Force printing final URL.
forcetitle: Force printing title.
forceid: Force printing ID.
@@ -376,6 +377,8 @@ class YoutubeDL(object):
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
if self.params.get('no_warnings'):
return
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:

View File

@@ -364,6 +364,10 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option(
'--no-warnings',
dest='no_warnings', action='store_true', default=False,
help='Ignore warnings')
verbosity.add_option('-s', '--simulate',
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
verbosity.add_option('--skip-download',
@@ -708,6 +712,7 @@ def _real_main(argv=None):
'password': opts.password,
'videopassword': opts.videopassword,
'quiet': (opts.quiet or any_printing),
'no_warnings': opts.no_warnings,
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forceid': opts.getid,

View File

@@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
args = [
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc',
encodeFilename(tmpfilename, for_subprocess=True)]
for program in ['avconv', 'ffmpeg']:
try:

View File

@@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
if 'http_referer' in info_dict:
headers['Referer'] = info_dict['http_referer']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)

View File

@@ -14,6 +14,7 @@ from .arte import (
ArteTVConcertIE,
ArteTVFutureIE,
ArteTVDDCIE,
ArteTVEmbedIE,
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
@@ -25,6 +26,7 @@ from .bloomberg import BloombergIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
@@ -206,7 +208,6 @@ from .rutv import RUTVIE
from .savefrom import SaveFromIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
@@ -255,13 +256,13 @@ from .udemy import (
UdemyCourseIE
)
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
@@ -280,6 +281,7 @@ from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wdr import WDRIE
from .weibo import WeiboIE

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -14,14 +16,14 @@ from ..utils import (
class AddAnimeIE(InfoExtractor):
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
u'file': u'24MR3YO5SAS9.mp4',
u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
u'info_dict': {
u"description": u"One Piece 606",
u"title": u"One Piece 606"
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0',
'info_dict': {
'id': '24MR3YO5SAS9',
'ext': 'mp4',
'description': 'One Piece 606',
'title': 'One Piece 606',
}
}
@@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor):
redir_webpage = ee.cause.read().decode('utf-8')
action = self._search_regex(
r'<form id="challenge-form" action="([^"]+)"',
redir_webpage, u'Redirect form')
redir_webpage, 'Redirect form')
vc = self._search_regex(
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
redir_webpage, u'redirect vc value')
redir_webpage, 'redirect vc value')
av = re.search(
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
redir_webpage)
@@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor):
parsed_url = compat_urllib_parse_urlparse(url)
av_val = av_res + len(parsed_url.netloc)
confirm_url = (
parsed_url.scheme + u'://' + parsed_url.netloc +
parsed_url.scheme + '://' + parsed_url.netloc +
action + '?' +
compat_urllib_parse.urlencode({
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
self._download_webpage(
confirm_url, video_id,
note=u'Confirming after redirect')
note='Confirming after redirect')
webpage = self._download_webpage(url, video_id)
formats = []
for format_id in ('normal', 'hq'):
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, u'video file URLx',
video_url = self._search_regex(rex, webpage, 'video file URLx',
fatal=False)
if not video_url:
continue
@@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor):
'format_id': format_id,
'url': video_url,
})
if not formats:
raise ExtractorError(u'Cannot find any video format!')
self._sort_formats(formats)
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
return {
'_type': 'video',
'id': video_id,
'id': video_id,
'formats': formats,
'title': video_title,
'description': video_description

View File

@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
@@ -19,114 +18,41 @@ from ..utils import (
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.
class ArteTvIE(InfoExtractor):
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
class ArteTvIE(InfoExtractor):
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
IE_NAME = 'arte.tv'
@classmethod
def suitable(cls, url):
return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream
# from ..utils import compat_urllib_parse
# def extractLiveStream(self, url):
# video_lang = url.split('/')[-4]
# info = self.grep_webpage(
# url,
# r'src="(.*?/videothek_js.*?\.js)',
# 0,
# [
# (1, 'url', 'Invalid URL: %s' % url)
# ]
# )
# http_host = url.split('/')[2]
# next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
# info = self.grep_webpage(
# next_url,
# r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
# '(http://.*?\.swf).*?' +
# '(rtmp://.*?)\'',
# re.DOTALL,
# [
# (1, 'path', 'could not extract video path: %s' % url),
# (2, 'player', 'could not extract video player: %s' % url),
# (3, 'url', 'could not extract video url: %s' % url)
# ]
# )
# video_url = '%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url):
mobj = re.match(self._VIDEOS_URL, url)
if mobj is not None:
id = mobj.group('id')
lang = mobj.group('lang')
return self._extract_video(url, id, lang)
mobj = re.match(self._VALID_URL, url)
lang = mobj.group('lang')
video_id = mobj.group('id')
mobj = re.match(self._LIVEWEB_URL, url)
if mobj is not None:
name = mobj.group('name')
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
raise ExtractorError('No video found')
def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(
ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(
config = self._download_xml(
config_xml_url, video_id, note='Downloading configuration')
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m):
quality = m.group('quality')
if quality == 'hd':
return 2
else:
return 1
# We pick the best quality
video_urls = sorted(video_urls, key=_key)
video_url = list(video_urls)[-1].group('url')
title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
config_xml, 'thumbnail')
return {'id': video_id,
'title': title,
'thumbnail': thumbnail,
'url': video_url,
'ext': 'flv',
}
formats = [{
'forma_id': q.attrib['quality'],
'url': q.text,
'ext': 'flv',
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')]
self._sort_formats(formats)
def _extract_liveweb(self, url, name, lang):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, 'Downloading information')
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
url_node = event_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,
'url': url_node.text.replace('MP4', 'mp4'),
'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
}
title = config.find('.//name').text
thumbnail = config.find('.//firstThumbnailUrl').text
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
class ArteTVPlus7IE(InfoExtractor):
@@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id)
info = json.loads(json_info)
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
info_dict = {
@@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
l = 'F'
elif lang == 'de':
l = 'A'
else:
l = lang
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
@@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
'description': 'md5:486eb08f991552ade77439fe6d82c305',
},
}
class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
http://www\.arte\.tv
/playerv2/embed\.php\?json_url=
(?P<json_url>
http://arte\.tv/papi/tvguide/videos/stream/player/
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
)
'''
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
lang = mobj.group('lang')
json_url = mobj.group('json_url')
return self._extract_from_json_url(json_url, video_id, lang)

View File

@@ -11,22 +11,24 @@ from ..utils import (
class AUEngineIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
_TEST = {
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
'file': 'lfvlytY6.mp4',
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
'info_dict': {
'id': 'lfvlytY6',
'ext': 'mp4',
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
}
}
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, 'title')
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
title = title.strip()
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = map(compat_urllib_parse.unquote, links)
@@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
elif '/videos/' in link:
video_url = link
if not video_url:
raise ExtractorError(u'Could not find video URL')
raise ExtractorError('Could not find video URL')
ext = '.' + determine_ext(video_url)
if ext == title[-len(ext):]:
title = title[:-len(ext)]
return {
'id': video_id,
'url': video_url,
'title': title,
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
}

View File

@@ -0,0 +1,50 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
_TEST = {
'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
'info_dict': {
'id': 'granite-flats-talking',
'ext': 'mp4',
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
'title': 'Talking',
'thumbnail': 're:^https?://.*promo.*'
},
'params': {
'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
episode_json = re.sub(
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
ep = json.loads(episode_json)
if ep['providerType'] == 'Ooyala':
return {
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % ep['providerId'],
'id': video_id,
'title': ep['title'],
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
}
else:
raise ExtractorError('Unsupported provider %s' % ep['provider'])

View File

@@ -1,22 +1,28 @@
from __future__ import unicode_literals
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
parse_duration,
)
class ClipfishIE(InfoExtractor):
IE_NAME = u'clipfish'
IE_NAME = 'clipfish'
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
_TEST = {
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
u'file': u'3966754.mp4',
u'md5': u'2521cd644e862936cf2e698206e47385',
u'info_dict': {
u'title': u'FIFA 14 - E3 2013 Trailer',
u'duration': 82,
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
'md5': '2521cd644e862936cf2e698206e47385',
'info_dict': {
'id': '3966754',
'ext': 'mp4',
'title': 'FIFA 14 - E3 2013 Trailer',
'duration': 82,
},
u'skip': 'Blocked in the US'
}
@@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
video_url = doc.find('filename').text
if video_url is None:
xml_bytes = xml.etree.ElementTree.tostring(doc)
raise ExtractorError(u'Cannot find video URL in document %r' %
raise ExtractorError('Cannot find video URL in document %r' %
xml_bytes)
thumbnail = doc.find('imageurl').text
duration_str = doc.find('duration').text
m = re.match(
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
duration_str)
if m:
duration = (
(int(m.group('hours')) * 60 * 60) +
(int(m.group('minutes')) * 60) +
(int(m.group('seconds')))
)
else:
duration = None
duration = parse_duration(doc.find('duration').text)
return {
'id': video_id,

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
u'info_dict': {
u'id': u'4629301',
u'ext': u'mp4',
u'title': u'Brick Briscoe',
u'duration': 612,
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
'info_dict': {
'id': '4629301',
'ext': 'mp4',
'title': 'Brick Briscoe',
'duration': 612,
'thumbnail': 're:^https?://.+\.jpg',
},
}
@@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
video_id = mobj.group('id')
js_player = self._download_webpage(
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
video_id, u'Downlaoding player')
video_id, 'Downlaoding player')
# it includes a required token
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
pdoc = self._download_xml(
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
video_id, u'Downloading video info',
video_id, 'Downloading video info',
transform_source=fix_xml_ampersands)
track_doc = pdoc.find('trackList/track')

View File

@@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
ExtractorError,
int_or_none,
unified_strdate,
)
@@ -32,31 +32,32 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = 'The Daily Show / Colbert Report'
IE_DESC = 'The Daily Show / The Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like:
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
_VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|(https?://)?(www\.)?
(?P<showname>thedailyshow|colbertnation)\.com/
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
(full-episodes/(?P<episode>.*)|
(?P<clip>
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
(?P<interview>
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
$"""
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
$'''
_TEST = {
'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
'file': '422212.mp4',
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
'info_dict': {
"upload_date": "20121214",
"description": "Kristen Stewart",
"uploader": "thedailyshow",
"title": "thedailyshow-kristen-stewart part 1"
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
'ext': 'mp4',
'upload_date': '20121213',
'description': 'Kristen Stewart learns to let loose in "On the Road."',
'uploader': 'thedailyshow',
'title': 'thedailyshow-kristen-stewart part 1',
}
}
@@ -79,11 +80,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'400': (384, 216),
}
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
@@ -99,9 +95,9 @@ class ComedyCentralShowsIE(InfoExtractor):
if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'):
url = 'http://www.thedailyshow.com/full-episodes/'
url = 'http://thedailyshow.cc.com/full-episodes/'
else:
url = 'http://www.colbertnation.com/full-episodes/'
url = 'http://thecolbertreport.cc.com/full-episodes/'
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
assert mobj is not None
@@ -120,9 +116,9 @@ class ComedyCentralShowsIE(InfoExtractor):
epTitle = mobj.group('showname')
else:
epTitle = mobj.group('episode')
show_name = mobj.group('showname')
self.report_extraction(epTitle)
webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
if dlNewest:
url = htmlHandle.geturl()
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -130,71 +126,86 @@ class ComedyCentralShowsIE(InfoExtractor):
raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '':
raise ExtractorError('Redirected URL is still not specific: ' + url)
epTitle = mobj.group('episode')
epTitle = mobj.group('episode').rpartition('/')[-1]
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
raise ExtractorError('unable to find Flash URL in webpage ' + url)
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
idoc = self._download_xml(indexUrl, epTitle,
'Downloading show index',
'unable to download episode index')
# Correct cc.com in uri
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
results = []
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
idoc = self._download_xml(
index_url, epTitle,
'Downloading show index', 'Unable to download episode index')
itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
officialTitle = itemEl.findall('./title')[0].text
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
title = idoc.find('./channel/title').text
description = idoc.find('./channel/description').text
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
cdoc = self._download_xml(configUrl, epTitle,
'Downloading configuration for %s' % shortMediaId)
entries = []
item_els = idoc.findall('.//item')
for part_num, itemEl in enumerate(item_els):
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
duration = int_or_none(content.attrib.get('duration'))
mediagen_url = content.attrib['url']
guid = itemEl.find('.//guid').text.rpartition(':')[-1]
cdoc = self._download_xml(
mediagen_url, epTitle,
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
turls = []
for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
turls.append(finfo)
if len(turls) == 0:
self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
continue
formats = []
for format, rtmp_video_url in turls:
w, h = self._video_dimensions.get(format, (None, None))
formats.append({
'format_id': 'vhttp-%s' % format,
'url': self._transform_rtmp_url(rtmp_video_url),
'ext': self._video_extensions.get(format, 'mp4'),
'format_id': format,
'height': h,
'width': w,
})
formats.append({
'format_id': 'rtmp-%s' % format,
'url': rtmp_video_url,
'ext': self._video_extensions.get(format, 'mp4'),
'height': h,
'width': w,
})
self._sort_formats(formats)
effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
results.append({
'id': shortMediaId,
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
'title': virtual_id,
'formats': formats,
'uploader': showId,
'upload_date': officialDate,
'title': effTitle,
'thumbnail': None,
'description': compat_str(officialTitle),
'uploader': show_name,
'upload_date': upload_date,
'duration': duration,
'thumbnail': thumbnail,
'description': description,
})
return results
return {
'_type': 'playlist',
'entries': entries,
'title': show_name + ' ' + title,
'description': description,
}

View File

@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
'description': 'No description',
},
},
# arte embed
{
'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
'md5': '7653032cbb25bf6c80d80f217055fa43',
'info_dict': {
'id': '048195-004_PLUS7-F',
'ext': 'flv',
'title': 'X:enius',
'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
'upload_date': '20140320',
},
'params': {
'skip_download': 'Requires rtmpdump'
}
},
]
def report_download_webpage(self, video_id):
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
# Look for embedded arte.tv player
mobj = re.search(
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:

View File

@@ -7,7 +7,7 @@ from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video

View File

@@ -28,6 +28,7 @@ class RTSIE(InfoExtractor):
'uploader': 'Divers',
'upload_date': '19680921',
'timestamp': -40280400,
'thumbnail': 're:^https?://.*\.image'
},
}
@@ -58,4 +59,5 @@ class RTSIE(InfoExtractor):
'duration': duration,
'uploader': info.get('programName'),
'timestamp': upload_timestamp,
'thumbnail': thumbnail,
}

View File

@@ -1,24 +0,0 @@
import re
from .common import InfoExtractor
class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'add_ie': ['Ooyala'],
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
u'info_dict': {
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
return self.url_result(ooyala_url, 'Ooyala')

View File

@@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
((?P<type_talk>talks)) # We have a simple talk
|
(?P<type_watch>watch)/[^/]+/[^/]+
)
(/lang/(.*?))? # The url may contain the language
/(?P<name>\w+) # Here goes the name and then ".html"
/(?P<name>[\w-]+) # Here goes the name and then ".html"
.*)$
'''
_TEST = {
_TESTS = [{
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
'info_dict': {
@@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
'actively fooling us.'),
'uploader': 'Dan Dennett',
}
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
'md5': '226f4fb9c62380d11b7995efa4c87994',
'info_dict': {
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
'ext': 'mp4',
'title': 'Vishal Sikka: The beauty and power of algorithms',
'thumbnail': 're:^https?://.+\.jpg',
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
}
}]
_FORMATS_PREFERENCE = {
'low': 1,
@@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
name = m.group('name')
if m.group('type_talk'):
return self._talk_info(url, name)
elif m.group('type_watch'):
return self._watch_info(url, name)
else:
return self._playlist_videos_info(url, name)
@@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
else:
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
def _watch_info(self, url, name):
webpage = self._download_webpage(url, name)
config_json = self._html_search_regex(
r"data-config='([^']+)", webpage, 'config')
config = json.loads(config_json)
video_url = config['video']['url']
thumbnail = config.get('image', {}).get('url')
title = self._html_search_regex(
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
description = self._html_search_regex(
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
webpage, 'description', fatal=False)
return {
'id': name,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'description': description,
}

View File

@@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
)
class UrortIE(InfoExtractor):
IE_DESC = 'NRK P3 Urørt'
_VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
_TEST = {
'url': 'https://urort.p3.no/#!/Band/Gerilja',
'md5': '5ed31a924be8a05e47812678a86e127b',
'info_dict': {
'id': '33124-4',
'ext': 'mp3',
'title': 'The Bomb',
'thumbnail': 're:^https?://.+\.jpg',
'like_count': int,
'uploader': 'Gerilja',
'uploader_id': 'Gerilja',
},
'params': {
'matchtitle': '^The Bomb$', # To test, we want just one video
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
songs = self._download_json(json_url, playlist_id)
entries = [{
'id': '%d-%s' % (s['BandId'], s['$id']),
'title': s['Title'],
'url': s['TrackUrl'],
'ext': 'mp3',
'uploader_id': playlist_id,
'uploader': s.get('BandName', playlist_id),
'like_count': s.get('LikeCount'),
'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
} for s in songs]
return {
'_type': 'playlist',
'id': playlist_id,
'title': playlist_id,
'entries': entries,
}

View File

@@ -1,38 +0,0 @@
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import ExtractorError
class ViceIE(InfoExtractor):
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
_TEST = {
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
u'info_dict': {
u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
},
u'params': {
# Requires ffmpeg (m3u8 manifest)
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
try:
ooyala_url = self._og_search_video_url(webpage)
except ExtractorError:
try:
embed_code = self._search_regex(
r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
u'ooyala embed code')
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
except ExtractorError:
raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
return self.url_result(ooyala_url, ie='Ooyala')

View File

@@ -0,0 +1,103 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
strip_jsonp,
)
class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'playlist': [{
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'Breaking Points: The Paper Mine',
'duration': 1287,
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
'uploader': 'The Washington Post',
'timestamp': 1395527908,
'upload_date': '20140322',
},
}, {
'md5': 'f645a07652c2950cd9134bb852c5f5eb',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
'title': 'The town bureaucracy sustains',
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
'duration': 2217,
'timestamp': 1395528005,
'upload_date': '20140322',
'uploader': 'The Washington Post',
},
}]
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = mobj.group('id')
webpage = self._download_webpage(url, page_id)
title = self._og_search_title(webpage)
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
entries = []
for i, uuid in enumerate(uuids, start=1):
vinfo_all = self._download_json(
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
page_id,
transform_source=strip_jsonp,
note='Downloading information of video %d/%d' % (i, len(uuids))
)
vinfo = vinfo_all[0]['contentConfig']
uploader = vinfo.get('credits', {}).get('source')
timestamp = int_or_none(
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
formats = [{
'format_id': (
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
if s.get('width')
else s.get('type')),
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
'width': s.get('width'),
'height': s.get('height'),
'acodec': s.get('audioCodec'),
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
'filesize': s.get('fileSize'),
'url': s.get('url'),
'ext': 'mp4',
'protocol': {
'MP4': 'http',
'F4F': 'f4m',
}.get(s.get('type'))
} for s in vinfo.get('streams', [])]
source_media_url = vinfo.get('sourceMediaURL')
if source_media_url:
formats.append({
'format_id': 'source_media',
'url': source_media_url,
})
self._sort_formats(formats)
entries.append({
'id': uuid,
'title': vinfo['title'],
'description': vinfo.get('blurb'),
'uploader': uploader,
'formats': formats,
'duration': int_or_none(vinfo.get('videoDuration'), 100),
'timestamp': timestamp,
})
return {
'_type': 'playlist',
'entries': entries,
'id': page_id,
'title': title,
}

View File

@@ -1186,7 +1186,7 @@ def parse_duration(s):
return None
m = re.match(
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
if not m:
return None
res = int(m.group('secs'))
@@ -1328,3 +1328,7 @@ US_RATINGS = {
'R': 16,
'NC': 18,
}
def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.24.2'
__version__ = '2014.03.27.1'