Compare commits
38 Commits
2014.03.24
...
2014.03.27
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4c89bbd22c | ||
![]() |
e2b06e76c1 | ||
![]() |
e9c076c317 | ||
![]() |
6c072e7d25 | ||
![]() |
ac6c104871 | ||
![]() |
69c01a9f68 | ||
![]() |
e55213ce35 | ||
![]() |
24a2aac445 | ||
![]() |
98acdc895b | ||
![]() |
bd3b5b8b10 | ||
![]() |
9a90636805 | ||
![]() |
6a66ae96ed | ||
![]() |
2c8a4ba6b5 | ||
![]() |
ad8915b729 | ||
![]() |
34cbc7ee8d | ||
![]() |
a59e40a1ea | ||
![]() |
ad0a75db6b | ||
![]() |
1d0e49e1c7 | ||
![]() |
b4461b6ebe | ||
![]() |
80959224fe | ||
![]() |
865cbf4fc5 | ||
![]() |
196f061cac | ||
![]() |
99b380c33b | ||
![]() |
02e4482e22 | ||
![]() |
b8a792de80 | ||
![]() |
fac55558ad | ||
![]() |
b2799ff96d | ||
![]() |
7a249480b4 | ||
![]() |
f605128d13 | ||
![]() |
ba40a74666 | ||
![]() |
fb8ae2d438 | ||
![]() |
893f8832b5 | ||
![]() |
878d11ec29 | ||
![]() |
515bbe4b5b | ||
![]() |
75f2e25ba9 | ||
![]() |
0d466d34a3 | ||
![]() |
6949d81095 | ||
![]() |
f847ca02d3 |
3
Makefile
3
Makefile
@@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude '__pycache' \
|
||||
--exclude '.git' \
|
||||
--exclude 'testdata' \
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl \
|
||||
bin devscripts test youtube_dl docs \
|
||||
CHANGELOG LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
youtube-dl
|
||||
|
@@ -169,6 +169,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate do not download the video and do not write
|
||||
anything to disk
|
||||
--skip-download do not download the video
|
||||
|
@@ -143,5 +143,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||
|
||||
def test_ComedyCentralShows(self):
|
||||
self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Various small unit tests
|
||||
import io
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
#from youtube_dl.utils import htmlentity_transform
|
||||
@@ -36,6 +37,7 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
xpath_with_ns,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@@ -272,5 +274,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||
|
||||
def test_strip_jsonp(self):
|
||||
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -94,6 +94,7 @@ class YoutubeDL(object):
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
no_warnings: Do not print out anything for warnings.
|
||||
forceurl: Force printing final URL.
|
||||
forcetitle: Force printing title.
|
||||
forceid: Force printing ID.
|
||||
@@ -376,6 +377,8 @@ class YoutubeDL(object):
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
if self.params.get('no_warnings'):
|
||||
return
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
|
@@ -364,6 +364,10 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option(
|
||||
'--no-warnings',
|
||||
dest='no_warnings', action='store_true', default=False,
|
||||
help='Ignore warnings')
|
||||
verbosity.add_option('-s', '--simulate',
|
||||
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
|
||||
verbosity.add_option('--skip-download',
|
||||
@@ -708,6 +712,7 @@ def _real_main(argv=None):
|
||||
'password': opts.password,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
|
@@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
args = [
|
||||
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc',
|
||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
|
@@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
if 'http_referer' in info_dict:
|
||||
headers['Referer'] = info_dict['http_referer']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
|
@@ -14,6 +14,7 @@ from .arte import (
|
||||
ArteTVConcertIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@@ -25,6 +26,7 @@ from .bloomberg import BloombergIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .canal13cl import Canal13clIE
|
||||
from .canalplus import CanalplusIE
|
||||
@@ -206,7 +208,6 @@ from .rutv import RUTVIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
@@ -255,13 +256,13 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@@ -280,6 +281,7 @@ from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import WDRIE
|
||||
from .weibo import WeiboIE
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,14 +16,14 @@ from ..utils import (
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'AddAnime'
|
||||
_TEST = {
|
||||
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
u'file': u'24MR3YO5SAS9.mp4',
|
||||
u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
|
||||
u'info_dict': {
|
||||
u"description": u"One Piece 606",
|
||||
u"title": u"One Piece 606"
|
||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
||||
'info_dict': {
|
||||
'id': '24MR3YO5SAS9',
|
||||
'ext': 'mp4',
|
||||
'description': 'One Piece 606',
|
||||
'title': 'One Piece 606',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor):
|
||||
redir_webpage = ee.cause.read().decode('utf-8')
|
||||
action = self._search_regex(
|
||||
r'<form id="challenge-form" action="([^"]+)"',
|
||||
redir_webpage, u'Redirect form')
|
||||
redir_webpage, 'Redirect form')
|
||||
vc = self._search_regex(
|
||||
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
|
||||
redir_webpage, u'redirect vc value')
|
||||
redir_webpage, 'redirect vc value')
|
||||
av = re.search(
|
||||
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
|
||||
redir_webpage)
|
||||
@@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor):
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
av_val = av_res + len(parsed_url.netloc)
|
||||
confirm_url = (
|
||||
parsed_url.scheme + u'://' + parsed_url.netloc +
|
||||
parsed_url.scheme + '://' + parsed_url.netloc +
|
||||
action + '?' +
|
||||
compat_urllib_parse.urlencode({
|
||||
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
|
||||
self._download_webpage(
|
||||
confirm_url, video_id,
|
||||
note=u'Confirming after redirect')
|
||||
note='Confirming after redirect')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for format_id in ('normal', 'hq'):
|
||||
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
||||
video_url = self._search_regex(rex, webpage, u'video file URLx',
|
||||
video_url = self._search_regex(rex, webpage, 'video file URLx',
|
||||
fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
@@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError(u'Cannot find any video format!')
|
||||
self._sort_formats(formats)
|
||||
video_title = self._og_search_title(webpage)
|
||||
video_description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'description': video_description
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -19,114 +18,41 @@ from ..utils import (
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||
IE_NAME = 'arte.tv'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
# def extractLiveStream(self, url):
|
||||
# video_lang = url.split('/')[-4]
|
||||
# info = self.grep_webpage(
|
||||
# url,
|
||||
# r'src="(.*?/videothek_js.*?\.js)',
|
||||
# 0,
|
||||
# [
|
||||
# (1, 'url', 'Invalid URL: %s' % url)
|
||||
# ]
|
||||
# )
|
||||
# http_host = url.split('/')[2]
|
||||
# next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
|
||||
# info = self.grep_webpage(
|
||||
# next_url,
|
||||
# r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
|
||||
# '(http://.*?\.swf).*?' +
|
||||
# '(rtmp://.*?)\'',
|
||||
# re.DOTALL,
|
||||
# [
|
||||
# (1, 'path', 'could not extract video path: %s' % url),
|
||||
# (2, 'player', 'could not extract video player: %s' % url),
|
||||
# (3, 'url', 'could not extract video url: %s' % url)
|
||||
# ]
|
||||
# )
|
||||
# video_url = '%s/%s' % (info.get('url'), info.get('path'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VIDEOS_URL, url)
|
||||
if mobj is not None:
|
||||
id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_video(url, id, lang)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
lang = mobj.group('lang')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
mobj = re.match(self._LIVEWEB_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, url) is not None:
|
||||
raise ExtractorError('Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
# return
|
||||
|
||||
raise ExtractorError('No video found')
|
||||
|
||||
def _extract_video(self, url, video_id, lang):
|
||||
"""Extract from videos.arte.tv"""
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml_doc = self._download_xml(
|
||||
ref_xml_url, video_id, note='Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config_xml = self._download_webpage(
|
||||
config = self._download_xml(
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||
def _key(m):
|
||||
quality = m.group('quality')
|
||||
if quality == 'hd':
|
||||
return 2
|
||||
else:
|
||||
return 1
|
||||
# We pick the best quality
|
||||
video_urls = sorted(video_urls, key=_key)
|
||||
video_url = list(video_urls)[-1].group('url')
|
||||
|
||||
title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
|
||||
thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
|
||||
config_xml, 'thumbnail')
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
formats = [{
|
||||
'forma_id': q.attrib['quality'],
|
||||
'url': q.text,
|
||||
'ext': 'flv',
|
||||
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
|
||||
} for q in config.findall('./urls/url')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
def _extract_liveweb(self, url, name, lang):
|
||||
"""Extract form http://liveweb.arte.tv/"""
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
|
||||
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||
video_id, 'Downloading information')
|
||||
event_doc = config_doc.find('event')
|
||||
url_node = event_doc.find('video').find('urlHd')
|
||||
if url_node is None:
|
||||
url_node = event_doc.find('urlSd')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||
'url': url_node.text.replace('MP4', 'mp4'),
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
title = config.find('.//name').text
|
||||
thumbnail = config.find('.//firstThumbnailUrl').text
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
@@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
||||
self.report_extraction(video_id)
|
||||
info = json.loads(json_info)
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
info_dict = {
|
||||
@@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
else:
|
||||
l = lang
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
@@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
http://www\.arte\.tv
|
||||
/playerv2/embed\.php\?json_url=
|
||||
(?P<json_url>
|
||||
http://arte\.tv/papi/tvguide/videos/stream/player/
|
||||
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
|
||||
)
|
||||
'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
@@ -11,22 +11,24 @@ from ..utils import (
|
||||
|
||||
|
||||
class AUEngineIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
||||
'file': 'lfvlytY6.mp4',
|
||||
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
|
||||
'info_dict': {
|
||||
'id': 'lfvlytY6',
|
||||
'ext': 'mp4',
|
||||
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
||||
title = title.strip()
|
||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||
links = map(compat_urllib_parse.unquote, links)
|
||||
@@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
|
||||
elif '/videos/' in link:
|
||||
video_url = link
|
||||
if not video_url:
|
||||
raise ExtractorError(u'Could not find video URL')
|
||||
raise ExtractorError('Could not find video URL')
|
||||
ext = '.' + determine_ext(video_url)
|
||||
if ext == title[-len(ext):]:
|
||||
title = title[:-len(ext)]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
|
||||
}
|
||||
|
50
youtube_dl/extractor/byutv.py
Normal file
50
youtube_dl/extractor/byutv.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
|
||||
'info_dict': {
|
||||
'id': 'granite-flats-talking',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
|
||||
'title': 'Talking',
|
||||
'thumbnail': 're:^https?://.*promo.*'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
episode_json = re.sub(
|
||||
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
|
||||
ep = json.loads(episode_json)
|
||||
|
||||
if ep['providerType'] == 'Ooyala':
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
@@ -1,22 +1,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
|
||||
IE_NAME = u'clipfish'
|
||||
IE_NAME = 'clipfish'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||
_TEST = {
|
||||
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
u'file': u'3966754.mp4',
|
||||
u'md5': u'2521cd644e862936cf2e698206e47385',
|
||||
u'info_dict': {
|
||||
u'title': u'FIFA 14 - E3 2013 Trailer',
|
||||
u'duration': 82,
|
||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
'md5': '2521cd644e862936cf2e698206e47385',
|
||||
'info_dict': {
|
||||
'id': '3966754',
|
||||
'ext': 'mp4',
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'duration': 82,
|
||||
},
|
||||
u'skip': 'Blocked in the US'
|
||||
}
|
||||
@@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
|
||||
video_url = doc.find('filename').text
|
||||
if video_url is None:
|
||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||
raise ExtractorError(u'Cannot find video URL in document %r' %
|
||||
raise ExtractorError('Cannot find video URL in document %r' %
|
||||
xml_bytes)
|
||||
thumbnail = doc.find('imageurl').text
|
||||
duration_str = doc.find('duration').text
|
||||
m = re.match(
|
||||
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
|
||||
duration_str)
|
||||
if m:
|
||||
duration = (
|
||||
(int(m.group('hours')) * 60 * 60) +
|
||||
(int(m.group('minutes')) * 60) +
|
||||
(int(m.group('seconds')))
|
||||
)
|
||||
else:
|
||||
duration = None
|
||||
duration = parse_duration(doc.find('duration').text)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||
u'info_dict': {
|
||||
u'id': u'4629301',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Brick Briscoe',
|
||||
u'duration': 612,
|
||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||
'info_dict': {
|
||||
'id': '4629301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brick Briscoe',
|
||||
'duration': 612,
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
js_player = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||
video_id, u'Downlaoding player')
|
||||
video_id, 'Downlaoding player')
|
||||
# it includes a required token
|
||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
|
||||
|
||||
pdoc = self._download_xml(
|
||||
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||
video_id, u'Downloading video info',
|
||||
video_id, 'Downloading video info',
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
track_doc = pdoc.find('trackList/track')
|
||||
|
@@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -32,31 +32,32 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(InfoExtractor):
|
||||
IE_DESC = 'The Daily Show / Colbert Report'
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow or :colbert
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||
|(https?://)?(www\.)?
|
||||
(?P<showname>thedailyshow|colbertnation)\.com/
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(full-episodes/(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
||||
$"""
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||
$'''
|
||||
_TEST = {
|
||||
'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'file': '422212.mp4',
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
"upload_date": "20121214",
|
||||
"description": "Kristen Stewart",
|
||||
"uploader": "thedailyshow",
|
||||
"title": "thedailyshow-kristen-stewart part 1"
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow-kristen-stewart part 1',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,11 +80,6 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
@staticmethod
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
|
||||
@@ -99,9 +95,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
|
||||
if mobj.group('shortname'):
|
||||
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
||||
url = 'http://www.thedailyshow.com/full-episodes/'
|
||||
url = 'http://thedailyshow.cc.com/full-episodes/'
|
||||
else:
|
||||
url = 'http://www.colbertnation.com/full-episodes/'
|
||||
url = 'http://thecolbertreport.cc.com/full-episodes/'
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
assert mobj is not None
|
||||
|
||||
@@ -120,9 +116,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
|
||||
self.report_extraction(epTitle)
|
||||
webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -130,71 +126,86 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = mobj.group('episode')
|
||||
epTitle = mobj.group('episode').rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||
idoc = self._download_xml(indexUrl, epTitle,
|
||||
'Downloading show index',
|
||||
'unable to download episode index')
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
|
||||
|
||||
results = []
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
itemEls = idoc.findall('.//item')
|
||||
for partNum,itemEl in enumerate(itemEls):
|
||||
mediaId = itemEl.findall('./guid')[0].text
|
||||
shortMediaId = mediaId.split(':')[-1]
|
||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||
officialTitle = itemEl.findall('./title')[0].text
|
||||
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||
cdoc = self._download_xml(configUrl, epTitle,
|
||||
'Downloading configuration for %s' % shortMediaId)
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = int_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('.//guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
if len(turls) == 0:
|
||||
self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
|
||||
continue
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'format_id': format,
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url,
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
|
||||
results.append({
|
||||
'id': shortMediaId,
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': showId,
|
||||
'upload_date': officialDate,
|
||||
'title': effTitle,
|
||||
'thumbnail': None,
|
||||
'description': compat_str(officialTitle),
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
})
|
||||
|
||||
return results
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'No description',
|
||||
},
|
||||
},
|
||||
# arte embed
|
||||
{
|
||||
'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
|
||||
'md5': '7653032cbb25bf6c80d80f217055fa43',
|
||||
'info_dict': {
|
||||
'id': '048195-004_PLUS7-F',
|
||||
'ext': 'flv',
|
||||
'title': 'X:enius',
|
||||
'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
|
||||
'upload_date': '20140320',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
@@ -7,7 +7,7 @@ from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class OoyalaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
|
||||
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
|
@@ -28,6 +28,7 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Divers',
|
||||
'upload_date': '19680921',
|
||||
'timestamp': -40280400,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
},
|
||||
}
|
||||
|
||||
@@ -58,4 +59,5 @@ class RTSIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'uploader': info.get('programName'),
|
||||
'timestamp': upload_timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -1,24 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Ooyala'],
|
||||
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
|
||||
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
|
||||
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
|
||||
u'info_dict': {
|
||||
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
|
||||
return self.url_result(ooyala_url, 'Ooyala')
|
@@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
|
|
||||
(?P<type_watch>watch)/[^/]+/[^/]+
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
/(?P<name>[\w-]+) # Here goes the name and then ".html"
|
||||
.*)$
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||
'info_dict': {
|
||||
@@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'md5': '226f4fb9c62380d11b7995efa4c87994',
|
||||
'info_dict': {
|
||||
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||
}
|
||||
}]
|
||||
|
||||
_FORMATS_PREFERENCE = {
|
||||
'low': 1,
|
||||
@@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
name = m.group('name')
|
||||
if m.group('type_talk'):
|
||||
return self._talk_info(url, name)
|
||||
elif m.group('type_watch'):
|
||||
return self._watch_info(url, name)
|
||||
else:
|
||||
return self._playlist_videos_info(url, name)
|
||||
|
||||
@@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
def _watch_info(self, url, name):
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
config_json = self._html_search_regex(
|
||||
r"data-config='([^']+)", webpage, 'config')
|
||||
config = json.loads(config_json)
|
||||
video_url = config['video']['url']
|
||||
thumbnail = config.get('image', {}).get('url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': name,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
57
youtube_dl/extractor/urort.py
Normal file
57
youtube_dl/extractor/urort.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class UrortIE(InfoExtractor):
|
||||
IE_DESC = 'NRK P3 Urørt'
|
||||
_VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://urort.p3.no/#!/Band/Gerilja',
|
||||
'md5': '5ed31a924be8a05e47812678a86e127b',
|
||||
'info_dict': {
|
||||
'id': '33124-4',
|
||||
'ext': 'mp3',
|
||||
'title': 'The Bomb',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'like_count': int,
|
||||
'uploader': 'Gerilja',
|
||||
'uploader_id': 'Gerilja',
|
||||
},
|
||||
'params': {
|
||||
'matchtitle': '^The Bomb$', # To test, we want just one video
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
|
||||
json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
|
||||
songs = self._download_json(json_url, playlist_id)
|
||||
|
||||
entries = [{
|
||||
'id': '%d-%s' % (s['BandId'], s['$id']),
|
||||
'title': s['Title'],
|
||||
'url': s['TrackUrl'],
|
||||
'ext': 'mp3',
|
||||
'uploader_id': playlist_id,
|
||||
'uploader': s.get('BandName', playlist_id),
|
||||
'like_count': s.get('LikeCount'),
|
||||
'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
|
||||
} for s in songs]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': playlist_id,
|
||||
'entries': entries,
|
||||
}
|
@@ -1,38 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
},
|
||||
u'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
u'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
try:
|
||||
ooyala_url = self._og_search_video_url(webpage)
|
||||
except ExtractorError:
|
||||
try:
|
||||
embed_code = self._search_regex(
|
||||
r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
|
||||
u'ooyala embed code')
|
||||
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
|
||||
except ExtractorError:
|
||||
raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
|
||||
return self.url_result(ooyala_url, ie='Ooyala')
|
||||
|
103
youtube_dl/extractor/washingtonpost.py
Normal file
103
youtube_dl/extractor/washingtonpost.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'playlist': [{
|
||||
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
|
||||
'info_dict': {
|
||||
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Breaking Points: The Paper Mine',
|
||||
'duration': 1287,
|
||||
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1395527908,
|
||||
'upload_date': '20140322',
|
||||
},
|
||||
}, {
|
||||
'md5': 'f645a07652c2950cd9134bb852c5f5eb',
|
||||
'info_dict': {
|
||||
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
|
||||
'ext': 'mp4',
|
||||
'title': 'The town bureaucracy sustains',
|
||||
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
||||
'duration': 2217,
|
||||
'timestamp': 1395528005,
|
||||
'upload_date': '20140322',
|
||||
'uploader': 'The Washington Post',
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
title = self._og_search_title(webpage)
|
||||
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
|
||||
entries = []
|
||||
for i, uuid in enumerate(uuids, start=1):
|
||||
vinfo_all = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
|
||||
page_id,
|
||||
transform_source=strip_jsonp,
|
||||
note='Downloading information of video %d/%d' % (i, len(uuids))
|
||||
)
|
||||
vinfo = vinfo_all[0]['contentConfig']
|
||||
uploader = vinfo.get('credits', {}).get('source')
|
||||
timestamp = int_or_none(
|
||||
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
|
||||
|
||||
formats = [{
|
||||
'format_id': (
|
||||
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
|
||||
if s.get('width')
|
||||
else s.get('type')),
|
||||
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
|
||||
'width': s.get('width'),
|
||||
'height': s.get('height'),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
|
||||
'filesize': s.get('fileSize'),
|
||||
'url': s.get('url'),
|
||||
'ext': 'mp4',
|
||||
'protocol': {
|
||||
'MP4': 'http',
|
||||
'F4F': 'f4m',
|
||||
}.get(s.get('type'))
|
||||
} for s in vinfo.get('streams', [])]
|
||||
source_media_url = vinfo.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': uuid,
|
||||
'title': vinfo['title'],
|
||||
'description': vinfo.get('blurb'),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(vinfo.get('videoDuration'), 100),
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': page_id,
|
||||
'title': title,
|
||||
}
|
@@ -1186,7 +1186,7 @@ def parse_duration(s):
|
||||
return None
|
||||
|
||||
m = re.match(
|
||||
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
|
||||
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
|
||||
if not m:
|
||||
return None
|
||||
res = int(m.group('secs'))
|
||||
@@ -1328,3 +1328,7 @@ US_RATINGS = {
|
||||
'R': 16,
|
||||
'NC': 18,
|
||||
}
|
||||
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.03.24.2'
|
||||
__version__ = '2014.03.27.1'
|
||||
|
Reference in New Issue
Block a user