Compare commits

...

31 Commits

Author SHA1 Message Date
Philipp Hagemeister
33bf9033e0 release 2014.06.16 2014-06-16 10:15:24 +02:00
Jaime Marquínez Ferrándiz
35eacd0dae [brightcove] Set the filesize of the formats and use _sort_formats 2014-06-15 11:37:39 +02:00
Jaime Marquínez Ferrándiz
96bef88f5f [brightcove] Modernize some tests 2014-06-15 11:24:05 +02:00
Jaime Marquínez Ferrándiz
5524b242a7 [brightcove] Add support for renditions with 'remote' set to True (fixes #3081)
The url needs to be modified to get the flv video.
2014-06-15 11:20:40 +02:00
Jaime Marquínez Ferrándiz
a013eba65f [brightcove] Improve the 'experienceJSON' regex (#3081)
One of the strings may contain ';', we would get an invalid json string.
2014-06-15 11:08:24 +02:00
Sergey M.
36755d40b4 Merge pull request #3078 from pulpe/youtube_fix
[Youtube] Recognize playlists with LL
2014-06-15 02:49:16 +07:00
pulpe
7d568f5ab8 [Youtube] Recognize playlists with LL 2014-06-14 13:23:28 +02:00
Sergey M․
a7207cd580 [wrzuta] Add age limit 2014-06-14 17:00:59 +07:00
Sergey M.
e8ef659cd9 Merge pull request #3075 from pulpe/wrzuta
[WrzutaIE] Add extractor for wrzuta.pl (fixes #3072)
2014-06-14 16:51:27 +07:00
Sergey M․
b0adbe98fb [rai] Add support for Rai websites (Closes #2930) 2014-06-13 23:44:44 +07:00
pulpe
0c361c41b8 [WrzutaIE] Add extractor for wrzuta.pl (fixes #3072) 2014-06-13 08:51:35 +02:00
Sergey M․
c5469e046a [livestream] Modernize 2014-06-12 20:42:46 +07:00
Sergey M․
4d2f143ce5 [ted] Update test md5 2014-06-12 20:33:53 +07:00
Sergey M․
8f93030c85 [blinkx] Modernize 2014-06-11 18:38:13 +07:00
Sergey M․
fdb9aebead [tube8] Update test and modernize 2014-06-11 18:20:14 +07:00
Sergey M․
3141feb73b [ndtv] Fix title extraction and modernize 2014-06-10 19:37:38 +07:00
Philipp Hagemeister
9706f3f802 release 2014.06.09 2014-06-09 23:16:37 +02:00
Philipp Hagemeister
d5e944359e Remove unused import 2014-06-09 23:14:04 +02:00
Philipp Hagemeister
826ec77fb2 [Vulture] Add support for vulture.com 2014-06-09 23:06:39 +02:00
Philipp Hagemeister
2656f4eb6a [hypem] Modernize 2014-06-09 22:34:41 +02:00
Philipp Hagemeister
2b88feedf7 [generic] Add support for <embed YouTube 2014-06-09 22:06:45 +02:00
Jaime Marquínez Ferrándiz
23566e0d78 rtmp and hls downloaders: Clarify error message when the external tools are not installed
Ask to install them, as we do in the postprocessor.
We get some reports with it, like #3061 or #3048.
2014-06-09 20:23:20 +02:00
Sergey M․
828553b614 [nuvid] Remove superfluous slash 2014-06-09 20:41:33 +07:00
Sergey M․
3048e82a94 [nuvid] Improve extraction 2014-06-09 20:37:04 +07:00
Sergey M․
09ffa08ba1 [veoh] Capture error message 2014-06-08 23:05:20 +07:00
Sergey M․
e0b4cc489f [dreisat] Modernize 2014-06-08 22:45:12 +07:00
Sergey M․
15e423407f [dreisat] Fix thumbnails' width and height 2014-06-08 22:41:24 +07:00
Sergey M․
702e522044 [teachertube] Fix extraction for Python 3 2014-06-08 22:16:48 +07:00
Philipp Hagemeister
814d4257df Remove unused imports 2014-06-07 16:52:34 +02:00
Philipp Hagemeister
23ae281b31 [fc2] Fall back to webpage title if needed 2014-06-07 16:52:11 +02:00
Philipp Hagemeister
94128d6b0d [nrk] Fix test checksum 2014-06-07 16:50:19 +02:00
25 changed files with 525 additions and 156 deletions

View File

@@ -25,7 +25,7 @@ class HlsFD(FileDownloader):
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
cmd = [program] + args
retval = subprocess.call(cmd)

View File

@@ -106,7 +106,7 @@ class RtmpFD(FileDownloader):
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error('RTMP download detected but "rtmpdump" could not be run')
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when

View File

@@ -216,6 +216,7 @@ from .pornotube import PornotubeIE
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
@@ -332,6 +333,7 @@ from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vulture import VultureIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wdr import (
@@ -343,6 +345,7 @@ from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE
from .xbef import XBefIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE

View File

@@ -4,9 +4,7 @@ import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
from ..utils import remove_start
class BlinkxIE(InfoExtractor):
@@ -15,9 +13,10 @@ class BlinkxIE(InfoExtractor):
_TEST = {
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
'file': '8aQUy7GV.mp4',
'md5': '2e9a07364af40163a908edbf10bb2492',
'info_dict': {
'id': '8aQUy7GV',
'ext': 'mp4',
'title': 'Police Car Rolls Away',
'uploader': 'stupidvideos.com',
'upload_date': '20131215',
@@ -27,6 +26,7 @@ class BlinkxIE(InfoExtractor):
'thumbnails': [{
'width': 100,
'height': 76,
'resolution': '100x76',
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
}],
},
@@ -37,7 +37,7 @@ class BlinkxIE(InfoExtractor):
video_id = m.group('id')
display_id = video_id[:8]
api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
@@ -55,13 +55,13 @@ class BlinkxIE(InfoExtractor):
duration = m['d']
elif m['type'] == 'youtube':
yt_id = m['link']
self.to_screen(u'Youtube video detected: %s' % yt_id)
self.to_screen('Youtube video detected: %s' % yt_id)
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],

View File

@@ -15,6 +15,7 @@ from ..utils import (
compat_urllib_request,
compat_parse_qs,
determine_ext,
ExtractorError,
unsmuggle_url,
unescapeHTML,
@@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor):
{
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
'file': '2371591881001.mp4',
'md5': '5423e113865d26e40624dce2e4b45d95',
'note': 'Test Brightcove downloads and detection in GenericIE',
'info_dict': {
'id': '2371591881001',
'ext': 'mp4',
'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
'uploader': '8TV',
'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
@@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor):
{
# From http://medianetwork.oracle.com/video/player/1785452137001
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
'file': '1785452137001.flv',
'info_dict': {
'id': '1785452137001',
'ext': 'flv',
'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
'uploader': 'Oracle',
@@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor):
'description': 'md5:363109c02998fee92ec02211bd8000df',
'uploader': 'National Ballet of Canada',
},
}
},
{
# test flv videos served by akamaihd.net
# From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
# The md5 checksum changes on each download
'info_dict': {
'id': '2996102916001',
'ext': 'flv',
'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
'uploader': 'Red Bull TV',
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
},
},
]
@classmethod
@@ -187,7 +203,7 @@ class BrightcoveIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
info = json.loads(info)['data']
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
@@ -219,12 +235,26 @@ class BrightcoveIE(InfoExtractor):
renditions = video_info.get('renditions')
if renditions:
renditions = sorted(renditions, key=lambda r: r['size'])
info['formats'] = [{
'url': rend['defaultURL'],
'height': rend.get('frameHeight'),
'width': rend.get('frameWidth'),
} for rend in renditions]
formats = []
for rend in renditions:
url = rend['defaultURL']
if rend['remote']:
# This type of renditions are served through akamaihd.net,
# but they don't use f4m manifests
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
ext = 'flv'
else:
ext = determine_ext(url)
size = rend.get('size')
formats.append({
'url': url,
'ext': ext,
'height': rend.get('frameHeight'),
'width': rend.get('frameWidth'),
'filesize': size if size != 0 else None,
})
self._sort_formats(formats)
info['formats'] = formats
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],

View File

@@ -1,39 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
from ..utils import unified_strdate
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.mp4',
u'md5': u'9dcfe344732808dbfcc901537973c922',
u'info_dict': {
u"title": u"Kaffeeland Schweiz",
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
u"uploader": u"3sat",
u"upload_date": u"20130622"
'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
'md5': '9dcfe344732808dbfcc901537973c922',
'info_dict': {
'id': '36983',
'ext': 'mp4',
'title': 'Kaffeeland Schweiz',
'description': 'md5:cc4424b18b75ae9948b13929a0814033',
'uploader': '3sat',
'upload_date': '20130622'
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
'width': te.attrib['key'].partition('x')[0],
'height': te.attrib['key'].partition('x')[2],
'width': int(te.attrib['key'].partition('x')[0]),
'height': int(te.attrib['key'].partition('x')[2]),
'url': te.text,
} for te in thumbnail_els]

View File

@@ -50,10 +50,13 @@ class FC2IE(InfoExtractor):
raise ExtractorError('Error code: %s' % info['err_code'][0])
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
title_info = info.get('title')
if title_info:
title = title_info[0]
return {
'id': video_id,
'title': info['title'][0],
'title': title,
'url': video_url,
'ext': 'flv',
'thumbnail': thumbnail,

View File

@@ -260,7 +260,24 @@ class GenericIE(InfoExtractor):
'uploader': 'Spi0n',
},
'add_ie': ['Dailymotion'],
}
},
# YouTube embed
{
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
'info_dict': {
'id': 'FXRb4ykk4S0',
'ext': 'mp4',
'title': 'The NBL Auction 2014',
'uploader': 'BADMINTON England',
'uploader_id': 'BADMINTONEvents',
'upload_date': '20140603',
'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
},
'add_ie': ['Youtube'],
'params': {
'skip_download': True,
}
},
]
def report_download_webpage(self, video_id):
@@ -478,8 +495,13 @@ class GenericIE(InfoExtractor):
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
(?:<iframe[^>]+?src=|embedSWF\(\s*)
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
(?:
<iframe[^>]+?src=|
<embed[^>]+?src=|
embedSWF\(?:\s*
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
(?:embed|v)/.+?)
\1''', webpage)
if matches:
@@ -646,6 +668,14 @@ class GenericIE(InfoExtractor):
url = unescapeHTML(mobj.group('url'))
return self.url_result(url)
# Look for embedded vulture.com player
mobj = re.search(
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
webpage)
if mobj is not None:
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='Vulture')
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:

View File

@@ -1,10 +1,11 @@
from __future__ import unicode_literals
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
@@ -13,59 +14,55 @@ from ..utils import (
class HypemIE(InfoExtractor):
"""Information Extractor for hypem"""
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
_TEST = {
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
u'file': u'1v6ga.mp3',
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
u'info_dict': {
u"title": u"Tame"
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
'info_dict': {
'id': '1v6ga',
'ext': 'mp3',
'title': 'Tame',
'uploader': 'BODYWORK',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
track_id = mobj.group(1)
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
self.report_extraction(track_id)
html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
html_tracks = self._html_search_regex(
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
response, 'tracks')
try:
track_list = json.loads(html_tracks)
track = track_list[u'tracks'][0]
track = track_list['tracks'][0]
except ValueError:
raise ExtractorError(u'Hypemachine contained invalid JSON.')
raise ExtractorError('Hypemachine contained invalid JSON.')
key = track[u"key"]
track_id = track[u"id"]
artist = track[u"artist"]
title = track[u"song"]
key = track['key']
track_id = track['id']
artist = track['artist']
title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = compat_urllib_request.Request(
serve_url, '', {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
try:
song_data = json.loads(song_data_json)
except ValueError:
raise ExtractorError(u'Hypemachine contained invalid JSON.')
final_url = song_data[u"url"]
song_data = self._download_json(request, track_id, 'Downloading metadata')
final_url = song_data["url"]
return [{
'id': track_id,
'url': final_url,
'ext': "mp3",
'title': title,
'artist': artist,
}]
return {
'id': track_id,
'url': final_url,
'ext': 'mp3',
'title': title,
'uploader': artist,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
import json
@@ -6,31 +8,34 @@ from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
xpath_with_ns,
compat_str,
)
class LivestreamIE(InfoExtractor):
IE_NAME = u'livestream'
IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
_TEST = {
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
u'file': u'4719370.mp4',
u'md5': u'0d2186e3187d185a04b3cdd02b828836',
u'info_dict': {
u'title': u'Live from Webster Hall NYC',
u'upload_date': u'20121012',
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b',
'info_dict': {
'id': '4719370',
'ext': 'mp4',
'title': 'Live from Webster Hall NYC',
'upload_date': '20121012',
}
}
def _extract_video_info(self, video_data):
video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
return {'id': video_data['id'],
'url': video_url,
'ext': 'mp4',
'title': video_data['caption'],
'thumbnail': video_data['thumbnail_url'],
'upload_date': video_data['updated_at'].replace('-','')[:8],
}
return {
'id': compat_str(video_data['id']),
'url': video_url,
'ext': 'mp4',
'title': video_data['caption'],
'thumbnail': video_data['thumbnail_url'],
'upload_date': video_data['updated_at'].replace('-', '')[:8],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -40,36 +45,36 @@ class LivestreamIE(InfoExtractor):
if video_id is None:
# This is an event page:
config_json = self._search_regex(r'window.config = ({.*?});',
webpage, u'window config')
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data'] if video_data['type'] == u'video']
for video_data in info['feed']['data'] if video_data['type'] == 'video']
return self.playlist_result(videos, info['id'], info['full_name'])
else:
og_video = self._og_search_video_url(webpage, name=u'player url')
og_video = self._og_search_video_url(webpage, 'player url')
query_str = compat_urllib_parse_urlparse(og_video).query
query = compat_urlparse.parse_qs(query_str)
api_url = query['play_url'][0].replace('.smil', '')
info = json.loads(self._download_webpage(api_url, video_id,
u'Downloading video info'))
info = json.loads(self._download_webpage(
api_url, video_id, 'Downloading video info'))
return self._extract_video_info(info)
# The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor):
IE_NAME = u'livestream:original'
IE_NAME = 'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
_TEST = {
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'info_dict': {
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
u'ext': u'flv',
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': {
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'ext': 'flv',
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
},
u'params': {
'params': {
# rtmp
u'skip_download': True,
'skip_download': True,
},
}
@@ -84,7 +89,7 @@ class LivestreamOriginalIE(InfoExtractor):
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
# Remove the extension and number from the path (like 1.jpg)
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
return {
'id': video_id,

View File

@@ -1,22 +1,28 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import month_by_name
from ..utils import (
month_by_name,
int_or_none,
)
class NDTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
u"file": u"300710.mp4",
u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
u"info_dict": {
u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
u"upload_date": u"20131208",
u"duration": 1327,
u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
'info_dict': {
'id': '300710',
'ext': 'mp4',
'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
'upload_date': '20131208',
'duration': 1327,
'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
},
}
@@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
filename = self._search_regex(
r"__filename='([^']+)'", webpage, u'video filename')
video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
r"__filename='([^']+)'", webpage, 'video filename')
video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
filename)
duration_str = filename = self._search_regex(
r"__duration='([^']+)'", webpage, u'duration', fatal=False)
duration = None if duration_str is None else int(duration_str)
duration = int_or_none(self._search_regex(
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
date_m = re.search(r'''(?x)
<p\s+class="vod_dateline">\s*
@@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor):
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
''', webpage)
upload_date = None
assert date_m
if date_m is not None:
month = month_by_name(date_m.group('monthname'))
if month is not None:
@@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor):
date_m.group('year'), month, int(date_m.group('day')))
description = self._og_search_description(webpage)
READ_MORE = u' (Read more)'
READ_MORE = ' (Read more)'
if description.endswith(READ_MORE):
description = description[:-len(READ_MORE)]
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' - NDTV'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'title': title,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,

View File

@@ -6,7 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
unified_strdate,
)
@@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
},
{
'url': 'http://tv.nrk.no/program/mdfp15000514',
'md5': '383650ece2b25ecec996ad7b5bb2a384',
'md5': 'af01795a31f1cf7265c8657534d8077b',
'info_dict': {
'id': 'mdfp15000514',
'ext': 'flv',
@@ -111,9 +111,8 @@ class NRKTVIE(InfoExtractor):
description = self._html_search_meta('description', page, 'description')
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
if duration:
duration = float(duration)
duration = float_or_none(
self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
formats = []

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unescapeHTML
)

View File

@@ -3,6 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
compat_urllib_request,
)
class NuvidIE(InfoExtractor):
@@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):
'info_dict': {
'id': '1310741',
'ext': 'mp4',
"title": "Horny babes show their awesome bodeis and",
"age_limit": 18,
'title': 'Horny babes show their awesome bodeis and',
'duration': 129,
'upload_date': '20140508',
'age_limit': 18,
}
}
@@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
murl = url.replace('://www.', '://m.')
webpage = self._download_webpage(murl, video_id)
formats = []
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
request = compat_urllib_request.Request(
'http://m.nuvid.com/play/%s' % video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
webpage = self._download_webpage(
request, video_id, 'Downloading %s page' % format_id)
video_url = self._html_search_regex(
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
if not video_url:
continue
formats.append({
'url': video_url,
'format_id': format_id,
})
webpage = self._download_webpage(
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
title = self._html_search_regex(
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
webpage, 'title').strip()
url_end = self._html_search_regex(
r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
webpage, 'video_url')
video_url = 'http://m.nuvid.com' + url_end
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
thumbnail = self._html_search_regex(
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
webpage, 'thumbnail URL', fatal=False)
duration = parse_duration(self._html_search_regex(
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
upload_date = unified_strdate(self._html_search_regex(
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
'thumbnail': thumbnail,
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
'duration': duration,
'upload_date': upload_date,
'age_limit': 18,
}
'formats': formats,
}

120
youtube_dl/extractor/rai.py Normal file
View File

@@ -0,0 +1,120 @@
from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
compat_urllib_parse,
)
class RaiIE(SubtitlesInfoExtractor):
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': 'c064c0b2d09c278fb293116ef5d0a32d',
'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407',
'duration': 6160,
}
},
{
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'md5': '8bb9c151924ce241b74dd52ef29ceafa',
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'mp4',
'title': 'TG PRIMO TEMPO',
'description': '',
'upload_date': '20140612',
'duration': 1758,
}
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
'md5': '35cf7c229f22eeef43e48b5cf923bef0',
'info_dict': {
'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
'ext': 'mp4',
'title': 'State of the Net, Antonella La Carpia: regole virali',
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
'upload_date': '20140613',
}
},
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
'md5': '35694f062977fe6619943f08ed935730',
'info_dict': {
'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
'ext': 'mp4',
'title': 'Alluvione in Sardegna e dissesto idrogeologico',
'description': 'Edizione delle ore 20:30 ',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
title = media.get('name')
description = media.get('desc')
thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
duration = parse_duration(media.get('length'))
uploader = media.get('author')
upload_date = unified_strdate(media.get('date'))
formats = []
for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
media_url = media.get(format_id)
if not media_url:
continue
formats.append({
'url': media_url,
'format_id': format_id,
'ext': 'mp4',
})
if self._downloader.params.get('listsubtitles', False):
page = self._download_webpage(url, video_id)
self._list_available_subtitles(video_id, page)
return
subtitles = {}
if self._have_to_download_any_subtitles:
page = self._download_webpage(url, video_id)
subtitles = self.extract_subtitles(video_id, page)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
def _get_available_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
captions = m.group('captions')
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
return subtitles

View File

@@ -3,9 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class SlutloadIE(InfoExtractor):

View File

@@ -1,7 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
import re
import itertools

View File

@@ -55,11 +55,13 @@ class TeacherTubeIE(InfoExtractor):
quality = qualities(['mp3', 'flv', 'mp4'])
_, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))
formats = [
{
'url': media_url,
'quality': quality(determine_ext(media_url))
} for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1])
} for media_url in set(media_urls)
]
self._sort_formats(formats)

View File

@@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor):
'''
_TESTS = [{
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
'md5': 'fc94ac279feebbce69f21c0c6ee82810',
'info_dict': {
'id': '102',
'ext': 'mp4',

View File

@@ -17,9 +17,10 @@ class Tube8IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
'file': '229795.mp4',
'md5': 'e9e0b0c86734e5e3766e653509475db0',
'md5': '44bf12b98313827dd52d35b8706a4ea0',
'info_dict': {
'id': '229795',
'ext': 'mp4',
'description': 'hot teen Kasia grinding',
'uploader': 'unknown',
'title': 'Kasia music video',

View File

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
int_or_none,
ExtractorError,
)
@@ -94,8 +95,12 @@ class VeohIE(InfoExtractor):
if video_id.startswith('v'):
rsp = self._download_xml(
r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
if rsp.get('stat') == 'ok':
stat = rsp.get('stat')
if stat == 'ok':
return self._extract_video(rsp.find('./videoList/video'))
elif stat == 'fail':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
webpage = self._download_webpage(url, video_id)
age_limit = 0

View File

@@ -0,0 +1,69 @@
from __future__ import unicode_literals
import json
import os.path
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class VultureIE(InfoExtractor):
IE_NAME = 'vulture.com'
_VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/'
_TEST = {
'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1',
'md5': '8d997845642a2b5152820f7257871bc8',
'info_dict': {
'id': '6GHRQL3RV7MSD1H4',
'ext': 'mp4',
'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED',
'uploader_id': 'Sarah',
'thumbnail': 're:^http://.*\.jpg$',
'timestamp': 1401288564,
'upload_date': '20140528',
'description': 'Uplifting and witty, as predicted.',
'duration': 1015,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
query_string = self._search_regex(
r"queryString\s*=\s*'([^']+)'", webpage, 'query string')
video_id = self._search_regex(
r'content=([^&]+)', query_string, 'video ID')
query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string
query_webpage = self._download_webpage(
query_url, display_id, note='Downloading query page')
params_json = self._search_regex(
r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
query_webpage,
'player params')
params = json.loads(params_json)
upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T'))
uploader_id = params.get('user', {}).get('handle')
media_item = params['media_item']
title = os.path.splitext(media_item['title'])[0]
duration = int_or_none(media_item.get('duration_seconds'))
return {
'id': video_id,
'display_id': display_id,
'url': media_item['pipeline_xid'],
'title': title,
'timestamp': upload_timestamp,
'thumbnail': params.get('thumbnail_url'),
'uploader_id': uploader_id,
'description': params.get('description'),
'duration': duration,
}

View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
qualities,
)
class WrzutaIE(InfoExtractor):
IE_NAME = 'wrzuta.pl'
_VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game',
'md5': '9e67e05bed7c03b82488d87233a9efe7',
'info_dict': {
'id': 'aq4hIZWrkBu',
'ext': 'mp4',
'title': 'Nike Football: The Last Game',
'duration': 307,
'uploader_id': 'laboratoriumdextera',
'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
},
}, {
'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
'info_dict': {
'id': '9oXJqdcndqv',
'ext': 'ogg',
'title': 'David Guetta & Showtek ft. Vassy - Bad',
'duration': 270,
'uploader_id': 'w729',
'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
typ = mobj.group('typ')
uploader = mobj.group('uploader')
webpage = self._download_webpage(url, video_id)
quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
audio_table = {'flv': 'mp3', 'webm': 'ogg'}
embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
formats = []
for media in embedpage['url']:
if typ == 'audio':
ext = audio_table[media['type'].split('@')[0]]
else:
ext = media['type'].split('@')[0]
formats.append({
'format_id': '%s_%s' % (ext, media['quality'].lower()),
'url': media['url'],
'ext': ext,
'quality': quality(media['quality']),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'duration': int_or_none(embedpage['duration']),
'uploader_id': uploader,
'description': self._og_search_description(webpage),
'age_limit': embedpage.get('minimalAge', 0),
}

View File

@@ -1386,13 +1386,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
| p/
)
(
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
(?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'

View File

@@ -1,2 +1,2 @@
__version__ = '2014.06.07'
__version__ = '2014.06.16'