Compare commits
22 Commits
2014.04.21
...
2014.04.21
Author | SHA1 | Date | |
---|---|---|---|
![]() |
1d9d26d09b | ||
![]() |
c0292e8ab7 | ||
![]() |
f44e5d8b43 | ||
![]() |
6ea74538e3 | ||
![]() |
24b8924b46 | ||
![]() |
86a3c67112 | ||
![]() |
8be874370d | ||
![]() |
aec74dd95a | ||
![]() |
6890574256 | ||
![]() |
d03745c684 | ||
![]() |
28746fbd59 | ||
![]() |
0321213c11 | ||
![]() |
3f0aae4244 | ||
![]() |
48099643cc | ||
![]() |
621f33c9d0 | ||
![]() |
f07a9f6f43 | ||
![]() |
e51880fd32 | ||
![]() |
88ce273da4 | ||
![]() |
b9ba5dfa28 | ||
![]() |
4086f11929 | ||
![]() |
478c2c6193 | ||
![]() |
d2d6481afb |
@@ -85,7 +85,7 @@ def gettestcases(include_onlymatching=False):
|
||||
else:
|
||||
tests = getattr(ie, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and getattr(t, 'only_matching', False):
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
yield t
|
||||
|
@@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 28)
|
||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
|
@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
@@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .clubic import ClubicIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
@@ -295,6 +297,7 @@ from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
|
@@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
106
youtube_dl/extractor/bilibili.py
Normal file
106
youtube_dl/extractor/bilibili.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
title = self._html_search_meta(
|
||||
'media:title', video_code, 'title', fatal=True)
|
||||
duration_str = self._html_search_meta(
|
||||
'duration', video_code, 'duration')
|
||||
if duration_str is None:
|
||||
duration = None
|
||||
else:
|
||||
duration_mobj = re.match(
|
||||
r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
|
||||
duration_str)
|
||||
duration = (
|
||||
int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
|
||||
int(duration_mobj.group('minutes')) * 60 +
|
||||
int(duration_mobj.group('seconds')))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'uploadDate', video_code, fatal=False))
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||
|
||||
player_params = compat_parse_qs(self._html_search_regex(
|
||||
r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
|
||||
webpage, 'player params'))
|
||||
|
||||
if 'cid' in player_params:
|
||||
cid = player_params['cid'][0]
|
||||
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('.//durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('.//durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
58
youtube_dl/extractor/clubic.py
Normal file
58
youtube_dl/extractor/clubic.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class ClubicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
||||
'md5': '1592b694ba586036efac1776b0b43cd3',
|
||||
'info_dict': {
|
||||
'id': '448474',
|
||||
'ext': 'mp4',
|
||||
'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
|
||||
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
|
||||
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
config_json = self._search_regex(
|
||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||
'configuration')
|
||||
config = json.loads(config_json)
|
||||
|
||||
video_info = config['videoInfo']
|
||||
sources = config['sources']
|
||||
quality_order = qualities(['sd', 'hq'])
|
||||
|
||||
formats = [{
|
||||
'format_id': src['streamQuality'],
|
||||
'url': src['src'],
|
||||
'quality': quality_order(src['streamQuality']),
|
||||
} for src in sources]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'formats': formats,
|
||||
'description': clean_html(video_info.get('description')),
|
||||
'thumbnail': config.get('poster'),
|
||||
}
|
@@ -33,7 +33,7 @@ class CNETIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._html_search_regex(
|
||||
r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
|
||||
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = json.loads(data_json)
|
||||
vdata = data['video']
|
||||
|
@@ -279,9 +279,12 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note=u'Downloading XML', errnote=u'Unable to download XML',
|
||||
transform_source=None):
|
||||
transform_source=None, fatal=True):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
if transform_source:
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
|
@@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||
'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
|
||||
'name_action_selected': 'dont_save',
|
||||
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
@@ -531,7 +531,7 @@ class GenericIE(InfoExtractor):
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
# Look for Aparat videos
|
||||
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Aparat')
|
||||
|
||||
@@ -619,7 +619,13 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
# Look for gorilla-vid style embedding
|
||||
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
|
||||
mobj = re.search(r'''(?sx)
|
||||
(?:
|
||||
jw_plugins|
|
||||
JWPlayerOptions|
|
||||
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
||||
)
|
||||
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit
|
||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||
|
@@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
|
||||
'md5': '1574e9b4d6438446d5b7dbcdf2786276',
|
||||
'info_dict': {
|
||||
'id': 'r303r',
|
||||
'ext': 'flv',
|
||||
|
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
|
@@ -47,7 +47,7 @@ class RutubeIE(InfoExtractor):
|
||||
author = video.get('author') or {}
|
||||
|
||||
options = self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' %video_id,
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading options JSON')
|
||||
|
||||
m3u8_url = options['video_balancer'].get('m3u8')
|
||||
|
@@ -97,7 +97,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
playlist_info = info['playlist']
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
@@ -163,7 +163,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
def _watch_info(self, url, name):
|
||||
@@ -178,7 +178,10 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
|
||||
[
|
||||
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
|
||||
r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
|
||||
],
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
|
66
youtube_dl/extractor/vuclip.py
Normal file
66
youtube_dl/extractor/vuclip.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
parse_duration,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class VuClipIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
|
||||
'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
|
||||
'info_dict': {
|
||||
'id': '843902317',
|
||||
'ext': '3gp',
|
||||
'title': 'Movie Trailer: Noah',
|
||||
'duration': 139,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ad_m = re.search(
|
||||
r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
|
||||
if ad_m:
|
||||
urlr = compat_urllib_parse_urlparse(url)
|
||||
adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
|
||||
webpage = self._download_webpage(
|
||||
adfree_url, video_id, note='Download post-ad page')
|
||||
|
||||
links_code = self._search_regex(
|
||||
r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
|
||||
'links')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
||||
|
||||
quality_order = qualities(['Reg', 'Hi'])
|
||||
formats = []
|
||||
for url, q in re.findall(
|
||||
r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
|
||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'quality': quality_order(q),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
}
|
@@ -1245,7 +1245,10 @@ class HEADRequest(compat_urllib_request.Request):
|
||||
return "HEAD"
|
||||
|
||||
|
||||
def int_or_none(v, scale=1, default=None):
|
||||
def int_or_none(v, scale=1, default=None, get_attr=None):
|
||||
if get_attr:
|
||||
if v is not None:
|
||||
v = getattr(v, get_attr, None)
|
||||
return default if v is None else (int(v) // scale)
|
||||
|
||||
|
||||
@@ -1406,3 +1409,14 @@ US_RATINGS = {
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
||||
|
||||
|
||||
def qualities(quality_ids):
|
||||
""" Get a numeric quality value out of a list of possible values """
|
||||
def q(qid):
|
||||
try:
|
||||
return quality_ids.index(qid)
|
||||
except ValueError:
|
||||
return -1
|
||||
return q
|
||||
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.04.21.1'
|
||||
__version__ = '2014.04.21.6'
|
||||
|
Reference in New Issue
Block a user