Compare commits
5 Commits
2013.11.17
...
2013.11.18
| Author | SHA1 | Date | |
|---|---|---|---|
| | a81b4d5c8f | | |
| | 887c6acdf2 | | |
| | 83aa529330 | | |
| | 96b31b6533 | | |
| | fccd377198 | | |
@@ -162,6 +162,16 @@ class GenericIE(InfoExtractor):
|
||||
raise ExtractorError(u'Failed to download URL: %s' % url)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is not None:
|
||||
@@ -177,11 +187,13 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(surl, 'Vimeo')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||
if mobj:
|
||||
surl = unescapeHTML(mobj.group(u'url'))
|
||||
return self.url_result(surl, 'Youtube')
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
@@ -226,15 +238,6 @@ class GenericIE(InfoExtractor):
|
||||
video_extension = os.path.splitext(video_id)[1][1:]
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||
url, u'video uploader')
|
||||
|
||||
@@ -139,9 +139,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
IE_DESC = u'YouTube.com'
|
||||
_VALID_URL = r"""^
|
||||
_VALID_URL = r"""(?xi)^
|
||||
(
|
||||
(?:https?://)? # http(s):// (optional)
|
||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
@@ -363,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
|
||||
u"file": u"yZIXLfi8CZQ.mp4",
|
||||
u"note": u"Embed-only video (#1746)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120608",
|
||||
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
|
||||
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
|
||||
u"uploader": u"SET India",
|
||||
u"uploader_id": u"setindia"
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -370,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
if YoutubePlaylistIE.suitable(url): return False
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(YoutubeIE, self).__init__(*args, **kwargs)
|
||||
@@ -1272,7 +1284,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
# this can be viewed without login into Youtube
|
||||
data = compat_urllib_parse.urlencode({'video_id': video_id,
|
||||
'el': 'embedded',
|
||||
'el': 'player_embedded',
|
||||
'gl': 'US',
|
||||
'hl': 'en',
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
|
||||
@@ -176,7 +176,7 @@ def compat_ord(c):
|
||||
compiled_regex_type = type(re.compile(''))
|
||||
|
||||
std_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome) (iPhone)',
|
||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.11.17'
|
||||
__version__ = '2013.11.18'
|
||||
|
||||
Reference in New Issue
Block a user