Compare commits
12 Commits
2011.09.30
...
2011.10.19
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
45aa690868 | ||
|
|
beb245e92f | ||
|
|
c424df0d2f | ||
|
|
87929e4b35 | ||
|
|
d76736fc5e | ||
|
|
0f9b77223e | ||
|
|
9f47175a40 | ||
|
|
a1a8713aad | ||
|
|
6501a06d46 | ||
|
|
8d89fbae5a | ||
|
|
7a2cf5455c | ||
|
|
7125a7ca8b |
@@ -1 +1 @@
|
||||
2011.09.30
|
||||
2011.10.19
|
||||
|
||||
184
youtube-dl
184
youtube-dl
@@ -15,7 +15,7 @@ __author__ = (
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
__version__ = '2011.09.30'
|
||||
__version__ = '2011.10.19'
|
||||
|
||||
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
|
||||
|
||||
@@ -1236,7 +1236,7 @@ class YoutubeIE(InfoExtractor):
|
||||
|
||||
# Get video webpage
|
||||
self.report_video_webpage_download(video_id)
|
||||
request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
|
||||
request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
|
||||
try:
|
||||
video_webpage = urllib2.urlopen(request).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
@@ -2059,6 +2059,18 @@ class VimeoIE(InfoExtractor):
|
||||
return
|
||||
sig = mobj.group(1).decode('utf-8')
|
||||
|
||||
# Vimeo specific: extract video quality information
|
||||
mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video quality information')
|
||||
return
|
||||
quality = mobj.group(1).decode('utf-8')
|
||||
|
||||
if int(quality) == 1:
|
||||
quality = 'hd'
|
||||
else:
|
||||
quality = 'sd'
|
||||
|
||||
# Vimeo specific: Extract request signature expiration
|
||||
mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
|
||||
if mobj is None:
|
||||
@@ -2066,7 +2078,7 @@ class VimeoIE(InfoExtractor):
|
||||
return
|
||||
sig_exp = mobj.group(1).decode('utf-8')
|
||||
|
||||
video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
|
||||
video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
|
||||
|
||||
try:
|
||||
# Process video information
|
||||
@@ -2458,7 +2470,7 @@ class YahooSearchIE(InfoExtractor):
|
||||
class YoutubePlaylistIE(InfoExtractor):
|
||||
"""Information Extractor for YouTube playlists."""
|
||||
|
||||
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
|
||||
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
|
||||
_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
|
||||
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
|
||||
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
|
||||
@@ -3306,6 +3318,168 @@ class EscapistIE(InfoExtractor):
|
||||
self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
"""Information extractor for collegehumor.com"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||
IE_NAME = u'collegehumor'
|
||||
|
||||
def report_webpage(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
|
||||
|
||||
def report_extraction(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
|
||||
|
||||
def _simplify_title(self, title):
|
||||
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
||||
res = res.strip(ur'_')
|
||||
return res
|
||||
|
||||
def _real_extract(self, url):
|
||||
htmlParser = HTMLParser.HTMLParser()
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
||||
return
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
self.report_webpage(video_id)
|
||||
request = urllib2.Request(url)
|
||||
try:
|
||||
webpage = urllib2.urlopen(request).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
||||
return
|
||||
|
||||
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
|
||||
if m is None:
|
||||
self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
|
||||
return
|
||||
internal_video_id = m.group('internalvideoid')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'internal_id': internal_video_id,
|
||||
}
|
||||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
|
||||
try:
|
||||
metaXml = urllib2.urlopen(xmlUrl).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
|
||||
return
|
||||
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
try:
|
||||
videoNode = mdoc.findall('./video')[0]
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['stitle'] = self._simplify_title(info['title'])
|
||||
info['url'] = videoNode.findall('./file')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
info['ext'] = info['url'].rpartition('.')[2]
|
||||
info['format'] = info['ext']
|
||||
except IndexError:
|
||||
self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
|
||||
return
|
||||
|
||||
self._downloader.increment_downloads()
|
||||
|
||||
try:
|
||||
self._downloader.process_info(info)
|
||||
except UnavailableVideoError, err:
|
||||
self._downloader.trouble(u'\nERROR: unable to download video')
|
||||
|
||||
|
||||
class XVideosIE(InfoExtractor):
|
||||
"""Information extractor for xvideos.com"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
|
||||
IE_NAME = u'xvideos'
|
||||
|
||||
def report_webpage(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
|
||||
|
||||
def report_extraction(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
|
||||
|
||||
def _simplify_title(self, title):
|
||||
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
||||
res = res.strip(ur'_')
|
||||
return res
|
||||
|
||||
def _real_extract(self, url):
|
||||
htmlParser = HTMLParser.HTMLParser()
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
||||
return
|
||||
video_id = mobj.group(1).decode('utf-8')
|
||||
|
||||
self.report_webpage(video_id)
|
||||
|
||||
request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
|
||||
try:
|
||||
webpage = urllib2.urlopen(request).read()
|
||||
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
||||
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
||||
return
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
|
||||
# Extract video URL
|
||||
mobj = re.search(r'flv_url=(.+?)&', webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video url')
|
||||
return
|
||||
video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
|
||||
|
||||
|
||||
# Extract title
|
||||
mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video title')
|
||||
return
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
|
||||
|
||||
# Extract video thumbnail
|
||||
mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
|
||||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
|
||||
return
|
||||
video_thumbnail = mobj.group(1).decode('utf-8')
|
||||
|
||||
|
||||
|
||||
self._downloader.increment_downloads()
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'stitle': self._simplify_title(video_title),
|
||||
'ext': 'flv',
|
||||
'format': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': None,
|
||||
'player_url': None,
|
||||
}
|
||||
|
||||
try:
|
||||
self._downloader.process_info(info)
|
||||
except UnavailableVideoError, err:
|
||||
self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
|
||||
|
||||
|
||||
class PostProcessor(object):
|
||||
"""Post Processor class.
|
||||
@@ -3701,6 +3875,8 @@ def gen_extractors():
|
||||
MyVideoIE(),
|
||||
ComedyCentralIE(),
|
||||
EscapistIE(),
|
||||
CollegeHumorIE(),
|
||||
XVideosIE(),
|
||||
|
||||
GenericIE()
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user