Compare commits

..

19 Commits

Author SHA1 Message Date
Philipp Hagemeister
57adeaea87 release 2013.05.23 2013-05-23 13:37:19 +02:00
Philipp Hagemeister
8f3f1aef05 Fix HowCast IE 2013-05-23 13:34:33 +02:00
Filippo Valsorda
51d2453c7a small tweaks 2013-05-21 16:07:27 +02:00
Jaime Marquínez Ferrándiz
45014296be Add TeamcocoIE (closes #212) 2013-05-21 14:37:32 +02:00
Anna Bernardi
afef36c950 add support for Flickr videos - closes #261 2013-05-20 23:19:38 +02:00
Filippo Valsorda
b31756c18e Python 2 compat fixes for MyVideo.de rtmpdump downloads 2013-05-20 11:57:10 +02:00
Filippo Valsorda
f008688520 make rtmpdump inherit the verbose option for debugging 2013-05-20 11:54:21 +02:00
Filippo Valsorda
5b68ea215b Merge pull request #842 - myvideo, rtmp support
@dersphere code, from dersphere/plugin.video.myvideo_de.git
rewritten by @mc2avr
released in the Public Domain by the author
ref: https://github.com/rg3/youtube-dl/pull/842
2013-05-20 09:49:58 +02:00
Jaime Marquínez Ferrándiz
b1d568f0bc HowcastIE: extract thumbnail 2013-05-20 08:39:41 +02:00
Jaime Marquínez Ferrándiz
17bd1b2f41 VineIE: extract more information and minor style changes 2013-05-20 08:31:03 +02:00
Anna Bernardi
5b0d3cc0cd Add support for Vine - closes #845 2013-05-20 00:33:14 +02:00
Filippo Valsorda
d4f76f1674 Add support for Howcast.com - closes #835 2013-05-18 19:17:19 +02:00
Jaime Marquínez Ferrándiz
340fa21198 UstreamIE: get thumbnail and uploader name 2013-05-18 11:54:18 +02:00
mc2avr
de5d66d431 MyVideoIE: add rtmp support 2013-05-15 23:38:44 +02:00
Jaime Marquínez Ferrándiz
7bdb17d4d5 Add extra_info argument to extract_info and process_ie_result
It allows to update the info_dicts with other values

(closes #840)
2013-05-14 14:40:40 +02:00
Philipp Hagemeister
419c64b107 Throw a better error if the protocol is invalid 2013-05-13 19:54:07 +02:00
Philipp Hagemeister
99a5ae3f8e Simplify generic search IE (Closes #839) 2013-05-13 19:53:52 +02:00
Philipp Hagemeister
c7563c528b Merge remote-tracking branch 'jaimeMF/SearchIE' 2013-05-13 19:43:35 +02:00
Jaime Marquínez Ferrándiz
e30e9318da Add base class SearchInfoExtractor for search queries IEs 2013-05-13 14:58:44 +02:00
5 changed files with 477 additions and 108 deletions

View File

@@ -152,7 +152,8 @@
"file": "20274954.flv",
"md5": "088f151799e8f572f84eb62f17d73e5c",
"info_dict": {
"title": "Young Americans for Liberty February 7, 2012 2:28 AM"
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
"uploader": "Young Americans for Liberty"
}
},
{
@@ -440,5 +441,46 @@
"title": "Obama Celebrates Iraq Victory"
},
"skip": "Requires rtmpdump"
},
{
"name": "Howcast",
"url": "http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly",
"file": "390161.mp4",
"md5": "1d7ba54e2c9d7dc6935ef39e00529138",
"info_dict":{
"title":"How to Tie a Square Knot Properly",
"description":"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot."
}
},
{
"name": "Vine",
"url": "https://vine.co/v/b9KOOWX7HUx",
"file": "b9KOOWX7HUx.mp4",
"md5": "2f36fed6235b16da96ce9b4dc890940d",
"info_dict":{
"title": "Chicken.",
"uploader": "Jack Dorsey"
}
},
{
"name": "Flickr",
"url": "http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/",
"file": "5645318632.mp4",
"md5": "6fdc01adbc89d72fc9c4f15b4a4ba87b",
"info_dict":{
"title": "Dark Hollow Waterfalls",
"uploader_id": "forestwander-nature-pictures",
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up."
}
},
{
"name": "Teamcoco",
"url": "http://teamcoco.com/video/louis-ck-interview-george-w-bush",
"file": "19705.mp4",
"md5": "27b6f7527da5acf534b15f21b032656e",
"info_dict":{
"title": "Louis C.K. Interview Pt. 1 11/3/11",
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
}
}
]

View File

@@ -436,10 +436,11 @@ class FileDownloader(object):
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
return None
def extract_info(self, url, download=True, ie_key=None):
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
'''
if ie_key:
@@ -463,10 +464,14 @@ class FileDownloader(object):
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
for result in ie_result:
result.update(extra_info)
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
else:
ie_result.update(extra_info)
if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME
return self.process_ie_result(ie_result, download=download)
@@ -482,7 +487,7 @@ class FileDownloader(object):
else:
self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True):
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
Take the result of the ie(may be modified) and resolve all unresolved
references (URLs, playlist items).
@@ -501,7 +506,12 @@ class FileDownloader(object):
self.process_info(ie_result)
return ie_result
elif result_type == 'url':
return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'))
# We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
download,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
@@ -525,9 +535,13 @@ class FileDownloader(object):
for i,entry in enumerate(entries,1):
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
entry['playlist'] = playlist
entry['playlist_index'] = i + playliststart
entry_result = self.process_ie_result(entry, download=download)
extra = {
'playlist': playlist,
'playlist_index': i + playliststart,
}
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
return ie_result
@@ -734,7 +748,7 @@ class FileDownloader(object):
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -749,12 +763,15 @@ class FileDownloader(object):
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if self.params.get('verbose', False): basic_args[1] = '-v'
if player_url is not None:
basic_args += ['-W', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if play_path is not None:
basic_args += ['-y', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@@ -810,7 +827,8 @@ class FileDownloader(object):
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None))
info_dict.get('play_path', None),
info_dict.get('tc_url', None))
tmpfilename = self.temp_name(filename)
stream = None

View File

@@ -16,6 +16,9 @@ import xml.etree.ElementTree
import random
import math
import operator
import hashlib
import binascii
import urllib
from .utils import *
@@ -188,6 +191,45 @@ class InfoExtractor(object):
video_info['title'] = playlist_title
return video_info
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search query extractors.

    Queries have the form ``<_SEARCH_KEY><prefix>:<query>`` where the prefix
    is either empty (fetch one result), a positive integer N without leading
    zeros (fetch N results) or ``all`` (fetch _MAX_RESULTS results).
    Subclasses must define _SEARCH_KEY and _MAX_RESULTS and implement
    _get_n_results.
    """

    @classmethod
    def _make_valid_url(cls):
        """Return the regular expression matching this extractor's queries."""
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Return True if url is a search query this extractor handles."""
        return re.match(cls._make_valid_url(), url) is not None

    def _real_extract(self, query):
        """
        Parse the search query and delegate to _get_n_results.

        Raises ExtractorError if the query does not match the expected
        format or requests a non-positive number of results.
        """
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')
        if prefix == '':
            return self._get_n_results(query, 1)
        elif prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)
        else:
            n = int(prefix)
            # The default pattern only admits positive integers; this guard
            # stays in case a subclass overrides _make_valid_url.
            if n <= 0:
                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
            elif n > self._MAX_RESULTS:
                # Clamp to the maximum instead of failing.
                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                n = self._MAX_RESULTS
            return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
@@ -1265,6 +1307,8 @@ class GenericIE(InfoExtractor):
opener.add_handler(handler())
response = opener.open(HeadRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
new_url = response.geturl()
if url == new_url:
@@ -1336,42 +1380,18 @@ class GenericIE(InfoExtractor):
}]
class YoutubeSearchIE(InfoExtractor):
class YoutubeSearchIE(SearchInfoExtractor):
"""Information Extractor for YouTube search queries."""
_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
_max_youtube_results = 1000
_MAX_RESULTS = 1000
IE_NAME = u'youtube:search'
_SEARCH_KEY = 'ytsearch'
def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number."""
query = query.decode(preferredencoding())
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
def _real_extract(self, query):
mobj = re.match(self._VALID_URL, query)
if mobj is None:
raise ExtractorError(u'Invalid search query "%s"' % query)
prefix, query = query.split(':')
prefix = prefix[8:]
query = query.encode('utf-8')
if prefix == '':
return self._get_n_results(query, 1)
elif prefix == 'all':
self._get_n_results(query, self._max_youtube_results)
else:
try:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
elif n > self._max_youtube_results:
self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
n = self._max_youtube_results
return self._get_n_results(query, n)
except ValueError: # parsing prefix as integer fails
return self._get_n_results(query, 1)
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
@@ -1404,30 +1424,12 @@ class YoutubeSearchIE(InfoExtractor):
return self.playlist_result(videos, query)
class GoogleSearchIE(InfoExtractor):
class GoogleSearchIE(SearchInfoExtractor):
"""Information Extractor for Google Video search queries."""
_VALID_URL = r'gvsearch(?P<prefix>|\d+|all):(?P<query>[\s\S]+)'
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
_max_google_results = 1000
_MAX_RESULTS = 1000
IE_NAME = u'video.google:search'
def _real_extract(self, query):
mobj = re.match(self._VALID_URL, query)
prefix = mobj.group('prefix')
query = mobj.group('query')
if prefix == '':
return self._get_n_results(query, 1)
elif prefix == 'all':
return self._get_n_results(query, self._max_google_results)
else:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
elif n > self._max_google_results:
self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
n = self._max_google_results
return self._get_n_results(query, n)
_SEARCH_KEY = 'gvsearch'
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
@@ -1453,37 +1455,12 @@ class GoogleSearchIE(InfoExtractor):
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
return res
class YahooSearchIE(InfoExtractor):
class YahooSearchIE(SearchInfoExtractor):
"""Information Extractor for Yahoo! Video search queries."""
_VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
_max_yahoo_results = 1000
_MAX_RESULTS = 1000
IE_NAME = u'screen.yahoo:search'
def _real_extract(self, query):
mobj = re.match(self._VALID_URL, query)
if mobj is None:
raise ExtractorError(u'Invalid search query "%s"' % query)
prefix, query = query.split(':')
prefix = prefix[8:]
query = query.encode('utf-8')
if prefix == '':
return self._get_n_results(query, 1)
elif prefix == 'all':
return self._get_n_results(query, self._max_yahoo_results)
else:
try:
n = int(prefix)
if n <= 0:
raise ExtractorError(u'Invalid download number %s for query "%s"' % (n, query))
elif n > self._max_yahoo_results:
self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
n = self._max_yahoo_results
return self._get_n_results(query, n)
except ValueError: # parsing prefix as integer fails
return self._get_n_results(query, 1)
_SEARCH_KEY = 'yvsearch'
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
@@ -2005,37 +1982,158 @@ class MyVideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self, data, key):
    """
    Apply the RC4 stream cipher to data with the given key.

    data and key are iterated element by element; each element is passed
    through compat_ord, so both integer and single-character items work.
    Returns a text string with one character (code point 0-255) per input
    element.
    """
    # Key scheduling: permute the 256-entry box according to the key.
    x = 0
    box = list(range(256))
    for i in range(256):
        x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
        box[i], box[x] = box[x], box[i]

    # Keystream generation, XORed with the input one element at a time.
    x = 0
    y = 0
    out = []
    for char in data:
        x = (x + 1) % 256
        y = (y + box[x]) % 256
        box[x], box[y] = box[y], box[x]
        out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
    # Join once at the end rather than growing a string inside the loop.
    return ''.join(out)
def __md5(self,s):
    """Return the hexadecimal MD5 digest of s, encoded as a bytes object."""
    return hashlib.md5(s).hexdigest().encode()
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
raise ExtractorError(u'invalid URL: %s' % url)
video_id = mobj.group(1)
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
mobj = re.search('<title>([^<]+)</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1)
mobj = re.search('[.](.+?)$', video_url)
if mobj is None:
raise ExtractorError(u'Unable to extract extention')
video_ext = mobj.group(1)
return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
}]
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video')
params = {}
encxml = ''
sec = mobj.group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse.unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
)
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id)
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\'',
webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
video_url = mobj.group(1) + ('/%s.flv' % video_id)
mobj = re.search('<title>([^<]+)</title>', webpage)
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
raise ExtractorError(u'unable to extract rtmpurl')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_rtmpurl:
self._downloader.report_warning(u'forcing RTMPT ...')
video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
# extract non rtmp videos
if (video_rtmpurl is None) or (video_rtmpurl == ''):
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
mobj = re.search('source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_file = compat_urllib_parse.unquote(mobj.group(1))
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_filepath + video_file
).replace('.f4m', '.m3u8')
mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
if mobj is None:
raise ExtractorError(u'unable to extract title')
video_title = mobj.group(1)
return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'id': video_id,
'url': video_rtmpurl,
'tc_url': video_rtmpurl,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}]
class ComedyCentralIE(InfoExtractor):
@@ -3327,18 +3425,26 @@ class UstreamIE(InfoExtractor):
video_id = m.group('videoID')
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
m = re.search(r'data-title="(?P<title>.+)"',webpage)
title = m.group('title')
m = re.search(r'<a class="state" data-content-type="channel" data-content-id="(?P<uploader>\d+)"',webpage)
uploader = m.group('uploader')
self.report_extraction(video_id)
try:
m = re.search(r'data-title="(?P<title>.+)"',webpage)
title = m.group('title')
m = re.search(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
webpage, re.DOTALL)
uploader = unescapeHTML(m.group('uploader').strip())
m = re.search(r'<link rel="image_src" href="(?P<thumb>.*?)"', webpage)
thumb = m.group('thumb')
except AttributeError:
raise ExtractorError(u'Unable to extract info')
info = {
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': title,
'uploader': uploader
'uploader': uploader,
'thumbnail': thumb,
}
return [info]
return info
class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
@@ -3960,7 +4066,7 @@ class TumblrIE(InfoExtractor):
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
video = re.search(re_video, webpage)
if video is None:
self.to_screen("No video founded")
self.to_screen("No video found")
return []
video_url = video.group('video_url')
ext = video.group('ext')
@@ -4059,7 +4165,7 @@ class RedTubeIE(InfoExtractor):
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
@@ -4086,6 +4192,201 @@ class InaIE(InfoExtractor):
'title': video_title,
}]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """
        Extract the video information from a Howcast video page.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL is invalid or the video URL,
        title or thumbnail cannot be found; a missing description only
        produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a clear message rather than an AttributeError below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        # The meta content may be quoted with either double or single quotes.
        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
        if mobj is None:
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
        }]
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """
        Extract the video information from a Vine video page.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL is invalid or the video URL,
        title, thumbnail or uploader cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a clear message rather than an AttributeError below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        webpage_url = 'https://vine.co/v/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video URL')
        video_url = mobj.group(1)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        # Group 1 is the image URL without its optional query string.
        mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Unable to extract uploader')
        uploader = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }]
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """
        Extract the video information from a Flickr photo page.

        The page yields a secret, which is used to fetch a first XML
        document containing a node id; the node id is then used to fetch
        a second XML document containing the stream location.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL is invalid or the secret, node id,
        video URL, title or thumbnail cannot be found; a missing
        description only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a clear message rather than an AttributeError below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        mobj = re.search(r"photo_secret: '(\w+)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video secret')
        secret = mobj.group(1)

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract node_id')
        node_id = mobj.group(1)

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # Only the FULLPATH part is HTML-unescaped before concatenation.
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        # The meta content may be quoted with either double or single quotes.
        mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            self._downloader.report_warning(u'unable to extract description')
            video_description = None
        else:
            video_description = mobj.group(1) or mobj.group(2)

        mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1) or mobj.group(2)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }]
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com videos"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """
        Extract the video information from a teamcoco.com video page.

        The numeric video id is read from the page and then used to fetch
        an XML document that contains the media file URL.

        Returns a one-element list holding the info dictionary.
        Raises ExtractorError if the URL is invalid or the video id, title,
        thumbnail, description or video URL cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
        if mobj is None:
            # Without this guard a page lacking the article tag would
            # surface as an AttributeError.
            raise ExtractorError(u'Unable to extract video id')
        video_id = mobj.group(1)

        self.report_extraction(video_id)

        mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group(1)

        mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract thumbnail')
        thumbnail = mobj.group(1)

        mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract description')
        description = mobj.group(1)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        video_url = mobj.group(1)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': description,
        }]
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
@@ -4143,6 +4444,10 @@ def gen_extractors():
BandcampIE(),
RedTubeIE(),
InaIE(),
HowcastIE(),
VineIE(),
FlickrIE(),
TeamcocoIE(),
GenericIE()
]

View File

@@ -150,6 +150,10 @@ try:
except NameError:
compat_chr = chr
def compat_ord(c):
    """
    Return the integer value of c.

    Integers are returned unchanged (e.g. the items obtained by indexing
    or iterating a Python 3 bytes object); anything else is passed to ord().
    """
    if isinstance(c, int):
        return c
    return ord(c)
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',

View File

@@ -1,2 +1,2 @@
__version__ = '2013.05.14'
__version__ = '2013.05.23'