Compare commits

..

23 Commits

Author SHA1 Message Date
Philipp Hagemeister
165e179764 release 2013.11.03 2013-11-03 15:50:36 +01:00
Philipp Hagemeister
12ebdd1506 [viddler] Support non-digit IDs (Fixes #1714) 2013-11-03 15:49:59 +01:00
Jaime Marquínez Ferrándiz
1baf9a5938 Merge pull request #1698 from rzhxeo/cinemassacre
[CinemassacreIE] Support more embed urls
2013-11-03 05:17:12 -08:00
Jaime Marquínez Ferrándiz
a56f9de156 Style fixes for extractors: remove spaces around (,),{ and } 2013-11-03 14:06:47 +01:00
Jaime Marquínez Ferrándiz
fa5d47af4b Merge pull request #1679 from rzhxeo/mofosex
Add support for http://www.mofosex.com
2013-11-03 05:04:14 -08:00
Jaime Marquínez Ferrándiz
d607038753 Merge pull request #1677 from rzhxeo/xtube
Add support for http://www.xtube.com
2013-11-03 03:28:02 -08:00
Jaime Marquínez Ferrándiz
9ac6a01aaf Merge pull request #1676 from rzhxeo/extremetube
Add support for http://www.extremetube.com
2013-11-03 03:25:46 -08:00
Jaime Marquínez Ferrándiz
be97abc247 Set the 'extractor_key' field in the info_dict
It's the string returned by the class method 'ie_key', which allows to retrieve the extractor with 'get_info_extractor'
2013-11-03 12:14:44 +01:00
Jaime Marquínez Ferrándiz
9103bbc5cd Add the 'webpage_url' field to info_dict
The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
2013-11-03 12:11:13 +01:00
Jaime Marquínez Ferrándiz
b6c45014ae Set the extra_info inside YoutubeDL.process_ie_result and set only if the keys are missing 2013-11-03 11:57:04 +01:00
rzhxeo
137bbb3e37 [XTubeIE] Add description to TEST 2013-11-02 22:45:48 +01:00
rzhxeo
86ad94bb2e [ExtremeTubeIE] Set age_limit to 18 and fix uploader extraction 2013-11-02 22:33:49 +01:00
Jaime Marquínez Ferrándiz
3e56add7c9 Merge pull request #1678 from rzhxeo/keezmovies
[KeezMoviesIE] Detect URLs with numbers in the SEO part correct
2013-11-02 14:15:52 -07:00
Jaime Marquínez Ferrándiz
f52f01b5d2 [brightcove] Don't set the extension
If the video only has the 'FLVFullLengthURL' key, it can still be an mp4 file.
2013-11-02 21:20:46 +01:00
Jaime Marquínez Ferrándiz
98d7efb537 [exfm] skip tests
The site is down too often.
2013-11-02 20:51:09 +01:00
Jaime Marquínez Ferrándiz
cf51923545 [youtube] Remove vevo test
The video is no longer available and it seems that vevo video don't use encrypted signatures anymore.
2013-11-02 20:46:26 +01:00
Jaime Marquínez Ferrándiz
165e3bb67a [bambuser] Add an extractor for channels (closes #1702) 2013-11-02 19:50:57 +01:00
Jaime Marquínez Ferrándiz
72a5b4f702 Add an extractor for bambuser.com (#1702) 2013-11-02 19:01:01 +01:00
rzhxeo
ab4e151347 [CinemassacreIE] Support more embed urls 2013-11-01 01:24:23 +01:00
rzhxeo
dcc2a706ef Add support for http://www.xtube.com 2013-10-28 19:23:48 +01:00
rzhxeo
2bc67c35ac [KeezMoviesIE] Detect URLs with numbers in the SEO part correct 2013-10-28 18:22:55 +01:00
rzhxeo
77ae65877e Add support for http://www.mofosex.com 2013-10-28 18:18:58 +01:00
rzhxeo
32a35e4418 Add support for http://www.extremetube.com 2013-10-28 17:35:01 +01:00
26 changed files with 329 additions and 66 deletions

View File

@@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
}
extra_info = {
'extractor': 'Bar',
'playlist': 'funny videos',
}
YDL.add_extra_info(test_dict, extra_info)
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
if __name__ == '__main__':
unittest.main()

View File

@@ -148,6 +148,9 @@ def generator(test_case):
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally:
try_rm_tcs_files()

View File

@@ -20,6 +20,7 @@ from youtube_dl.extractor import (
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
)
@@ -85,5 +86,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Highlights')
self.assertEqual(len(result['entries']), 12)
def test_bambuser_channel(self):
dl = FakeYDL()
ie = BambuserChannelIE(dl)
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 66)
if __name__ == '__main__':
unittest.main()

View File

@@ -318,6 +318,12 @@ class YoutubeDL(object):
% info_dict)
return None
@staticmethod
def add_extra_info(info_dict, extra_info):
'''Set the keys from extra_info in info dict if they are missing'''
for key, value in extra_info.items():
info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'''
Returns a list with a dictionary for each video we find.
@@ -344,17 +350,17 @@ class YoutubeDL(object):
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
for result in ie_result:
result.update(extra_info)
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
else:
ie_result.update(extra_info)
if 'extractor' not in ie_result:
ie_result['extractor'] = ie.IE_NAME
return self.process_ie_result(ie_result, download=download)
self.add_extra_info(ie_result,
{
'extractor': ie.IE_NAME,
'webpage_url': url,
'extractor_key': ie.ie_key(),
})
return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -378,7 +384,7 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video':
ie_result.update(extra_info)
self.add_extra_info(ie_result, extra_info)
return self.process_video_result(ie_result)
elif result_type == 'url':
# We have to add extra_info to the results because it may be
@@ -388,6 +394,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -413,12 +420,10 @@ class YoutubeDL(object):
extra = {
'playlist': playlist,
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
if not 'extractor' in entry:
# We set the extractor, if it's an url it will be set then to
# the new extractor, but if it's already a video we must make
# sure it's present: see issue #877
entry['extractor'] = ie_result['extractor']
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -427,10 +432,15 @@ class YoutubeDL(object):
return ie_result
elif result_type == 'compat_list':
def _fixup(r):
r.setdefault('extractor', ie_result['extractor'])
self.add_extra_info(r,
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
})
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download=download)
self.process_ie_result(_fixup(r), download, extra_info)
for r in ie_result['entries']
]
return ie_result

View File

@@ -9,6 +9,7 @@ from .arte import (
ArteTVFutureIE,
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
@@ -39,6 +40,7 @@ from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fktv import (
@@ -81,6 +83,7 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mofosex import MofosexIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
@@ -151,6 +154,7 @@ from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE

View File

@@ -0,0 +1,80 @@
import re
import json
import itertools
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
)
class BambuserIE(InfoExtractor):
IE_NAME = u'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa'
_TEST = {
u'url': u'http://bambuser.com/v/4050584',
u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': {
u'id': u'4050584',
u'ext': u'flv',
u'title': u'Education engineering days - lightning talks',
u'duration': 3741,
u'uploader': u'pixelversity',
u'uploader_id': u'344706',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)['result']
return {
'id': video_id,
'title': info['title'],
'url': info['url'],
'thumbnail': info.get('preview'),
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
'uploader_id': info['uid'],
}
class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel'
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
urls = []
last_id = ''
for i in itertools.count(1):
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user,
u'Downloading page %d' % i)
results = json.loads(info_json)['result']
if len(results) == 0:
break
last_id = results[-1]['vid']
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
return {
'_type': 'playlist',
'title': user,
'entries': urls,
}

View File

@@ -23,7 +23,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
@@ -122,12 +122,10 @@ class BrightcoveIE(InfoExtractor):
best_format = renditions[-1]
info.update({
'url': best_format['defaultURL'],
'ext': 'mp4',
})
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],
'ext': 'flv',
})
else:
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])

View File

@@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor):
webpage_url = u'http://' + mobj.group('url')
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj:
raise ExtractorError(u'Can\'t extract embed url and video id')
playerdata_url = mobj.group(u'embed_url')

View File

@@ -71,6 +71,9 @@ class InfoExtractor(object):
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings.

View File

@@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
url = 'http://depositfiles.com/en/files/' + file_id
# Retrieve file webpage with 'Free download' button pressed
free_download_indication = { 'gateway_result' : '1' }
free_download_indication = {'gateway_result' : '1'}
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
try:
self.report_download_webpage(file_id)

View File

@@ -21,6 +21,7 @@ class ExfmIE(InfoExtractor):
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
},
u'note': u'Soundcloud song',
u'skip': u'The site is down too often',
},
{
u'url': u'http://ex.fm/song/wddt8',
@@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
u'title': u'Safe and Sound',
u'uploader': u'Capital Cities',
},
u'skip': u'The site is down too often',
},
]

View File

@@ -0,0 +1,50 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
u'file': u'652431.mp4',
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
u'info_dict': {
u"title": u"Music Video 14 british euro brit european cumshots swallow",
u"uploader": u"unknown",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
return {
'id': video_id,
'title': video_title,
'uploader': uploader,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
track_id = mobj.group(1)
data = { 'ax': 1, 'ts': time.time() }
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
@@ -68,4 +68,4 @@ class HypemIE(InfoExtractor):
'ext': "mp3",
'title': title,
'artist': artist,
}]
}]

View File

@@ -12,7 +12,7 @@ from ..aes import (
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))'
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
u'file': u'1214711.mp4',
@@ -43,10 +43,10 @@ class KeezMoviesIE(InfoExtractor):
if webpage.find('encrypted=true')!=-1:
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join( format )
format = "-".join(format)
age_limit = self._rta_search(webpage)

View File

@@ -0,0 +1,49 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class MofosexIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
_TEST = {
u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
u'file': u'5018.mp4',
u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
u'info_dict': {
u"title": u"Japanese Teen Music Video",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': age_limit,
}

View File

@@ -47,10 +47,10 @@ class PornHubIE(InfoExtractor):
formats = []
for video_url in video_urls:
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join( format )
format = "-".join(format)
formats.append({
'url': video_url,
'ext': extension,

View File

@@ -49,10 +49,10 @@ class SpankwireIE(InfoExtractor):
formats = []
for video_url in video_urls:
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join( format )
format = "-".join(format)
formats.append({
'url': video_url,
'ext': extension,

View File

@@ -46,10 +46,10 @@ class Tube8IE(InfoExtractor):
if webpage.find('"encrypted":true')!=-1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
format = "-".join( format )
format = "-".join(format)
return {
'id': video_id,

View File

@@ -8,7 +8,7 @@ from ..utils import (
class ViddlerIE(InfoExtractor):
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TEST = {
u"url": u"http://www.viddler.com/v/43903784",
u'file': u'43903784.mp4',

View File

@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
elif mobj.group('pro'):
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'):
else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@@ -205,7 +203,7 @@ class VimeoIE(InfoExtractor):
# Vimeo specific: extract video codec and quality information
# First consider quality, then codecs, then take everything
codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
files = { 'hd': [], 'sd': [], 'other': []}
files = {'hd': [], 'sd': [], 'other': []}
config_files = config["video"].get("files") or config["request"].get("files")
for codec_name, codec_extension in codecs:
for quality in config_files.get(codec_name, []):
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0:
raise ExtractorError(u'No known codec found')
return [{
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail,
'description': video_description,
'formats': formats,
}]
'webpage_url': url,
}
class VimeoChannelIE(InfoExtractor):

View File

@@ -0,0 +1,55 @@
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class XTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
_TEST = {
u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_',
u'file': u'kVTUy_G222_.mp4',
u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab',
u'info_dict': {
u"title": u"strange erotica",
u"description": u"surreal gay themed erotica...almost an ET kind of thing",
u"uploader": u"greenshowers",
u"age_limit": 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', default=None)
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format[0] += 'p'
format[1] += 'k'
format = "-".join(format)
return {
'id': video_id,
'title': video_title,
'uploader': video_uploader,
'description': video_description,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@@ -132,7 +132,7 @@ class YahooSearchIE(SearchInfoExtractor):
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
res['entries'].append(e)
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )):
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
break
return res

View File

@@ -18,7 +18,7 @@ class YoukuIE(InfoExtractor):
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
u"file": u"XNDgyMDQ2NTQw_part00.flv",
u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
u"params": { u"test": False },
u"params": {u"test": False},
u"info_dict": {
u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
}
@@ -37,8 +37,8 @@ class YoukuIE(InfoExtractor):
source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
seed = float(seed)
for i in range(len(source)):
seed = (seed * 211 + 30031 ) % 65536
index = math.floor(seed / 65536 * len(source) )
seed = (seed * 211 + 30031) % 65536
index = math.floor(seed / 65536 * len(source))
mixed.append(source[int(index)])
source.remove(source[int(index)])
#return ''.join(mixed)

View File

@@ -81,14 +81,14 @@ class YouPornIE(InfoExtractor):
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
# A path looks like this:
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
video_url = unescapeHTML( link )
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
video_url = unescapeHTML(link)
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
# size = format[0]
# bitrate = format[1]
format = "-".join( format )
format = "-".join(format)
# title = u'%s-%s-%s' % (video_title, size, bitrate)
formats.append({

View File

@@ -339,18 +339,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
}
},
{
u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
u"file": u"1ltcDfZMA3U.mp4",
u"note": u"Test VEVO video (#897)",
u"info_dict": {
u"upload_date": u"20070518",
u"title": u"Maps - It Will Find You",
u"description": u"Music video by Maps performing It Will Find You.",
u"uploader": u"MuteUSA",
u"uploader_id": u"MuteUSA"
}
},
{
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
u"file": u"UxxajLWwzqY.mp4",
@@ -1497,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
})
return results

View File

@@ -1,2 +1,2 @@
__version__ = '2013.11.02'
__version__ = '2013.11.03'