Compare commits

..

18 Commits

Author SHA1 Message Date
Philipp Hagemeister
6fd2957163 release 2014.01.22.1 2014-01-22 02:17:00 +01:00
Philipp Hagemeister
d3a1c71917 [ringtv] Fix and add news extraction 2014-01-22 02:16:40 +01:00
Philipp Hagemeister
af1588c05f [mtv] Update tests and xpath function for new title extraction 2014-01-22 02:04:51 +01:00
Philipp Hagemeister
2250865fb0 [Wimp] Use new URL relay method 2014-01-22 02:01:39 +01:00
Philipp Hagemeister
99f770caa8 [hotnewhiphop] Retrieve media key 2014-01-22 01:55:50 +01:00
Philipp Hagemeister
00122de6a9 [gametrailers/mtv] Fix pre-3.x compatibility function for find_xpath_attr
Fixes #2189
2014-01-22 01:04:12 +01:00
Philipp Hagemeister
a70515c0fd [servingsys] Do not run test on travis
Apparantly, even the advertisers do geoblocking now!?
From the US, this isn't outright blocked, but there are no videos returned.
2014-01-22 00:27:18 +01:00
Philipp Hagemeister
398edd0689 release 2014.01.22 2014-01-22 00:21:41 +01:00
Philipp Hagemeister
6562df768d Merge branch 'master' of github.com:rg3/youtube-dl
Conflicts:
	youtube_dl/extractor/mtv.py
2014-01-22 00:21:27 +01:00
Philipp Hagemeister
06769acd71 [gametrailers] Use unicode_literals
Conflicts:
	youtube_dl/extractor/gametrailers.py
2014-01-22 00:18:52 +01:00
Philipp Hagemeister
32dac6943d [mtv] Use unicode_literals 2014-01-22 00:18:09 +01:00
Philipp Hagemeister
90834c78fe [mtv] Fix title for gametrailers (Fixes #2188)
We now prefer the title including the category, because that title is what is presented at the actual sites.
2014-01-22 00:17:33 +01:00
Jaime Marquínez Ferrándiz
47917f24c4 [brightcove] Fix extraction of embedded videos
There was a leading ‘:’ in the regex.
The ‘flashvars’ parameter is not always available.
2014-01-21 22:04:46 +01:00
Jaime Marquínez Ferrándiz
d614aa40e3 [brightcove] Fix check for url in the result
It may have the ‘formats’ field instead of ‘url’.
2014-01-21 21:53:10 +01:00
Jaime Marquínez Ferrándiz
bc4ba05fcb [mtv] Add an extractor for mtviggy.com (#2072) 2014-01-21 20:59:31 +01:00
Jaime Marquínez Ferrándiz
8d9453b9e8 Add an extractor for spike.com (#2072)
Added a generic _real_extract to MTVServicesInfoExtractor
2014-01-21 20:54:47 +01:00
Jaime Marquínez Ferrándiz
e4f320a4d0 [mtv] Check for geo-blocked videos in the xml document, not in the xml’s string
Allows to use the `_download_xml` method
2014-01-21 19:59:02 +01:00
Jaime Marquínez Ferrándiz
ef9f2ba7af [mtv] Use unicode_literals 2014-01-21 19:58:21 +01:00
11 changed files with 203 additions and 99 deletions

View File

@@ -119,7 +119,10 @@ from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mtv import MTVIE
from .mtv import (
MTVIE,
MTVIggyIE,
)
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspass import MySpassIE
@@ -171,6 +174,7 @@ from .southparkstudios import (
from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .spike import SpikeIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
from .steam import SteamIE

View File

@@ -90,9 +90,12 @@ class BrightcoveIE(InfoExtractor):
object_doc = xml.etree.ElementTree.fromstring(object_str)
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
flashvars = dict(
(k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
if fv_el is not None:
flashvars = dict(
(k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
else:
flashvars = {}
def find_param(name):
if name in flashvars:
@@ -131,7 +134,7 @@ class BrightcoveIE(InfoExtractor):
m_brightcove = re.search(
r'''(?sx)<object
(?:
:[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
@@ -230,6 +233,6 @@ class BrightcoveIE(InfoExtractor):
else:
return ad_info
if 'url' not in info:
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor
@@ -6,12 +8,12 @@ from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
u'md5': u'4c8e67681a0ea7ec241e8c09b3ea8cf7',
u'info_dict': {
u'title': u'E3 2013: Debut Trailer',
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
'file': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
'info_dict': {
'title': 'Mirror\'s Edge 2|E3 2013: Debut Trailer',
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
},
}
@@ -23,5 +25,5 @@ class GametrailersIE(MTVServicesInfoExtractor):
webpage = self._download_webpage(url, video_id)
mgid = self._search_regex([r'data-video="(?P<mgid>mgid:.*?)"',
r'data-contentId=\'(?P<mgid>mgid:.*?)\''],
webpage, u'mgid')
webpage, 'mgid')
return self._get_videos_info(mgid)

View File

@@ -1,17 +1,25 @@
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
HEADRequest,
)
class HotNewHipHopIE(InfoExtractor):
_VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
_TEST = {
u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': {
u"title": u'Freddie Gibbs "Lay It Down"'
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
'file': '1435540.mp3',
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
'info_dict': {
'title': 'Freddie Gibbs - Lay It Down'
}
}
@@ -21,24 +29,41 @@ class HotNewHipHopIE(InfoExtractor):
webpage_src = self._download_webpage(url, video_id)
video_url_base64 = self._search_regex(r'data-path="(.*?)"',
webpage_src, u'video URL', fatal=False)
video_url_base64 = self._search_regex(
r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
if video_url_base64 == None:
video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src,
u'video URL')
if video_url_base64 is None:
video_url = self._search_regex(
r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
return self.url_result(video_url, ie='Youtube')
video_url = base64.b64decode(video_url_base64).decode('utf-8')
reqdata = compat_urllib_parse.urlencode([
('mediaType', 's'),
('mediaId', video_id),
])
r = compat_urllib_request.Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
mkd = self._download_json(
r, video_id, note='Requesting media key',
errnote='Could not download media key')
if 'mediaKey' not in mkd:
raise ExtractorError('Did not get a media key')
video_title = self._html_search_regex(r"<title>(.*)</title>",
webpage_src, u'title')
redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
redirect_req = HEADRequest(redirect_url)
req = self._request_webpage(
redirect_req, video_id,
note='Resolving final URL', errnote='Could not resolve final URL')
video_url = req.geturl()
if video_url.endswith('.html'):
raise ExtractorError('Redirect failed')
results = [{
'id': video_id,
'url' : video_url,
'title' : video_title,
'thumbnail' : self._og_search_thumbnail(webpage_src),
'ext' : 'mp3',
}]
return results
video_title = self._og_search_title(webpage_src).strip()
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': self._og_search_thumbnail(webpage_src),
}

View File

@@ -1,13 +1,18 @@
from __future__ import unicode_literals
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
url_basename,
RegexNotFoundError,
)
def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
@@ -34,10 +39,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
def _extract_video_formats(self, mdoc):
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True)
formats = []
for rendition in mdoc.findall('.//rendition'):
@@ -60,11 +64,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
self.report_extraction(video_id)
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
u'Downloading video urls')
mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
description_node = itemdoc.find('description')
if description_node is not None:
@@ -72,9 +77,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
else:
description = None
title_el = None
if title_el is None:
title_el = find_xpath_attr(
itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title')
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
if title_el is None:
title_el = itemdoc.find('.//title')
title = title_el.text
if title is None:
raise ExtractorError('Could not find video title')
return {
'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page),
'title': title,
'formats': self._extract_video_formats(mediagen_doc),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,
@@ -86,9 +104,20 @@ class MTVServicesInfoExtractor(InfoExtractor):
idoc = self._download_xml(
self._FEED_URL + '?' + data, video_id,
u'Downloading info', transform_source=fix_xml_ampersands)
'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
try:
# the url is in the format http://media.mtvnservices.com/fb/{mgid}.swf
fb_url = self._og_search_video_url(webpage)
mgid = url_basename(fb_url).rpartition('.')[0]
except RegexNotFoundError:
mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
return self._get_videos_info(mgid)
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)^https?://
@@ -99,25 +128,25 @@ class MTVIE(MTVServicesInfoExtractor):
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
'file': '853555.mp4',
'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
'info_dict': {
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
'add_ie': ['Vevo'],
'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
'file': 'USCJY1331283.mp4',
'md5': '73b4e7fcadd88929292fe52c3ced8caf',
'info_dict': {
'title': 'Everything Has Changed',
'upload_date': '20130606',
'uploader': 'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
'skip': 'VEVO is only available in some countries',
},
]
@@ -136,8 +165,22 @@ class MTVIE(MTVServicesInfoExtractor):
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
self.to_screen(u'Vevo video detected: %s' % vevo_id)
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri)
class MTVIggyIE(MTVServicesInfoExtractor):
IE_NAME = 'mtviggy.com'
_VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
_TEST = {
'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
'info_dict': {
'id': '984696',
'ext': 'mp4',
'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
}
}
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'

View File

@@ -1,37 +1,44 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class RingTVIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)'
_VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
_TEST = {
u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown",
u"file": u"746619.mp4",
u"md5": u"7c46b4057d22de32e0a539f017e64ad3",
u"info_dict": {
u"title": u"Canelo Alvarez talks about Mayweather showdown",
u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king."
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
"file": "857645.mp4",
"md5": "d25945f5df41cdca2d2587165ac28720",
"info_dict": {
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split('-')[0]
video_id = mobj.group('id').split('-')[0]
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' | RingTV','')
description = self._search_regex(r'<div class="blurb">(.+?)</div>',
webpage, 'Description')
final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" %(str(video_id))
thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" %(str(video_id))
ext = final_url.split('.')[-1]
return [{
'id' : video_id,
'url' : final_url,
'ext' : ext,
'title' : title,
'thumbnail' : thumbnail_url,
'description' : description,
}]
if mobj.group('type') == 'news':
video_id = self._search_regex(
r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/
embed_iframe/[0-9]+/video/([0-9]+)/''',
webpage, 'real video ID')
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'addthis:description="([^"]+)"',
webpage, 'description', fatal=False)
final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % video_id
thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % video_id
return {
'id': video_id,
'url': final_url,
'title': title,
'thumbnail': thumbnail_url,
'description': description,
}

View File

@@ -36,7 +36,8 @@ class ServingSysIE(InfoExtractor):
}],
'params': {
'playlistend': 2,
}
},
'skip': 'Blocked in the US [sic]',
}
def _real_extract(self, url):

View File

@@ -0,0 +1,19 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
_TEST = {
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
'md5': '1a9265f32b0c375793d6c4ce45255256',
'info_dict': {
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
'ext': 'mp4',
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
},
}
_FEED_URL = 'http://www.spike.com/feeds/mrss/'

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re
import base64
from .common import InfoExtractor
@@ -7,12 +8,12 @@ from .common import InfoExtractor
class WimpIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
_TEST = {
u'url': u'http://www.wimp.com/deerfence/',
u'file': u'deerfence.flv',
u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
u'info_dict': {
u"title": u"Watch Till End: Herd of deer jump over a fence.",
u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
'url': 'http://www.wimp.com/deerfence/',
'file': 'deerfence.flv',
'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
'info_dict': {
"title": "Watch Till End: Herd of deer jump over a fence.",
"description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
}
}
@@ -20,13 +21,12 @@ class WimpIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
googleString = base64.b64decode(googleString).decode('ascii')
final_url = self._search_regex('","(.*?)"', googleString, u'final video url')
video_url = self._search_regex(
r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
return {
'id': video_id,
'url': final_url,
'url': video_url,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),

View File

@@ -224,7 +224,7 @@ if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z]+$', key)
assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr)
else:

View File

@@ -1,2 +1,2 @@
__version__ = '2014.01.21.1'
__version__ = '2014.01.22.1'