More unicode literals

This commit is contained in:
Philipp Hagemeister 2014-01-07 10:04:48 +01:00
parent 2537186d43
commit 3798eadccd
8 changed files with 137 additions and 121 deletions

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -5,7 +6,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor): class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
IE_NAME = u'AcademicEarth:Course' IE_NAME = 'AcademicEarth:Course'
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
import json import json
@ -11,46 +13,46 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor): class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = { _TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [ "playlist": [
{ {
u"file": u"manofsteel-trailer4.mov", "file": "manofsteel-trailer4.mov",
u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": { "info_dict": {
u"duration": 111, "duration": 111,
u"title": u"Trailer 4", "title": "Trailer 4",
u"upload_date": u"20130523", "upload_date": "20130523",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-trailer3.mov", "file": "manofsteel-trailer3.mov",
u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", "md5": "b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": { "info_dict": {
u"duration": 182, "duration": 182,
u"title": u"Trailer 3", "title": "Trailer 3",
u"upload_date": u"20130417", "upload_date": "20130417",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-trailer.mov", "file": "manofsteel-trailer.mov",
u"md5": u"d0f1e1150989b9924679b441f3404d48", "md5": "d0f1e1150989b9924679b441f3404d48",
u"info_dict": { "info_dict": {
u"duration": 148, "duration": 148,
u"title": u"Trailer", "title": "Trailer",
u"upload_date": u"20121212", "upload_date": "20121212",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-teaser.mov", "file": "manofsteel-teaser.mov",
u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", "md5": "5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": { "info_dict": {
u"duration": 93, "duration": 93,
u"title": u"Teaser", "title": "Teaser",
u"upload_date": u"20120721", "upload_date": "20120721",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
} }
] ]

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json import json
import re import re
@ -13,14 +15,14 @@ class ArchiveOrgIE(InfoExtractor):
IE_DESC = 'archive.org videos' IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = { _TEST = {
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
u'md5': u'8af1d4cf447933ed3c7f4871162602db', 'md5': '8af1d4cf447933ed3c7f4871162602db',
u'info_dict': { 'info_dict': {
u"title": u"1968 Demo - FJCC Conference Presentation Reel #1", "title": "1968 Demo - FJCC Conference Presentation Reel #1",
u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>", "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. 
Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
u"upload_date": u"19681210", "upload_date": "19681210",
u"uploader": u"SRI International" "uploader": "SRI International"
} }
} }
@ -29,7 +31,7 @@ class ArchiveOrgIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
json_url = url + (u'?' if u'?' in url else '&') + u'output=json' json_url = url + ('?' if '?' in url else '&') + 'output=json'
json_data = self._download_webpage(json_url, video_id) json_data = self._download_webpage(json_url, video_id)
data = json.loads(json_data) data = json.loads(json_data)

View File

@ -1,4 +1,6 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
@ -22,7 +24,7 @@ class ArteTvIE(InfoExtractor):
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv' IE_NAME = 'arte.tv'
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -37,7 +39,7 @@ class ArteTvIE(InfoExtractor):
# r'src="(.*?/videothek_js.*?\.js)', # r'src="(.*?/videothek_js.*?\.js)',
# 0, # 0,
# [ # [
# (1, 'url', u'Invalid URL: %s' % url) # (1, 'url', 'Invalid URL: %s' % url)
# ] # ]
# ) # )
# http_host = url.split('/')[2] # http_host = url.split('/')[2]
@ -49,12 +51,12 @@ class ArteTvIE(InfoExtractor):
# '(rtmp://.*?)\'', # '(rtmp://.*?)\'',
# re.DOTALL, # re.DOTALL,
# [ # [
# (1, 'path', u'could not extract video path: %s' % url), # (1, 'path', 'could not extract video path: %s' % url),
# (2, 'player', u'could not extract video player: %s' % url), # (2, 'player', 'could not extract video player: %s' % url),
# (3, 'url', u'could not extract video url: %s' % url) # (3, 'url', 'could not extract video url: %s' % url)
# ] # ]
# ) # )
# video_url = u'%s/%s' % (info.get('url'), info.get('path')) # video_url = '%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VIDEOS_URL, url) mobj = re.match(self._VIDEOS_URL, url)
@ -107,9 +109,9 @@ class ArteTvIE(InfoExtractor):
def _extract_liveweb(self, url, name, lang): def _extract_liveweb(self, url, name, lang):
"""Extract from http://liveweb.arte.tv/""" """Extract from http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information') video_id, 'Downloading information')
event_doc = config_doc.find('event') event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd') url_node = event_doc.find('video').find('urlHd')
if url_node is None: if url_node is None:
@ -124,7 +126,7 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor): class ArteTVPlus7IE(InfoExtractor):
IE_NAME = u'arte.tv:+7' IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod @classmethod
@ -207,7 +209,7 @@ class ArteTVPlus7IE(InfoExtractor):
if bitrate is not None: if bitrate is not None:
quality += '-%d' % bitrate quality += '-%d' % bitrate
if format_info.get('versionCode') is not None: if format_info.get('versionCode') is not None:
format_id = u'%s-%s' % (quality, format_info['versionCode']) format_id = '%s-%s' % (quality, format_info['versionCode'])
else: else:
format_id = quality format_id = quality
info = { info = {
@ -216,7 +218,7 @@ class ArteTVPlus7IE(InfoExtractor):
'width': format_info.get('width'), 'width': format_info.get('width'),
'height': height, 'height': height,
} }
if format_info['mediaType'] == u'rtmp': if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer'] info['url'] = format_info['streamer']
info['play_path'] = 'mp4:' + format_info['url'] info['play_path'] = 'mp4:' + format_info['url']
info['ext'] = 'flv' info['ext'] = 'flv'
@ -231,27 +233,27 @@ class ArteTVPlus7IE(InfoExtractor):
# It also uses the arte_vp_url url from the webpage to extract the information # It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE): class ArteTVCreativeIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:creative' IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
_TEST = { _TEST = {
u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
u'file': u'050489-002.mp4', 'file': '050489-002.mp4',
u'info_dict': { 'info_dict': {
u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design', 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
}, },
} }
class ArteTVFutureIE(ArteTVPlus7IE): class ArteTVFutureIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:future' IE_NAME = 'arte.tv:future'
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
u'file': u'050940-003.mp4', 'file': '050940-003.mp4',
u'info_dict': { 'info_dict': {
u'title': u'Les champignons au secours de la planète', 'title': 'Les champignons au secours de la planète',
}, },
} }
@ -263,7 +265,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
class ArteTVDDCIE(ArteTVPlus7IE): class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:ddc' IE_NAME = 'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -7,13 +9,14 @@ from ..utils import (
ExtractorError, ExtractorError,
) )
class AUEngineIE(InfoExtractor): class AUEngineIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
u'file': u'lfvlytY6.mp4', 'file': 'lfvlytY6.mp4',
u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', 'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
u'info_dict': { 'info_dict': {
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
} }
} }
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
@ -23,7 +26,7 @@ class AUEngineIE(InfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, u'title') webpage, 'title')
title = title.strip() title = title.strip()
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = map(compat_urllib_parse.unquote, links) links = map(compat_urllib_parse.unquote, links)
@ -37,7 +40,7 @@ class AUEngineIE(InfoExtractor):
video_url = link video_url = link
if not video_url: if not video_url:
raise ExtractorError(u'Could not find video URL') raise ExtractorError(u'Could not find video URL')
ext = u'.' + determine_ext(video_url) ext = '.' + determine_ext(video_url)
if ext == title[-len(ext):]: if ext == title[-len(ext):]:
title = title[:-len(ext)] title = title[:-len(ext)]

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
import json import json
import itertools import itertools
@ -9,26 +11,26 @@ from ..utils import (
class BambuserIE(InfoExtractor): class BambuserIE(InfoExtractor):
IE_NAME = u'bambuser' IE_NAME = 'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa' _API_KEY = '005f64509e19a868399060af746a00aa'
_TEST = { _TEST = {
u'url': u'http://bambuser.com/v/4050584', 'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': { 'info_dict': {
u'id': u'4050584', 'id': '4050584',
u'ext': u'flv', 'ext': 'flv',
u'title': u'Education engineering days - lightning talks', 'title': 'Education engineering days - lightning talks',
u'duration': 3741, 'duration': 3741,
u'uploader': u'pixelversity', 'uploader': 'pixelversity',
u'uploader_id': u'344706', 'uploader_id': '344706',
}, },
u'params': { 'params': {
# It doesn't respect the 'Range' header, it would download the whole video # It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
u'skip_download': True, 'skip_download': True,
}, },
} }
@ -53,7 +55,7 @@ class BambuserIE(InfoExtractor):
class BambuserChannelIE(InfoExtractor): class BambuserChannelIE(InfoExtractor):
IE_NAME = u'bambuser:channel' IE_NAME = 'bambuser:channel'
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)' _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request # The maximum number we can get with each request
_STEP = 50 _STEP = 50
@ -72,7 +74,7 @@ class BambuserChannelIE(InfoExtractor):
# Without setting this header, we wouldn't get any result # Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user, info_json = self._download_webpage(req, user,
u'Downloading page %d' % i) 'Downloading page %d' % i)
results = json.loads(info_json)['result'] results = json.loads(info_json)['result']
if len(results) == 0: if len(results) == 0:
break break

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json import json
import re import re
@ -12,14 +14,14 @@ from ..utils import (
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TESTS = [{ _TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3', 'file': '1812978515.mp3',
u'md5': u'c557841d5e50261777a6585648adf439', 'md5': 'c557841d5e50261777a6585648adf439',
u'info_dict': { 'info_dict': {
u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
u"duration": 10, "duration": 10,
}, },
u'skip': u'There is a limit of 200 free downloads / month for the test song' '_skip': 'There is a limit of 200 free downloads / month for the test song'
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -58,7 +60,7 @@ class BandcampIE(InfoExtractor):
'duration': duration, 'duration': duration,
} }
else: else:
raise ExtractorError(u'No free songs found') raise ExtractorError('No free songs found')
download_link = m_download.group(1) download_link = m_download.group(1)
video_id = re.search( video_id = re.search(
@ -72,9 +74,9 @@ class BandcampIE(InfoExtractor):
download_webpage, re.MULTILINE).group(1) download_webpage, re.MULTILINE).group(1)
info = json.loads(info)[0] info = json.loads(info)[0]
# We pick mp3-320 for now, until format selection can be easily implemented. # We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info[u'downloads'][u'mp3-320'] mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired # If we try to use this url it says the link has expired
initial_url = mp3_info[u'url'] initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url) m_url = re.match(re_url, initial_url)
#We build the url we will use to get the final track url #We build the url we will use to get the final track url
@ -87,41 +89,41 @@ class BandcampIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': info[u'title'], 'title': info['title'],
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none', 'vcodec': 'none',
'url': final_url, 'url': final_url,
'thumbnail': info[u'thumb_url'], 'thumbnail': info['thumb_url'],
'uploader': info[u'artist'], 'uploader': info['artist'],
} }
class BandcampAlbumIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album' IE_NAME = 'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = { _TEST = {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [ 'playlist': [
{ {
u'file': u'1353101989.mp3', 'file': '1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf', 'md5': '39bc1eded3476e927c724321ddf116cf',
u'info_dict': { 'info_dict': {
u'title': u'Intro', 'title': 'Intro',
} }
}, },
{ {
u'file': u'38097443.mp3', 'file': '38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa', 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': { 'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)', 'title': 'Kero One - Keep It Alive (Blazo remix)',
} }
}, },
], ],
u'params': { 'params': {
u'playlistend': 2 'playlistend': 2
}, },
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' 'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -130,11 +132,11 @@ class BandcampAlbumIE(InfoExtractor):
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths: if not tracks_paths:
raise ExtractorError(u'The page doesn\'t contain any tracks') raise ExtractorError('The page doesn\'t contain any tracks')
entries = [ entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths] for t_path in tracks_paths]
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title') title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
return { return {
'_type': 'playlist', '_type': 'playlist',
'title': title, 'title': title,

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -12,14 +14,14 @@ class CNNIE(InfoExtractor):
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{ _TESTS = [{
u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4', 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
u'md5': u'3e6121ea48df7e2259fe73a0628605c4', 'md5': '3e6121ea48df7e2259fe73a0628605c4',
u'info_dict': { 'info_dict': {
u'title': u'Nadal wins 8th French Open title', 'title': 'Nadal wins 8th French Open title',
u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
u'duration': 135, 'duration': 135,
u'upload_date': u'20130609', 'upload_date': '20130609',
}, },
}, },
{ {
@ -36,7 +38,7 @@ class CNNIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
path = mobj.group('path') path = mobj.group('path')
page_title = mobj.group('title') page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
info = self._download_xml(info_url, page_title) info = self._download_xml(info_url, page_title)
formats = [] formats = []