Compare commits

..

18 Commits

Author SHA1 Message Date
Philipp Hagemeister
e2548b5b25 release 2014.11.15.1 2014-11-15 15:21:50 +01:00
Philipp Hagemeister
bbefcf04bf [goldenmoustache] Fix title (Closes #4203) 2014-11-15 15:21:34 +01:00
Philipp Hagemeister
c7b0add86f [compat] Work around kwargs bugs in old 2.6 Python releases (Fixes #3813) 2014-11-15 15:17:19 +01:00
Philipp Hagemeister
a0155d93d9 release 2014.11.15 2014-11-15 11:01:54 +01:00
Philipp Hagemeister
00d9ef0b70 [mailru] Adapt to new data format (Fixes #4201) 2014-11-15 11:01:17 +01:00
Philipp Hagemeister
0cc8888038 [crunchyroll] Remove NOP code (#2782) 2014-11-15 00:34:03 +01:00
Philipp Hagemeister
c735450e07 release 2014.11.14 2014-11-14 22:27:56 +01:00
Jaime Marquínez Ferrándiz
71f8c7ce7a [mtvservices:embedded] Improve config url (fixes #4092) 2014-11-14 19:02:18 +01:00
Jaime Marquínez Ferrándiz
5fee0eeac0 [ComedyCentralShows] Use the rtmp urls transform function from the MTV IE (fixes #3364)
It produces the right mp4 urls, so we stop preferring the rtmp urls.
2014-11-14 18:36:04 +01:00
Philipp Hagemeister
eb4157fd17 [utils] Fix struct.pack call on very old Python versions (#4181) 2014-11-14 00:39:32 +01:00
Philipp Hagemeister
69ede8ef81 release 2014.11.13.3 2014-11-13 16:28:24 +01:00
Philipp Hagemeister
609a61e3e6 [npo] Improve npo.nl (Fixes #4173) 2014-11-13 16:28:05 +01:00
Philipp Hagemeister
bf951c5e29 release 2014.11.13.2 2014-11-13 16:12:54 +01:00
Philipp Hagemeister
af63fed7d8 [generic] Add support for livestream embeds (Fixes #4185) 2014-11-13 16:12:51 +01:00
Philipp Hagemeister
68d1d41c03 Credit @yaccz for freevideo (#4131) 2014-11-13 15:59:48 +01:00
Philipp Hagemeister
3deed1e91a [freevideo] Simplify and raise error for foreigners (Fixes #4131) 2014-11-13 15:59:22 +01:00
Philipp Hagemeister
11b28e93d3 Merge remote-tracking branch 'yaccz/add-extractor/freevideo' 2014-11-13 15:53:16 +01:00
yac
3898c8a7b2 [FreeVideo] Add new extractor 2014-11-08 00:13:28 +01:00
16 changed files with 115 additions and 38 deletions

View File

@@ -81,3 +81,4 @@ winwon
Xavier Beynon Xavier Beynon
Gabriel Schubiner Gabriel Schubiner
xantares xantares
Jan Matějka

View File

@@ -284,6 +284,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}]) self.assertEqual(d, [{"id": "532cb", "x": 3}])
stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
d = json.loads(stripped)
self.assertEqual(d, {'STATUS': 'OK'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@@ -288,6 +288,14 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
else: else:
compat_getpass = getpass.getpass compat_getpass = getpass.getpass
# Old 2.6 and 2.7 releases require kwargs to be bytes.
# Probe the running interpreter once at import time: if expanding the dict
# below with ** raises TypeError, keyword names have to be converted first.
try:
    (lambda x: x)(**{'x': 0})
except TypeError:
    def compat_kwargs(kwargs):
        """Return a new dict with every key of *kwargs* passed through bytes()."""
        return dict((bytes(name), value) for name, value in kwargs.items())
else:
    def compat_kwargs(kwargs):
        """Return *kwargs* itself; no key conversion is needed here."""
        return kwargs
__all__ = [ __all__ = [
'compat_HTTPError', 'compat_HTTPError',
@@ -299,6 +307,7 @@ __all__ = [
'compat_html_entities', 'compat_html_entities',
'compat_html_parser', 'compat_html_parser',
'compat_http_client', 'compat_http_client',
'compat_kwargs',
'compat_ord', 'compat_ord',
'compat_parse_qs', 'compat_parse_qs',
'compat_print', 'compat_print',

View File

@@ -127,6 +127,7 @@ from .francetv import (
) )
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .freespeech import FreespeechIE from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE from .gamekings import GamekingsIE
from .gameone import ( from .gameone import (

View File

@@ -31,7 +31,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
} }
class ComedyCentralShowsIE(InfoExtractor): class ComedyCentralShowsIE(MTVServicesInfoExtractor):
IE_DESC = 'The Daily Show / The Colbert Report' IE_DESC = 'The Daily Show / The Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert # urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like: # urls for episodes like:
@@ -109,14 +109,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'400': (384, 216), '400': (384, 216),
} }
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None: if mobj is None:
@@ -212,9 +204,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'ext': self._video_extensions.get(format, 'mp4'), 'ext': self._video_extensions.get(format, 'mp4'),
'height': h, 'height': h,
'width': w, 'width': w,
'format_note': 'HTTP 400 at the moment (patches welcome!)',
'preference': -100,
}) })
formats.append({ formats.append({
'format_id': 'rtmp-%s' % format, 'format_id': 'rtmp-%s' % format,

View File

@@ -264,8 +264,6 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if not lang_code: if not lang_code:
continue continue
sub_root = xml.etree.ElementTree.fromstring(subtitle) sub_root = xml.etree.ElementTree.fromstring(subtitle)
if not sub_root:
subtitles[lang_code] = ''
if sub_format == 'ass': if sub_format == 'ass':
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root) subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
else: else:

View File

@@ -0,0 +1,38 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class FreeVideoIE(InfoExtractor):
    """Extractor for video pages under http://www.freevideo.cz/vase-videa/."""

    # The dots of the host name are escaped so they only match a literal '.'.
    # The 'id' group is the page slug, i.e. everything before '.html'.
    _VALID_URL = r'^http://www\.freevideo\.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'

    _TEST = {
        'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
        'info_dict': {
            'id': 'vysukany-zadecek-22033',
            'ext': 'mp4',
            "title": "vysukany-zadecek-22033",
            "age_limit": 18,
        },
        'skip': 'Blocked outside .cz',
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Raises:
            ExtractorError: with ``expected=True`` when the URL reported by
                the download handle points at www.czechav.com instead of the
                video page.
        """
        video_id = self._match_id(url)

        # The handle is kept so the URL that was actually served can be
        # inspected (presumably the final URL after redirects -- confirm
        # against _download_webpage_handle).
        webpage, handle = self._download_webpage_handle(url, video_id)
        if '//www.czechav.com/' in handle.geturl():
            raise ExtractorError(
                'Access to freevideo is blocked from your location',
                expected=True)

        # Literal dots are escaped so that only real *.cdn.freevideo.cz hosts
        # and a real 'video.mp4' file name are accepted.
        video_url = self._search_regex(
            r'\s+url: "(http://[a-z0-9-]+\.cdn\.freevideo\.cz/stream/.*?/video\.mp4)"',
            webpage, 'video URL')

        return {
            'id': video_id,
            'url': video_url,
            # No title is extracted from the page; the URL slug is reused.
            'title': video_id,
            'age_limit': 18,
        }

View File

@@ -434,7 +434,17 @@ class GenericIE(InfoExtractor):
'title': 'Chet Chat 171 - Oct 29, 2014', 'title': 'Chet Chat 171 - Oct 29, 2014',
'upload_date': '20141029', 'upload_date': '20141029',
} }
} },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
'info_dict': {
'id': '67864563',
'ext': 'flv',
'upload_date': '20141112',
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@@ -916,6 +926,12 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
def check_video(vurl): def check_video(vurl):
vpath = compat_urlparse.urlparse(vurl).path vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath) vext = determine_ext(vpath)

View File

@@ -10,7 +10,7 @@ from ..utils import (
class GoldenMoustacheIE(InfoExtractor): class GoldenMoustacheIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/', 'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
'md5': '0f904432fa07da5054d6c8beb5efb51a', 'md5': '0f904432fa07da5054d6c8beb5efb51a',
'info_dict': { 'info_dict': {
@@ -21,7 +21,18 @@ class GoldenMoustacheIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int, 'view_count': int,
} }
} }, {
'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
'md5': '27f0c50fb4dd5f01dc9082fc67cd5700',
'info_dict': {
'id': '55249',
'ext': 'mp4',
'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
'view_count': int,
}
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@@ -30,7 +41,7 @@ class GoldenMoustacheIE(InfoExtractor):
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL') r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
r'<title>(.*?) - Golden Moustache</title>', webpage, 'title') r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(

View File

@@ -18,7 +18,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor): class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream' IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b', 'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -37,6 +37,9 @@ class LivestreamIE(InfoExtractor):
'title': 'TEDCity2.0 (English)', 'title': 'TEDCity2.0 (English)',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
}] }]
def _parse_smil(self, video_id, smil_url): def _parse_smil(self, video_id, smil_url):

View File

@@ -16,7 +16,7 @@ class MailRuIE(InfoExtractor):
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
'md5': 'dea205f03120046894db4ebb6159879a', 'md5': 'dea205f03120046894db4ebb6159879a',
'info_dict': { 'info_dict': {
'id': '46301138', 'id': '46301138_76',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
'timestamp': 1393232740, 'timestamp': 1393232740,
@@ -30,7 +30,7 @@ class MailRuIE(InfoExtractor):
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
'md5': '00a91a58c3402204dcced523777b475f', 'md5': '00a91a58c3402204dcced523777b475f',
'info_dict': { 'info_dict': {
'id': '46843144', 'id': '46843144_1263',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
'timestamp': 1397217632, 'timestamp': 1397217632,
@@ -54,33 +54,36 @@ class MailRuIE(InfoExtractor):
author = video_data['author'] author = video_data['author']
uploader = author['name'] uploader = author['name']
uploader_id = author['id'] uploader_id = author.get('id') or author.get('email')
view_count = video_data.get('views_count')
movie = video_data['movie'] meta_data = video_data['meta']
content_id = str(movie['contentId']) content_id = '%s_%s' % (
title = movie['title'] meta_data.get('accId', ''), meta_data['itemId'])
title = meta_data['title']
if title.endswith('.mp4'): if title.endswith('.mp4'):
title = title[:-4] title = title[:-4]
thumbnail = movie['poster'] thumbnail = meta_data['poster']
duration = movie['duration'] duration = meta_data['duration']
timestamp = meta_data['timestamp']
view_count = video_data['views_count']
formats = [ formats = [
{ {
'url': video['url'], 'url': video['url'],
'format_id': video['name'], 'format_id': video['key'],
'height': int(video['key'].rstrip('p'))
} for video in video_data['videos'] } for video in video_data['videos']
] ]
self._sort_formats(formats)
return { return {
'id': content_id, 'id': content_id,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': video_data['timestamp'], 'timestamp': timestamp,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'formats': formats, 'formats': formats,
} }

View File

@@ -186,7 +186,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
def _get_feed_url(self, uri): def _get_feed_url(self, uri):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
site_id = uri.replace(video_id, '') site_id = uri.replace(video_id, '')
config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id) config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
'context4/context5/config.xml'.format(site_id))
config_doc = self._download_xml(config_url, video_id) config_doc = self._download_xml(config_url, video_id)
feed_node = config_doc.find('.//feed') feed_node = config_doc.find('.//feed')
feed_url = feed_node.text.strip().split('?')[0] feed_url = feed_node.text.strip().split('?')[0]

View File

@@ -7,6 +7,7 @@ from ..utils import (
unified_strdate, unified_strdate,
parse_duration, parse_duration,
qualities, qualities,
strip_jsonp,
url_basename, url_basename,
) )
@@ -63,7 +64,7 @@ class NPOIE(InfoExtractor):
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
# We have to remove the javascript callback # We have to remove the javascript callback
transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j) transform_source=strip_jsonp,
) )
token_page = self._download_webpage( token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js', 'http://ida.omroep.nl/npoplayer/i.js',

View File

@@ -8,6 +8,7 @@ import sys
from .compat import ( from .compat import (
compat_expanduser, compat_expanduser,
compat_getenv, compat_getenv,
compat_kwargs,
) )
from .utils import ( from .utils import (
get_term_width, get_term_width,
@@ -112,7 +113,7 @@ def parseOpts(overrideArguments=None):
'conflict_handler': 'resolve', 'conflict_handler': 'resolve',
} }
parser = optparse.OptionParser(**kw) parser = optparse.OptionParser(**compat_kwargs(kw))
general = optparse.OptionGroup(parser, 'General Options') general = optparse.OptionGroup(parser, 'General Options')
general.add_option( general.add_option(

View File

@@ -843,7 +843,7 @@ def bytes_to_intlist(bs):
def intlist_to_bytes(xs): def intlist_to_bytes(xs):
if not xs: if not xs:
return b'' return b''
return struct.pack('%dB' % len(xs), *xs) return struct_pack('%dB' % len(xs), *xs)
# Cross-platform file locking # Cross-platform file locking
@@ -1331,7 +1331,8 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) return re.sub(
r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code): def js_to_json(code):

View File

@@ -1,2 +1,2 @@
__version__ = '2014.11.13.1' __version__ = '2014.11.15.1'