Compare commits

...

65 Commits

Author SHA1 Message Date
Philipp Hagemeister
acf2a6e97b release 2015.02.02 2015-02-02 01:49:40 +01:00
Philipp Hagemeister
8cfb6efe6f [jsinterp] Correct div command 2015-02-02 01:49:32 +01:00
Philipp Hagemeister
04edb9caf5 Merge pull request #4838 from raunaqrox/patch-1
supported sites link was not opening from README
2015-02-01 23:32:44 +01:00
Sahebjot singh
044131ba21 supported sites was not opening
required a .md at the end.
2015-02-02 03:49:28 +05:30
Philipp Hagemeister
0a7055c90d release 2015.02.01 2015-02-01 22:42:19 +01:00
Philipp Hagemeister
9e3f19919a [jsinterp] Beef up and add tests
In preparation for #4822, extend jsinterp by a lot. (We may even have to/want to write a proper interpreter with actual parsing)
2015-02-01 22:38:29 +01:00
Sergey M․
4a3da4ebdb [hls] Simplify 2015-02-01 23:54:38 +06:00
Sergey M․
027008b14e [hls] Fix encode issues on python2 @ Windows 2015-02-01 23:49:23 +06:00
Sergey M․
c6df692466 [rutv] Extract all m3u8 formats 2015-02-01 23:48:23 +06:00
Philipp Hagemeister
acf757f42e [youtube] Complete test definition 2015-02-01 15:33:32 +01:00
Philipp Hagemeister
dd8982f19c [wdr] Complete test definition 2015-02-01 15:27:16 +01:00
Philipp Hagemeister
654bd52f58 [teletask] Fix test definition 2015-02-01 15:25:33 +01:00
Philipp Hagemeister
a9551e9020 [soundcloud:set] Fix test definition 2015-02-01 15:24:38 +01:00
Philipp Hagemeister
4e980275b5 [test_download] Remove references to "file" in test definitions 2015-02-01 15:21:18 +01:00
Philipp Hagemeister
c172440ac5 [ringtv] Modernize test definition 2015-02-01 15:21:02 +01:00
Philipp Hagemeister
e332772531 [servingsys] Modernize 2015-02-01 15:18:52 +01:00
Philipp Hagemeister
437cac8cc1 [sina] Modernize and simplify 2015-02-01 15:16:35 +01:00
Philipp Hagemeister
9f281cacd2 [keezmovies] Fix extraction and modernize test 2015-02-01 15:13:44 +01:00
Philipp Hagemeister
748a0fab8a Remove unused imports 2015-02-01 15:08:50 +01:00
Philipp Hagemeister
c1f06d6307 [macgamestore] Modernize 2015-02-01 15:08:33 +01:00
Philipp Hagemeister
c4e817ce4a [france2.fr:generation-quoi] Modernize 2015-02-01 15:06:55 +01:00
Philipp Hagemeister
9a3e5e6955 [kankan] Modernize 2015-02-01 15:03:55 +01:00
Philipp Hagemeister
228d30ed06 [la7] Modernize 2015-02-01 15:03:03 +01:00
Philipp Hagemeister
057c0609fc [toutv] Modernize test definition 2015-02-01 15:01:33 +01:00
Philipp Hagemeister
17d2712d9c [teamcoco] Modernize and fix extraction 2015-02-01 15:00:54 +01:00
Philipp Hagemeister
fc09240e24 [vimeo] Modernize test definition 2015-02-01 12:12:27 +01:00
Philipp Hagemeister
146303136f [nerdcubed] Modernize test definition 2015-02-01 12:11:20 +01:00
Philipp Hagemeister
96aded8d3d [rottentomatoes] Modernize test definition 2015-02-01 12:11:14 +01:00
Philipp Hagemeister
2886be15aa [defense] Modernize 2015-02-01 12:10:15 +01:00
Philipp Hagemeister
ca0f500ecf [mtv] Modernize and clean up test 2015-02-01 12:08:21 +01:00
Philipp Hagemeister
29aef5a33c [ard] Remove deleted video test case 2015-02-01 12:00:47 +01:00
Philipp Hagemeister
9158b2b301 [mpora] Modernize 2015-02-01 11:58:37 +01:00
Philipp Hagemeister
0196149c5b [compat] Correct compat_basestring definition 2015-02-01 11:37:00 +01:00
Philipp Hagemeister
8f9312c387 Appease pyflakes8-3 2015-02-01 11:30:56 +01:00
Sergey M․
439b9a9e9b Merge branch 'kinetoskombi-globo-fix' 2015-02-01 04:36:57 +06:00
Sergey M․
8c72beb25e [globo] Properly extract m3u8 formats (#4346 #4832) 2015-02-01 04:36:24 +06:00
kinetoskombi
1ee94db2d0 [globo] Fix error on some globo videos 2015-01-31 20:07:43 -02:00
Jaime Marquínez Ferrándiz
e41b1f7385 Fix flake8 errors 2015-01-31 10:51:39 +01:00
Jaime Marquínez Ferrándiz
cd596028d6 [rtve] Recognize mobile urls (fixes #4823) 2015-01-30 23:46:55 +01:00
Jaime Marquínez Ferrándiz
cc57bd33a8 [rtve] Fix extraction
Skip live stream test, we can't use the f4m manifest yet
2015-01-30 23:46:55 +01:00
Sergey M․
6d593c3276 [YoutubeDL] Fix video+audio format_id (Closes #4824) 2015-01-31 03:50:11 +06:00
Philipp Hagemeister
91755ee384 [comedycentral:shows] Generate better IDs and add a test for that 2015-01-30 19:43:46 +01:00
Sergey M․
0692ef86ef [bbccouk] Improve _VALID_URL 2015-01-30 23:47:09 +06:00
Sergey M․
439d9be27d [mixcloud] Remove unused import 2015-01-30 23:21:58 +06:00
Sergey M․
b80505a409 [mixcloud] Fix extraction (Closes #4784) 2015-01-30 23:21:44 +06:00
Sergey M․
e4c17d7274 [nhl:news] Add extractor (Closes #4805) 2015-01-30 23:12:27 +06:00
Sergey M․
2c58674e0e [nhl] Improve _VALID_URL (#4805) 2015-01-30 22:46:53 +06:00
Sergey M․
ef1269fb07 [drtv] Improve _VALID_URL (#4814) 2015-01-30 22:42:11 +06:00
Sergey M․
e525d9a3df [mtv] Extract subtitles (Closes #4811) 2015-01-30 21:57:59 +06:00
Sergey M․
20b4492c71 [spike] Improve _VALID_URL 2015-01-30 21:54:48 +06:00
Sergey M․
dee3f73787 [spike] Modernize 2015-01-30 21:54:14 +06:00
Philipp Hagemeister
d543bdc351 [downloader/f4m] Clarify that we should eventually just implement the DRM scheme (#3000) 2015-01-30 16:06:55 +01:00
Philipp Hagemeister
c7ff0c6422 Merge remote-tracking branch 'rzhxeo/f4m-drm' 2015-01-30 16:00:47 +01:00
Philipp Hagemeister
01c46659c4 [washingtonpost] Catch more UUIDs 2015-01-30 15:53:58 +01:00
Philipp Hagemeister
b04b885271 [extractor/common] Document all protocol values 2015-01-30 15:53:16 +01:00
Philipp Hagemeister
dc35bfd2d5 [test/helper] Clarify which keys have to be added 2015-01-30 15:52:56 +01:00
Philipp Hagemeister
70fca8d694 [youtube] Remove unused import 2015-01-30 10:59:19 +01:00
Philipp Hagemeister
a52c633536 [cinchcast] Wrap overly long lines (#4820) 2015-01-30 10:59:07 +01:00
Philipp Hagemeister
7b6c60393e Merge remote-tracking branch 'codesparkle/master' 2015-01-30 10:56:53 +01:00
Philipp Hagemeister
83e7a314b4 dedup AUTHORS 2015-01-30 10:48:39 +01:00
codesparkle
749f2ca044 Smotri info extractor: removed unreachable code and updated old md5 for test video 2015-01-30 20:35:20 +11:00
codesparkle
5468ff4d91 Remove duplicate dictionary keys 2015-01-30 20:11:51 +11:00
codesparkle
1d2daaea63 Simplify list creation 2015-01-30 20:10:12 +11:00
codesparkle
52585fd6dc The opening curly brace { is a regex reserved [control character](http://stackoverflow.com/a/400316/1106367), so it needs to be escaped. 2015-01-30 18:41:40 +11:00
rzhxeo
6ca85be6f8 Filter DRM protected media in f4m downloader 2015-01-26 20:44:48 +01:00
50 changed files with 609 additions and 257 deletions

View File

@@ -107,5 +107,4 @@ Yen Chi Hsuan
Enam Mijbah Noor Enam Mijbah Noor
David Luhmer David Luhmer
Shaya Goldberg Shaya Goldberg
Yen Chi Hsuan
Paul Hartmann Paul Hartmann

View File

@@ -534,7 +534,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
### How can I detect whether a given URL is supported by youtube-dl? ### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.

View File

@@ -148,9 +148,15 @@ def expect_info_dict(self, got_dict, expected_dict):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
else: else:
return repr(v) return repr(v)
info_dict_str = ''.join( info_dict_str = ''
' %s: %s,\n' % (_repr(k), _repr(v)) if len(missing_keys) != len(expected_dict):
for k, v in test_info_dict.items()) info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys)
info_dict_str += '\n'
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys)
write_string( write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
self.assertFalse( self.assertFalse(

View File

@@ -89,7 +89,7 @@ def generator(test_case):
for tc in test_cases: for tc in test_cases:
info_dict = tc.get('info_dict', {}) info_dict = tc.get('info_dict', {})
if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')): if not (info_dict.get('id') and info_dict.get('ext')):
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
if 'skip' in test_case: if 'skip' in test_case:
@@ -116,7 +116,7 @@ def generator(test_case):
expect_warnings(ydl, test_case.get('expected_warnings', [])) expect_warnings(ydl, test_case.get('expected_warnings', []))
def get_tc_filename(tc): def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) return ydl.prepare_filename(tc.get('info_dict', {}))
res_dict = None res_dict = None

95
test/test_jsinterp.py Normal file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.jsinterp import JSInterpreter
class TestJSInterpreter(unittest.TestCase):
    """Tests for youtube_dl.jsinterp.JSInterpreter.

    Each test feeds a small JavaScript function to the interpreter and
    checks the value produced by call_function().
    """

    def test_basic(self):
        # An empty body yields None; a literal return yields the literal.
        interp = JSInterpreter('function x(){;}')
        self.assertEqual(interp.call_function('x'), None)

        interp = JSInterpreter('function x3(){return 42;}')
        self.assertEqual(interp.call_function('x3'), 42)

    def test_calc(self):
        # Arithmetic on a function argument.
        interp = JSInterpreter('function x4(a){return 2*a+1;}')
        self.assertEqual(interp.call_function('x4', 3), 7)

    def test_empty_return(self):
        # A bare `return;` stops execution and yields None.
        interp = JSInterpreter('function f(){return; y()}')
        self.assertEqual(interp.call_function('f'), None)

    def test_morespace(self):
        # Extra whitespace around tokens must be tolerated.
        interp = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
        self.assertEqual(interp.call_function('x', 3), 7)

        interp = JSInterpreter('function f () { x = 2 ; return x; }')
        self.assertEqual(interp.call_function('f'), 2)

    def test_strange_chars(self):
        # Identifiers may contain $ and _ (common in obfuscated players).
        interp = JSInterpreter(
            'function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
        self.assertEqual(interp.call_function('$_xY1', 20), 21)

    def test_operators(self):
        # Bitwise shift and AND operators.
        for source, expected in (
            ('function f(){return 1 << 5;}', 32),
            ('function f(){return 19 & 21;}', 17),
            ('function f(){return 11 >> 2;}', 2),
        ):
            interp = JSInterpreter(source)
            self.assertEqual(interp.call_function('f'), expected)

    def test_array_access(self):
        # Element assignment, including repeated writes to the same index.
        interp = JSInterpreter(
            'function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
        self.assertEqual(interp.call_function('f'), [5, 2, 7])

    def test_parens(self):
        # Deeply nested and grouping parentheses.
        interp = JSInterpreter(
            'function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
        self.assertEqual(interp.call_function('f'), 7)

        interp = JSInterpreter('function f(){return (1 + 2) * 3;}')
        self.assertEqual(interp.call_function('f'), 9)

    def test_assignments(self):
        # Plain, additive and subtractive assignment operators.
        for source, expected in (
            ('function f(){var x = 20; x = 30 + 1; return x;}', 31),
            ('function f(){var x = 20; x += 30 + 1; return x;}', 51),
            ('function f(){var x = 20; x -= 30 + 1; return x;}', -11),
        ):
            interp = JSInterpreter(source)
            self.assertEqual(interp.call_function('f'), expected)

    def test_comments(self):
        # /* ... */ comments, including multi-line ones, are skipped.
        interp = JSInterpreter('''
        function x() {
            var x = /* 1 + */ 2;
            var y = /* 30
            * 40 */ 50;
            return x + y;
        }
        ''')
        self.assertEqual(interp.call_function('x'), 52)

    def test_precedence(self):
        # % binds tighter than the surrounding index expression.
        interp = JSInterpreter('''
        function x() {
            var a = [10, 20, 30, 40, 50];
            var b = 6;
            a[0]=a[b%a.length];
            return a;
        }''')
        self.assertEqual(interp.call_function('x'), [20, 20, 30, 40, 50])
# Allow running this test module directly: ``python test_jsinterp.py``.
if __name__ == '__main__':
    unittest.main()

View File

@@ -25,6 +25,7 @@ if os.name == 'nt':
import ctypes import ctypes
from .compat import ( from .compat import (
compat_basestring,
compat_cookiejar, compat_cookiejar,
compat_expanduser, compat_expanduser,
compat_http_client, compat_http_client,
@@ -1074,8 +1075,8 @@ class YoutubeDL(object):
selected_format = { selected_format = {
'requested_formats': formats_info, 'requested_formats': formats_info,
'format': rf, 'format': rf,
'format_id': rf, 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
'ext': formats_info[0]['ext'], formats_info[1].get('format_id')),
'width': formats_info[0].get('width'), 'width': formats_info[0].get('width'),
'height': formats_info[0].get('height'), 'height': formats_info[0].get('height'),
'resolution': formats_info[0].get('resolution'), 'resolution': formats_info[0].get('resolution'),
@@ -1558,7 +1559,7 @@ class YoutubeDL(object):
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
# To work around aforementioned issue we will replace request's original URL with # To work around aforementioned issue we will replace request's original URL with
# percent-encoded one # percent-encoded one
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) req_is_string = isinstance(req, compat_basestring)
url = req if req_is_string else req.get_full_url() url = req if req_is_string else req.get_full_url()
url_escaped = escape_url(url) url_escaped = escape_url(url)

View File

@@ -114,6 +114,26 @@ except ImportError:
string += pct_sequence.decode(encoding, errors) string += pct_sequence.decode(encoding, errors)
return string return string
# Python 2/3 compatibility aliases.  Each probe relies on the Python 2-only
# builtin raising NameError on Python 3 (EAFP), falling back to the
# Python 3 equivalent.

# Text type: unicode on Python 2, str on Python 3.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str

# Common base of str/unicode on Python 2; plain str on Python 3.
try:
    compat_basestring = basestring  # Python 2
except NameError:
    compat_basestring = str

# Code point -> character: unichr on Python 2, chr on Python 3.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr

# XML parse error type; Python 2.6's ElementTree raises ExpatError instead.
try:
    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
try: try:
from urllib.parse import parse_qs as compat_parse_qs from urllib.parse import parse_qs as compat_parse_qs
@@ -123,7 +143,7 @@ except ImportError: # Python 2
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'): encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, unicode qs, _coerce_result = qs, compat_str
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = [] r = []
for name_value in pairs: for name_value in pairs:
@@ -162,21 +182,6 @@ except ImportError: # Python 2
parsed_result[name] = [value] parsed_result[name] = [value]
return parsed_result return parsed_result
try:
compat_str = unicode # Python 2
except NameError:
compat_str = str
try:
compat_chr = unichr # Python 2
except NameError:
compat_chr = chr
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
try: try:
from shlex import quote as shlex_quote from shlex import quote as shlex_quote
except ImportError: # Python < 3.3 except ImportError: # Python < 3.3
@@ -362,6 +367,7 @@ def workaround_optparse_bug9161():
__all__ = [ __all__ = [
'compat_HTTPError', 'compat_HTTPError',
'compat_basestring',
'compat_chr', 'compat_chr',
'compat_cookiejar', 'compat_cookiejar',
'compat_expanduser', 'compat_expanduser',

View File

@@ -230,6 +230,23 @@ class F4mFD(FileDownloader):
A downloader for f4m manifests or AdobeHDS. A downloader for f4m manifests or AdobeHDS.
""" """
def _get_unencrypted_media(self, doc):
    """Return the <media> nodes of the f4m manifest *doc* that are not
    DRM-protected.

    Reports an error (via self.report_error) when the manifest contains
    no media at all, when a DRM header carries no ``id`` attribute, or
    when every media node turns out to be DRM-protected.
    """
    media = doc.findall(_add_ns('media'))
    if not media:
        self.report_error('No media found')
    for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
              doc.findall(_add_ns('drmAdditionalHeaderSet'))):
        # If id attribute is missing it's valid for all media nodes
        # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
        if 'id' not in e.attrib:
            self.report_error('Missing ID in f4m DRM')
    # Keep only media nodes that do not reference a DRM header.
    media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
                        'drmAdditionalHeaderSetId' not in e.attrib,
                        media))
    if not media:
        self.report_error('Unsupported DRM')
    return media
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr') requested_bitrate = info_dict.get('tbr')
@@ -248,7 +265,8 @@ class F4mFD(FileDownloader):
) )
doc = etree.fromstring(manifest) doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)]
if requested_bitrate is None: if requested_bitrate is None:
# get the best format # get the best format
formats = sorted(formats, key=lambda f: f[0]) formats = sorted(formats, key=lambda f: f[0])

View File

@@ -11,6 +11,7 @@ from ..compat import (
compat_urllib_request, compat_urllib_request,
) )
from ..utils import ( from ..utils import (
encodeArgument,
encodeFilename, encodeFilename,
) )
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
args = [
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc',
encodeFilename(tmpfilename, for_subprocess=True)]
ffpp = FFmpegPostProcessor(downloader=self) ffpp = FFmpegPostProcessor(downloader=self)
program = ffpp._executable program = ffpp._executable
if program is None: if program is None:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False return False
ffpp.check_version() ffpp.check_version()
cmd = [program] + args
retval = subprocess.call(cmd) args = [
encodeArgument(opt)
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
args.append(encodeFilename(tmpfilename, True))
retval = subprocess.call(args)
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize)) self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,

View File

@@ -294,7 +294,11 @@ from .nextmedia import (
) )
from .nfb import NFBIE from .nfb import NFBIE
from .nfl import NFLIE from .nfl import NFLIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import (
NHLIE,
NHLNewsIE,
NHLVideocenterIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .noco import NocoIE from .noco import NocoIE

View File

@@ -122,7 +122,6 @@ class AppleTrailersIE(InfoExtractor):
playlist.append({ playlist.append({
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title,
'formats': formats, 'formats': formats,
'title': title, 'title': title,
'duration': duration, 'duration': duration,

View File

@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'file': '22429276.mp4', 'only_matching': True,
'md5': '469751912f1de0816a9fc9df8336476c',
'info_dict': {
'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
},
'skip': 'Blocked outside of Germany',
}, { }, {
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
'info_dict': { 'info_dict': {

View File

@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor): class BBCCoUkIE(SubtitlesInfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
_TESTS = [ _TESTS = [
{ {
@@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
}, { }, {
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
'only_matching': True,
} }
] ]

View File

@@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor):
item, './{http://developer.longtailvideo.com/trac/}date') item, './{http://developer.longtailvideo.com/trac/}date')
upload_date = unified_strdate(date_str, day_first=False) upload_date = unified_strdate(date_str, day_first=False)
# duration is present but wrong # duration is present but wrong
formats = [] formats = [{
formats.append({
'format_id': 'main', 'format_id': 'main',
'url': item.find( 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
'./{http://search.yahoo.com/mrss/}content').attrib['url'], }]
})
backup_url = xpath_text( backup_url = xpath_text(
item, './{http://developer.longtailvideo.com/trac/}backupContent') item, './{http://developer.longtailvideo.com/trac/}backupContent')
if backup_url: if backup_url:

View File

@@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)| )|
(?P<interview> (?P<interview>
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) extended-interviews/(?P<interID>[0-9a-z]+)/
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
(?:/[^/?#]?|[?#]|$))))
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
@@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
'uploader': 'thedailyshow', 'uploader': 'thedailyshow',
'title': 'thedailyshow kristen-stewart part 1', 'title': 'thedailyshow kristen-stewart part 1',
} }
}, {
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
'info_dict': {
'id': 'sarah-chayes-extended-interview',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'title': 'thedailyshow Sarah Chayes Extended Interview',
},
'playlist': [
{
'info_dict': {
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
'ext': 'mp4',
'upload_date': '20150129',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'uploader': 'thedailyshow',
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
},
},
{
'info_dict': {
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
'ext': 'mp4',
'upload_date': '20150129',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'uploader': 'thedailyshow',
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
},
},
],
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', 'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
'only_matching': True, 'only_matching': True,
@@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': epTitle,
'entries': entries, 'entries': entries,
'title': show_name + ' ' + title, 'title': show_name + ' ' + title,
'description': description, 'description': description,

View File

@@ -89,7 +89,8 @@ class InfoExtractor(object):
* player_url SWF Player URL (used for rtmpdump). * player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual * protocol The protocol that will be used for the actual
download, lower-case. download, lower-case.
"http", "https", "rtsp", "rtmp", "m3u8" or so. "http", "https", "rtsp", "rtmp", "rtmpe",
"m3u8", or "m3u8_native".
* preference Order number of this format. If this field is * preference Order number of this format. If this field is
present and not None, the formats get sorted present and not None, the formats get sorted
by this field, regardless of all other values. by this field, regardless of all other values.

View File

@@ -1,40 +1,38 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor): class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr' IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = { _TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
'file': '11213.mp4',
'md5': '75bba6124da7e63d2d60b5244ec9430c', 'md5': '75bba6124da7e63d2d60b5244ec9430c',
"info_dict": { 'info_dict': {
"title": "attaque-chimique-syrienne-du-21-aout-2013-1" 'id': '11213',
'ext': 'mp4',
'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
title = re.match(self._VALID_URL, url).group(1) title = self._match_id(url)
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
video_id = self._search_regex( video_id = self._search_regex(
r"flashvars.pvg_id=\"(\d+)\";", r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID') webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id) + video_id)
info = self._download_webpage(json_url, title, info = self._download_json(json_url, title, 'Downloading JSON config')
'Downloading JSON config') video_url = info['renditions'][0]['url']
video_url = json.loads(info)['renditions'][0]['url']
return {'id': video_id, return {
'ext': 'mp4', 'id': video_id,
'url': video_url, 'ext': 'mp4',
'title': title, 'url': video_url,
} 'title': title,
}

View File

@@ -6,7 +6,7 @@ from ..utils import parse_iso8601
class DRTVIE(SubtitlesInfoExtractor): class DRTVIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)' _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = { _TEST = {
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',

View File

@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
class GenerationQuoiIE(InfoExtractor): class GenerationQuoiIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-quoi' IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
_TEST = { _TEST = {
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
'info_dict': { 'info_dict': {
'id': 'k7FJX8VBcvvLmX4wA5Q',
'ext': 'mp4',
'title': 'Génération Quoi - Garde à Vous', 'title': 'Génération Quoi - Garde à Vous',
'uploader': 'Génération Quoi', 'uploader': 'Génération Quoi',
}, },
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
# It uses Dailymotion # It uses Dailymotion
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Only available from France',
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
name = mobj.group('name') info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) info_json = self._download_webpage(info_url, display_id)
info_json = self._download_webpage(info_url, name)
info = json.loads(info_json) info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
ie='Dailymotion') ie='Dailymotion')

View File

@@ -1073,7 +1073,7 @@ class GenericIE(InfoExtractor):
found = filter_video(re.findall(r'''(?xs) found = filter_video(re.findall(r'''(?xs)
flowplayer\("[^"]+",\s* flowplayer\("[^"]+",\s*
\{[^}]+?\}\s*, \{[^}]+?\}\s*,
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s* \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
["']?url["']?\s*:\s*["']([^"']+)["'] ["']?url["']?\s*:\s*["']([^"']+)["']
''', webpage)) ''', webpage))
if not found: if not found:

View File

@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
'like_count': int, 'like_count': int,
} }
}, },
{
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
'md5': 'c1defca721ce25b2354e927d3e4b3dec',
'info_dict': {
'id': '3928201',
'ext': 'mp4',
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
'duration': 1472.906,
'uploader': 'Canal Brasil',
'uploader_id': 705,
'like_count': int,
}
},
] ]
class MD5(): class MD5():
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding) signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5 signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
formats.append({ resource_url = resource['url']
'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'), signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
'format_id': resource_id, if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
'height': resource['height'] formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
}) else:
formats.append({
'url': signed_url,
'format_id': resource_id,
'height': resource.get('height'),
})
self._sort_formats(formats) self._sort_formats(formats)

View File

@@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor):
return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None)) return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
def _transform_bootstrap(self, js): def _transform_bootstrap(self, js):
return re.split('(?m)^\s*try\s*{', js)[0] \ return re.split('(?m)^\s*try\s*\{', js)[0] \
.split(' = ', 1)[1].strip().rstrip(';') .split(' = ', 1)[1].strip().rstrip(';')
def _transform_meta(self, js): def _transform_meta(self, js):

View File

@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
'file': '48863.flv',
'md5': '29aca1e47ae68fc28804aca89f29507e', 'md5': '29aca1e47ae68fc28804aca89f29507e',
'info_dict': { 'info_dict': {
'id': '48863',
'ext': 'flv',
'title': 'Ready To Go', 'title': 'Ready To Go',
}, },
'skip': 'Only available from China', 'skip': 'Only available from China',
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')

View File

@@ -7,10 +7,6 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urllib_parse,
)
from ..aes import (
aes_decrypt_text
) )
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)' _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = { _TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'file': '1214711.mp4',
'md5': '6e297b7e789329923fcf83abb67c9289', 'md5': '6e297b7e789329923fcf83abb67c9289',
'info_dict': { 'info_dict': {
'id': '1214711',
'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub', 'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18, 'age_limit': 18,
} }
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
embedded_url = mobj.group(1) embedded_url = mobj.group(1)
return self.url_result(embedded_url) return self.url_result(embedded_url)
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title') video_title = self._html_search_regex(
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url')) r'<h1 [^>]*>([^<]+)', webpage, 'title')
if 'encrypted=true' in webpage: video_url = self._html_search_regex(
password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password') r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
path = compat_urllib_parse_urlparse(video_url).path path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:] extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2] format = path.split('/')[4].split('_')[:2]

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.la7.tv/richplayer/?assetid=50355319', 'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
'file': '50355319.mp4',
'md5': 'ec7d1f0224d20ba293ab56cf2259651f', 'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
'info_dict': { 'info_dict': {
'id': '50355319',
'ext': 'mp4',
'title': 'IL DIVO', 'title': 'IL DIVO',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
'duration': 6254, 'duration': 6254,
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
doc = self._download_xml(xml_url, video_id) doc = self._download_xml(xml_url, video_id)

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
'file': '2450.m4v',
'md5': '8649b8ea684b6666b4c5be736ecddc61', 'md5': '8649b8ea684b6666b4c5be736ecddc61',
'info_dict': { 'info_dict': {
'id': '2450',
'ext': 'm4v',
'title': 'Crow', 'title': 'Crow',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id') webpage = self._download_webpage(
url, video_id, 'Downloading trailer page')
webpage = self._download_webpage(url, video_id, 'Downloading trailer page') if '>Missing Media<' in webpage:
raise ExtractorError(
if re.search(r'>Missing Media<', webpage) is not None: 'Trailer %s does not exist' % video_id, expected=True)
raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title') r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')

View File

@@ -9,7 +9,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest, HEADRequest,
int_or_none, str_to_int,
parse_iso8601, parse_iso8601,
) )
@@ -85,15 +85,17 @@ class MixcloudIE(InfoExtractor):
uploader_id = self._search_regex( uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
like_count = int_or_none(self._search_regex( like_count = str_to_int(self._search_regex(
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
r'/favorites/?">([0-9]+)<'],
webpage, 'like count', fatal=False)) webpage, 'like count', fatal=False))
view_count = int_or_none(self._search_regex( view_count = str_to_int(self._search_regex(
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False)) webpage, 'play count', fatal=False))
timestamp = parse_iso8601(self._search_regex( timestamp = parse_iso8601(self._search_regex(
r'<time itemprop="dateCreated" datetime="([^"]+)">', r'<time itemprop="dateCreated" datetime="([^"]+)">',
webpage, 'upload date')) webpage, 'upload date', default=None))
return { return {
'id': track_id, 'id': track_id,

View File

@@ -1,21 +1,19 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
class MporaIE(InfoExtractor): class MporaIE(InfoExtractor):
_VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA' IE_NAME = 'MPORA'
_TEST = { _TEST = {
'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', 'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
'file': 'AAdo8okx4wiz.mp4',
'md5': 'a7a228473eedd3be741397cf452932eb', 'md5': 'a7a228473eedd3be741397cf452932eb',
'info_dict': { 'info_dict': {
'id': 'AAdo8okx4wiz',
'ext': 'mp4',
'title': 'Katy Curd - Winter in the Forest', 'title': 'Katy Curd - Winter in the Forest',
'duration': 416, 'duration': 416,
'uploader': 'Peter Newman Media', 'uploader': 'Peter Newman Media',
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data_json = self._search_regex( data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = self._parse_json(data_json, video_id)
data = json.loads(data_json)
uploader = data['info_overlay'].get('username') uploader = data['info_overlay'].get('username')
duration = data['video']['duration'] // 1000 duration = data['video']['duration'] // 1000

View File

@@ -2,10 +2,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_str,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@@ -22,7 +23,7 @@ def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVServicesInfoExtractor(InfoExtractor): class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
_MOBILE_TEMPLATE = None _MOBILE_TEMPLATE = None
@staticmethod @staticmethod
@@ -78,17 +79,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
try: try:
_, _, ext = rendition.attrib['type'].partition('/') _, _, ext = rendition.attrib['type'].partition('/')
rtmp_video_url = rendition.find('./src').text rtmp_video_url = rendition.find('./src').text
formats.append({'ext': ext, if rtmp_video_url.endswith('siteunavail.png'):
'url': self._transform_rtmp_url(rtmp_video_url), continue
'format_id': rendition.get('bitrate'), formats.append({
'width': int(rendition.get('width')), 'ext': ext,
'height': int(rendition.get('height')), 'url': self._transform_rtmp_url(rtmp_video_url),
}) 'format_id': rendition.get('bitrate'),
'width': int(rendition.get('width')),
'height': int(rendition.get('height')),
})
except (KeyError, TypeError): except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.') raise ExtractorError('Invalid rendition field.')
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
def _extract_subtitles(self, mdoc, mtvn_id):
subtitles = {}
FORMATS = {
'scc': 'cea-608',
'eia-608': 'cea-608',
'xml': 'ttml',
}
subtitles_format = FORMATS.get(
self._downloader.params.get('subtitlesformat'), 'ttml')
for transcript in mdoc.findall('.//transcript'):
if transcript.get('kind') != 'captions':
continue
lang = transcript.get('srclang')
for typographic in transcript.findall('./typographic'):
captions_format = typographic.get('format')
if captions_format == subtitles_format:
subtitles[lang] = compat_str(typographic.get('src'))
break
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(mtvn_id, subtitles)
return self.extract_subtitles(mtvn_id, subtitles)
def _get_video_info(self, itemdoc): def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text uri = itemdoc.find('guid').text
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
@@ -135,6 +161,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return { return {
'title': title, 'title': title,
'formats': self._extract_video_formats(mediagen_doc, mtvn_id), 'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
'id': video_id, 'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description, 'description': description,
@@ -167,7 +194,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
mgid = self._search_regex( mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid') webpage, 'mgid')
return self._get_videos_info(mgid)
videos_info = self._get_videos_info(mgid)
if self._downloader.params.get('listsubtitles', False):
return
return videos_info
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
@@ -212,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
'file': '853555.mp4',
'md5': '850f3f143316b1e71fa56a4edfd6e0f8', 'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
'info_dict': { 'info_dict': {
'id': '853555',
'ext': 'mp4',
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
}, },
}, },
{
'add_ie': ['Vevo'],
'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
'file': 'USCJY1331283.mp4',
'md5': '73b4e7fcadd88929292fe52c3ced8caf',
'info_dict': {
'title': 'Everything Has Changed',
'upload_date': '20130606',
'uploader': 'Taylor Swift',
},
'skip': 'VEVO is only available in some countries',
},
] ]
def _get_thumbnail_url(self, uri, itemdoc): def _get_thumbnail_url(self, uri, itemdoc):
@@ -244,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com # Some videos come from Vevo.com
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', m_vevo = re.search(
webpage, re.DOTALL) r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
if m_vevo: if m_vevo:
vevo_id = m_vevo.group(1) vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id) self.to_screen('Vevo video detected: %s' % vevo_id)

View File

@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/feed.json',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 1300,

View File

@@ -20,6 +20,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
def _fix_json(json_string): def _fix_json(json_string):
return json_string.replace('\\\'', '\'') return json_string.replace('\\\'', '\'')
def _real_extract_video(self, video_id):
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
data = self._download_json(
json_url, video_id, transform_source=self._fix_json)
return self._extract_video(data[0])
def _extract_video(self, info): def _extract_video(self, info):
video_id = info['id'] video_id = info['id']
self.report_extraction(video_id) self.report_extraction(video_id)
@@ -54,7 +60,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor): class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com' IE_NAME = 'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -92,15 +98,41 @@ class NHLIE(NHLBaseInfoExtractor):
}, { }, {
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://video.nhl.com/videocenter/?id=736722',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id') return self._real_extract_video(video_id)
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
data = self._download_json(
json_url, video_id, transform_source=self._fix_json) class NHLNewsIE(NHLBaseInfoExtractor):
return self._extract_video(data[0]) IE_NAME = 'nhl.com:news'
IE_DESC = 'NHL news'
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TEST = {
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
'md5': '4b3d1262e177687a3009937bd9ec0be8',
'info_dict': {
'id': '736722',
'ext': 'mp4',
'title': 'Cal Clutterbuck has been fined $2,000',
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
'duration': 37,
'upload_date': '20150128',
},
}
def _real_extract(self, url):
news_id = self._match_id(url)
webpage = self._download_webpage(url, news_id)
video_id = self._search_regex(
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
webpage, 'video id')
return self._real_extract_video(video_id)
class NHLVideocenterIE(NHLBaseInfoExtractor): class NHLVideocenterIE(NHLBaseInfoExtractor):

View File

@@ -6,12 +6,13 @@ from .common import InfoExtractor
class RingTVIE(InfoExtractor): class RingTVIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
_TEST = { _TEST = {
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30", "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
"file": "857645.mp4",
"md5": "d25945f5df41cdca2d2587165ac28720", "md5": "d25945f5df41cdca2d2587165ac28720",
"info_dict": { "info_dict": {
'id': '857645',
'ext': 'mp4',
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV', "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.', "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
} }

View File

@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
_TEST = { _TEST = {
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
'file': '613340.mp4',
'info_dict': { 'info_dict': {
'id': '613340',
'ext': 'mp4',
'title': 'TOY STORY 3', 'title': 'TOY STORY 3',
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
}, },

View File

@@ -57,7 +57,7 @@ def _decrypt_url(png):
class RTVEALaCartaIE(InfoExtractor): class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta' IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta' IE_DESC = 'RTVE a la carta'
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -74,7 +74,11 @@ class RTVEALaCartaIE(InfoExtractor):
'id': '1694255', 'id': '1694255',
'ext': 'flv', 'ext': 'flv',
'title': 'TODO', 'title': 'TODO',
} },
'skip': 'The f4m manifest can\'t be used yet',
}, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -86,6 +90,18 @@ class RTVEALaCartaIE(InfoExtractor):
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
png = self._download_webpage(png_url, video_id, 'Downloading url information') png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png) video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
auth_url = video_url.replace(
'resources/', 'auth/resources/'
).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage(
auth_url, video_id, 'Getting video url')
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format
video_url = (
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
)
return { return {
'id': video_id, 'id': video_id,

View File

@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
'vbr': int(quality), 'vbr': int(quality),
} }
elif transport == 'm3u8': elif transport == 'm3u8':
fmt = { formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
'url': url, continue
'ext': 'mp4',
}
else: else:
fmt = { fmt = {
'url': url 'url': url

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
'info_dict': {
'id': '5349193',
'title': 'AdAPPter_Hyundai_demo',
},
'playlist': [{ 'playlist': [{
'file': '29955898.flv',
'md5': 'baed851342df6846eb8677a60a011a0f', 'md5': 'baed851342df6846eb8677a60a011a0f',
'info_dict': { 'info_dict': {
'id': '29955898',
'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (1)', 'title': 'AdAPPter_Hyundai_demo (1)',
'duration': 74, 'duration': 74,
'tbr': 1378, 'tbr': 1378,
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
'height': 400, 'height': 400,
}, },
}, { }, {
'file': '29907998.flv',
'md5': '979b4da2655c4bc2d81aeb915a8c5014', 'md5': '979b4da2655c4bc2d81aeb915a8c5014',
'info_dict': { 'info_dict': {
'id': '29907998',
'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (2)', 'title': 'AdAPPter_Hyundai_demo (2)',
'duration': 34, 'duration': 34,
'width': 854, 'width': 854,
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
'params': { 'params': {
'playlistend': 2, 'playlistend': 2,
}, },
'skip': 'Blocked in the US [sic]', '_skip': 'Blocked in the US [sic]',
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) pl_id = self._match_id(url)
pl_id = mobj.group('id')
vast_doc = self._download_xml(url, pl_id) vast_doc = self._download_xml(url, pl_id)
title = vast_doc.find('.//AdTitle').text title = vast_doc.find('.//AdTitle').text
media = vast_doc.find('.//MediaFile').text media = vast_doc.find('.//MediaFile').text
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')

View File

@@ -11,7 +11,7 @@ from ..compat import (
class SinaIE(InfoExtractor): class SinaIE(InfoExtractor):
_VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
( (
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
| |
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
'file': '110028898.flv',
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
'info_dict': { 'info_dict': {
'id': '110028898',
'ext': 'flv',
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
} }
}, },
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
}, },
] ]
@classmethod
def suitable(cls, url):
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
def _extract_video(self, video_id): def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id}) data = compat_urllib_parse.urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
if mobj.group('token') is not None: if mobj.group('token') is not None:
# The video id is in the redirected url # The video id is in the redirected url

View File

@@ -108,7 +108,7 @@ class SmotriIE(InfoExtractor):
# swf player # swf player
{ {
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500', 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
'md5': '4d47034979d9390d14acdf59c4935bc2', 'md5': '31099eeb4bc906712c5f40092045108d',
'info_dict': { 'info_dict': {
'id': 'v9188090500', 'id': 'v9188090500',
'ext': 'mp4', 'ext': 'mp4',
@@ -139,9 +139,6 @@ class SmotriIE(InfoExtractor):
def _search_meta(self, name, html, display_name=None): def _search_meta(self, name, html, display_name=None):
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex(
r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
html, display_name, fatal=False)
return self._html_search_meta(name, html, display_name) return self._html_search_meta(name, html, display_name)
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
_TESTS = [{ _TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
'info_dict': { 'info_dict': {
'id': '2284613',
'title': 'The Royal Concept EP', 'title': 'The Royal Concept EP',
}, },
'playlist_mincount': 6, 'playlist_mincount': 6,
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
return { return {
'_type': 'playlist', '_type': 'playlist',
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], 'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
'id': info['id'], 'id': '%s' % info['id'],
'title': info['title'], 'title': info['title'],
} }

View File

@@ -1,14 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor): class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?:// _VALID_URL = r'''(?x)https?://
(www\.spike\.com/(video-clips|episodes)/.+| (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
''' '''
_TEST = { _TEST = {
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
@@ -25,8 +23,7 @@ class SpikeIE(MTVServicesInfoExtractor):
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url) mobile_id = self._match_id(url)
mobile_id = mobj.group('mobile_id') if mobile_id:
if mobile_id is not None:
url = 'http://www.spike.com/video-clips/%s' % mobile_id url = 'http://www.spike.com/video-clips/%s' % mobile_id
return super(SpikeIE, self)._real_extract(url) return super(SpikeIE, self)._real_extract(url)

View File

@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea', 'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': { 'info_dict': {
'id': '80187',
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant', 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
} }
}, { }, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
'info_dict': { 'info_dict': {
'id': '19705',
'ext': 'mp4',
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
"title": "Louis C.K. Interview Pt. 1 11/3/11" "title": "Louis C.K. Interview Pt. 1 11/3/11"
} }
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id") video_id = mobj.group("video_id")
if not video_id: if not video_id:
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'data-node-id="(\d+?)"', r'<div\s+class="player".*?data-id="(\d+?)"',
webpage, 'video id') webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id

View File

@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.tele-task.de/archive/video/html5/26168/', 'url': 'http://www.tele-task.de/archive/video/html5/26168/',
'info_dict': { 'info_dict': {
'id': '26168',
'title': 'Duplicate Detection', 'title': 'Duplicate Detection',
}, },
'playlist': [{ 'playlist': [{
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
lecture_id = self._match_id(url) lecture_id = self._match_id(url)
webpage = self._download_webpage(url, lecture_id) webpage = self._download_webpage(url, lecture_id)
title = self._html_search_regex( title = self._html_search_regex(

View File

@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.tou.tv/30-vies/S04E41', 'url': 'http://www.tou.tv/30-vies/S04E41',
'file': '30-vies_S04E41.mp4',
'info_dict': { 'info_dict': {
'id': '30-vies_S04E41',
'ext': 'mp4',
'title': '30 vies Saison 4 / Épisode 41', 'title': '30 vies Saison 4 / Épisode 41',
'description': 'md5:da363002db82ccbe4dafeb9cab039b09', 'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
'age_limit': 8, 'age_limit': 8,

View File

@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'file': '75524534.mp4',
'md5': 'c507a72f780cacc12b2248bb4006d253', 'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': { 'info_dict': {
'id': '75524534',
'ext': 'mp4',
'title': "DICK HARDWICK 'Comedian'", 'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick', 'uploader': 'Richard Hardwick',
} }

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@@ -11,9 +12,10 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor): class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = { _TESTS = [{
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': { 'info_dict': {
'id': 'sinkhole-of-bureaucracy',
'title': 'Sinkhole of bureaucracy', 'title': 'Sinkhole of bureaucracy',
}, },
'playlist': [{ 'playlist': [{
@@ -40,15 +42,38 @@ class WashingtonPostIE(InfoExtractor):
'upload_date': '20140322', 'upload_date': '20140322',
'uploader': 'The Washington Post', 'uploader': 'The Washington Post',
}, },
}],
}, {
'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
'info_dict': {
'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
'title': 'One airline figured out how to make sure its airplanes never disappear',
},
'playlist': [{
'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
'info_dict': {
'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
'ext': 'mp4',
'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
'upload_date': '20141230',
'uploader': 'The Washington Post',
'timestamp': 1419974765,
'title': 'Why black boxes dont transmit data in real time',
}
}] }]
} }]
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
uuids = re.findall(r'''(?x)
(?:
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
data-video-uuid=
)"([^"]+)"''', webpage)
entries = [] entries = []
for i, uuid in enumerate(uuids, start=1): for i, uuid in enumerate(uuids, start=1):
vinfo_all = self._download_json( vinfo_all = self._download_json(
@@ -75,10 +100,11 @@ class WashingtonPostIE(InfoExtractor):
'filesize': s.get('fileSize'), 'filesize': s.get('fileSize'),
'url': s.get('url'), 'url': s.get('url'),
'ext': 'mp4', 'ext': 'mp4',
'preference': -100 if s.get('type') == 'smil' else None,
'protocol': { 'protocol': {
'MP4': 'http', 'MP4': 'http',
'F4F': 'f4m', 'F4F': 'f4m',
}.get(s.get('type')) }.get(s.get('type')),
} for s in vinfo.get('streams', [])] } for s in vinfo.get('streams', [])]
source_media_url = vinfo.get('sourceMediaURL') source_media_url = vinfo.get('sourceMediaURL')
if source_media_url: if source_media_url:

View File

@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
{ {
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
'playlist_mincount': 146, 'playlist_mincount': 146,
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
} }
] ]

View File

@@ -28,7 +28,6 @@ from ..utils import (
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
int_or_none, int_or_none,
js_to_json,
OnDemandPagedList, OnDemandPagedList,
orderedSet, orderedSet,
unescapeHTML, unescapeHTML,
@@ -1161,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
'info_dict': { 'info_dict': {
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
'title': 'YDL_Empty_List', 'title': 'YDL_Empty_List',
}, },
'playlist_count': 0, 'playlist_count': 0,
@@ -1169,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': { 'info_dict': {
'title': '29C3: Not my department', 'title': '29C3: Not my department',
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
}, },
'playlist_count': 95, 'playlist_count': 95,
}, { }, {
@@ -1176,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'PLBB231211A4F62143', 'url': 'PLBB231211A4F62143',
'info_dict': { 'info_dict': {
'title': '[OLD]Team Fortress 2 (Class-based LP)', 'title': '[OLD]Team Fortress 2 (Class-based LP)',
'id': 'PLBB231211A4F62143',
}, },
'playlist_mincount': 26, 'playlist_mincount': 26,
}, { }, {
@@ -1183,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
'info_dict': { 'info_dict': {
'title': 'Uploads from Cauchemar', 'title': 'Uploads from Cauchemar',
'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
}, },
'playlist_mincount': 799, 'playlist_mincount': 799,
}, { }, {
'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
'info_dict': { 'info_dict': {
'title': 'YDL_safe_search', 'title': 'YDL_safe_search',
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
}, },
'playlist_count': 2, 'playlist_count': 2,
}, { }, {
@@ -1197,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 4, 'playlist_count': 4,
'info_dict': { 'info_dict': {
'title': 'JODA15', 'title': 'JODA15',
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
} }
}, { }, {
'note': 'Embedded SWF player', 'note': 'Embedded SWF player',
@@ -1204,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 4, 'playlist_count': 4,
'info_dict': { 'info_dict': {
'title': 'JODA7', 'title': 'JODA7',
'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
} }
}, { }, {
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'info_dict': { 'info_dict': {
'title': 'Uploads from Interstellar Movie', 'title': 'Uploads from Interstellar Movie',
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
}, },
'playlist_mincout': 21, 'playlist_mincout': 21,
}] }]
@@ -1315,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
'note': 'paginated channel', 'note': 'paginated channel',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'playlist_mincount': 91, 'playlist_mincount': 91,
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
}
}] }]
def extract_videos_from_page(self, page): def extract_videos_from_page(self, page):

View File

@@ -1,59 +1,122 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import operator
import re import re
from .utils import ( from .utils import (
ExtractorError, ExtractorError,
) )
_OPERATORS = [
('|', operator.or_),
('^', operator.xor),
('&', operator.and_),
('>>', operator.rshift),
('<<', operator.lshift),
('-', operator.sub),
('+', operator.add),
('%', operator.mod),
('/', operator.truediv),
('*', operator.mul),
]
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
class JSInterpreter(object): class JSInterpreter(object):
def __init__(self, code): def __init__(self, code, objects=None):
self.code = code if objects is None:
objects = {}
self.code = self._remove_comments(code)
self._functions = {} self._functions = {}
self._objects = {} self._objects = objects
def interpret_statement(self, stmt, local_vars, allow_recursion=20): def _remove_comments(self, code):
return re.sub(r'(?s)/\*.*?\*/', '', code)
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0: if allow_recursion < 0:
raise ExtractorError('Recursion limit reached') raise ExtractorError('Recursion limit reached')
if stmt.startswith('var '): should_abort = False
stmt = stmt[len('var '):] stmt = stmt.lstrip()
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' + stmt_m = re.match(r'var\s', stmt)
r'=(?P<expr>.*)$', stmt) if stmt_m:
if ass_m: expr = stmt[len(stmt_m.group(0)):]
if ass_m.groupdict().get('index'):
def assign(val):
lvar = local_vars[ass_m.group('out')]
idx = self.interpret_expression(
ass_m.group('index'), local_vars, allow_recursion)
assert isinstance(idx, int)
lvar[idx] = val
return val
expr = ass_m.group('expr')
else:
def assign(val):
local_vars[ass_m.group('out')] = val
return val
expr = ass_m.group('expr')
elif stmt.startswith('return '):
assign = lambda v: v
expr = stmt[len('return '):]
else: else:
# Try interpreting it as an expression return_m = re.match(r'return(?:\s+|$)', stmt)
expr = stmt if return_m:
assign = lambda v: v expr = stmt[len(return_m.group(0)):]
should_abort = True
else:
# Try interpreting it as an expression
expr = stmt
v = self.interpret_expression(expr, local_vars, allow_recursion) v = self.interpret_expression(expr, local_vars, allow_recursion)
return assign(v) return v, should_abort
def interpret_expression(self, expr, local_vars, allow_recursion): def interpret_expression(self, expr, local_vars, allow_recursion):
expr = expr.strip()
if expr == '': # Empty expression
return None
if expr.startswith('('):
parens_count = 0
for m in re.finditer(r'[()]', expr):
if m.group(0) == '(':
parens_count += 1
else:
parens_count -= 1
if parens_count == 0:
sub_expr = expr[1:m.start()]
sub_result = self.interpret_expression(
sub_expr, local_vars, allow_recursion)
remaining_expr = expr[m.end():].strip()
if not remaining_expr:
return sub_result
else:
expr = json.dumps(sub_result) + remaining_expr
break
else:
raise ExtractorError('Premature end of parens in %r' % expr)
for op, opfunc in _ASSIGN_OPERATORS:
m = re.match(r'''(?x)
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
\s*%s
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
if not m:
continue
right_val = self.interpret_expression(
m.group('expr'), local_vars, allow_recursion - 1)
if m.groupdict().get('index'):
lvar = local_vars[m.group('out')]
idx = self.interpret_expression(
m.group('index'), local_vars, allow_recursion)
assert isinstance(idx, int)
cur = lvar[idx]
val = opfunc(cur, right_val)
lvar[idx] = val
return val
else:
cur = local_vars.get(m.group('out'))
val = opfunc(cur, right_val)
local_vars[m.group('out')] = val
return val
if expr.isdigit(): if expr.isdigit():
return int(expr) return int(expr)
if expr.isalpha(): var_m = re.match(
return local_vars[expr] r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
expr)
if var_m:
return local_vars[var_m.group('name')]
try: try:
return json.loads(expr) return json.loads(expr)
@@ -61,7 +124,7 @@ class JSInterpreter(object):
pass pass
m = re.match( m = re.match(
r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$', r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
expr) expr)
if m: if m:
variable = m.group('var') variable = m.group('var')
@@ -114,23 +177,31 @@ class JSInterpreter(object):
return obj[member](argvals) return obj[member](argvals)
m = re.match( m = re.match(
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr) r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
if m: if m:
val = local_vars[m.group('in')] val = local_vars[m.group('in')]
idx = self.interpret_expression( idx = self.interpret_expression(
m.group('idx'), local_vars, allow_recursion - 1) m.group('idx'), local_vars, allow_recursion - 1)
return val[idx] return val[idx]
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr) for op, opfunc in _OPERATORS:
if m: m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
a = self.interpret_expression( if not m:
m.group('a'), local_vars, allow_recursion) continue
b = self.interpret_expression( x, abort = self.interpret_statement(
m.group('b'), local_vars, allow_recursion) m.group('x'), local_vars, allow_recursion - 1)
return a % b if abort:
raise ExtractorError(
'Premature left-side return of %s in %r' % (op, expr))
y, abort = self.interpret_statement(
m.group('y'), local_vars, allow_recursion - 1)
if abort:
raise ExtractorError(
'Premature right-side return of %s in %r' % (op, expr))
return opfunc(x, y)
m = re.match( m = re.match(
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr) r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
if m: if m:
fname = m.group('func') fname = m.group('func')
argvals = tuple([ argvals = tuple([
@@ -139,6 +210,7 @@ class JSInterpreter(object):
if fname not in self._functions: if fname not in self._functions:
self._functions[fname] = self.extract_function(fname) self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals) return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr) raise ExtractorError('Unsupported JS expression %r' % expr)
def extract_object(self, objname): def extract_object(self, objname):
@@ -162,9 +234,11 @@ class JSInterpreter(object):
def extract_function(self, funcname): def extract_function(self, funcname):
func_m = re.search( func_m = re.search(
(r'(?:function %s|[{;]%s\s*=\s*function)' % ( r'''(?x)
re.escape(funcname), re.escape(funcname))) + (?:function\s+%s|[{;]%s\s*=\s*function)\s*
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', \((?P<args>[^)]*)\)\s*
\{(?P<code>[^}]+)\}''' % (
re.escape(funcname), re.escape(funcname)),
self.code) self.code)
if func_m is None: if func_m is None:
raise ExtractorError('Could not find JS function %r' % funcname) raise ExtractorError('Could not find JS function %r' % funcname)
@@ -172,10 +246,16 @@ class JSInterpreter(object):
return self.build_function(argnames, func_m.group('code')) return self.build_function(argnames, func_m.group('code'))
def call_function(self, funcname, *args):
f = self.extract_function(funcname)
return f(args)
def build_function(self, argnames, code): def build_function(self, argnames, code):
def resf(args): def resf(args):
local_vars = dict(zip(argnames, args)) local_vars = dict(zip(argnames, args))
for stmt in code.split(';'): for stmt in code.split(';'):
res = self.interpret_statement(stmt, local_vars) res, abort = self.interpret_statement(stmt, local_vars)
if abort:
break
return res return res
return resf return resf

View File

@@ -32,6 +32,7 @@ import xml.etree.ElementTree
import zlib import zlib
from .compat import ( from .compat import (
compat_basestring,
compat_chr, compat_chr,
compat_getenv, compat_getenv,
compat_html_entities, compat_html_entities,
@@ -140,7 +141,7 @@ else:
def find_xpath_attr(node, xpath, key, val): def find_xpath_attr(node, xpath, key, val):
# Here comes the crazy part: In 2.6, if the xpath is a unicode, # Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . ! # .//node does not match if a node is a direct child of . !
if isinstance(xpath, unicode): if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii') xpath = xpath.encode('ascii')
for f in node.findall(xpath): for f in node.findall(xpath):
@@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
def parse_duration(s): def parse_duration(s):
if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str): if not isinstance(s, compat_basestring):
return None return None
s = s.strip() s = s.strip()
@@ -1426,7 +1427,7 @@ def uppercase_escape(s):
def escape_rfc3986(s): def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986""" """Escape non-ASCII characters as suggested by RFC 3986"""
if sys.version_info < (3, 0) and isinstance(s, unicode): if sys.version_info < (3, 0) and isinstance(s, compat_str):
s = s.encode('utf-8') s = s.encode('utf-8')
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.01.30.2' __version__ = '2015.02.02'