Compare commits


86 Commits

Author SHA1 Message Date
Philipp Hagemeister
c21b1fbeeb release 2015.02.02.2 2015-02-02 21:58:58 +01:00
Philipp Hagemeister
f920ce295e [ntvru] Remove unused import 2015-02-02 21:58:17 +01:00
Philipp Hagemeister
7a7bd19c45 [n-tv.de] Use native m3u8 as best format 2015-02-02 21:57:48 +01:00
Philipp Hagemeister
8f4b58d70e [ntvde] Add new extractor (Fixes #4850) 2015-02-02 21:48:54 +01:00
Philipp Hagemeister
3fd45e03bf [ntvru] Rename from NTV to clarify the difference between n-tv.de and ntv.ru 2015-02-02 20:43:02 +01:00
Philipp Hagemeister
869b4aeff4 release 2015.02.02.1 2015-02-02 20:35:04 +01:00
Philipp Hagemeister
cc9ca3ba6e [downloader/external] Simplify source_address
'' might actually be passed in, so check for None.
2015-02-02 20:33:25 +01:00
Philipp Hagemeister
ea71034bd3 Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/downloader/external.py
2015-02-02 20:32:07 +01:00
Philipp Hagemeister
9fffd0469f [options] Mark --fixup as non-experimental and correct its help 2015-02-02 20:28:18 +01:00
Sergey M․
ae7773942e [downloader/external] Simplify 2015-02-02 21:51:38 +06:00
Sergey M․
469a64cebf [downloader/external] Simplify 2015-02-02 21:40:52 +06:00
Sergey M.
aae3fdcfae Merge pull request #4845 from vijayanandnandam/master
Passing source address option to external downloaders
2015-02-02 21:38:22 +06:00
vijayanand nandam
6a66904f8e passing source address option to external downloaders 2015-02-02 20:51:40 +05:30
Sergey M․
78271e3319 [drtv] Extract material id (Closes #4814) 2015-02-02 21:11:25 +06:00
Sergey M․
92bf0bcdf8 [historicfilms] Add extractor (Closes #4825) 2015-02-02 20:52:37 +06:00
Philipp Hagemeister
1283204917 [http] PEP8 (#4831) 2015-02-02 12:05:39 +01:00
Philipp Hagemeister
6789defea9 Merge pull request #4831 from light94/master
Handling Connection Reset by Peer Error
2015-02-02 12:03:28 +01:00
Philipp Hagemeister
acf2a6e97b release 2015.02.02 2015-02-02 01:49:40 +01:00
Philipp Hagemeister
8cfb6efe6f [jsinterp] Correct div command 2015-02-02 01:49:32 +01:00
Philipp Hagemeister
04edb9caf5 Merge pull request #4838 from raunaqrox/patch-1
supported sites link was not opening from README
2015-02-01 23:32:44 +01:00
Sahebjot singh
044131ba21 supported sites was not opening
required a .md at the end.
2015-02-02 03:49:28 +05:30
Philipp Hagemeister
0a7055c90d release 2015.02.01 2015-02-01 22:42:19 +01:00
Philipp Hagemeister
9e3f19919a [jsinterp] Beef up and add tests
In preparation for #4822, extend jsinterp by a lot. (We may even have to/want to write a proper interpreter with actual parsing)
2015-02-01 22:38:29 +01:00
Sergey M․
4a3da4ebdb [hls] Simplify 2015-02-01 23:54:38 +06:00
Sergey M․
027008b14e [hls] Fix encode issues on python2 @ Windows 2015-02-01 23:49:23 +06:00
Sergey M․
c6df692466 [rutv] Extract all m3u8 formats 2015-02-01 23:48:23 +06:00
Philipp Hagemeister
acf757f42e [youtube] Complete test definition 2015-02-01 15:33:32 +01:00
Philipp Hagemeister
dd8982f19c [wdr] Complete test definition 2015-02-01 15:27:16 +01:00
Philipp Hagemeister
654bd52f58 [teletask] Fix test definition 2015-02-01 15:25:33 +01:00
Philipp Hagemeister
a9551e9020 [soundcloud:set] Fix test definition 2015-02-01 15:24:38 +01:00
Philipp Hagemeister
4e980275b5 [test_download] Remove references to "file" in test definitions 2015-02-01 15:21:18 +01:00
Philipp Hagemeister
c172440ac5 [ringtv] Modernize test definition 2015-02-01 15:21:02 +01:00
Philipp Hagemeister
e332772531 [servingsys] Modernize 2015-02-01 15:18:52 +01:00
Philipp Hagemeister
437cac8cc1 [sina] Modernize and simplify 2015-02-01 15:16:35 +01:00
Philipp Hagemeister
9f281cacd2 [keezmovies] Fix extraction and modernize test 2015-02-01 15:13:44 +01:00
Philipp Hagemeister
748a0fab8a Remove unused imports 2015-02-01 15:08:50 +01:00
Philipp Hagemeister
c1f06d6307 [macgamestore] Modernize 2015-02-01 15:08:33 +01:00
Philipp Hagemeister
c4e817ce4a [france2.fr:generation-quoi] Modernize 2015-02-01 15:06:55 +01:00
Philipp Hagemeister
9a3e5e6955 [kankan] Modernize 2015-02-01 15:03:55 +01:00
Philipp Hagemeister
228d30ed06 [la7] Modernize 2015-02-01 15:03:03 +01:00
Philipp Hagemeister
057c0609fc [toutv] Modernize test definition 2015-02-01 15:01:33 +01:00
Philipp Hagemeister
17d2712d9c [teamcoco] Modernize and fix extraction 2015-02-01 15:00:54 +01:00
Philipp Hagemeister
fc09240e24 [vimeo] Modernize test definition 2015-02-01 12:12:27 +01:00
Philipp Hagemeister
146303136f [nerdcubed] Modernize test definition 2015-02-01 12:11:20 +01:00
Philipp Hagemeister
96aded8d3d [rottentomatoes] Modernize test definition 2015-02-01 12:11:14 +01:00
Philipp Hagemeister
2886be15aa [defense] Modernize 2015-02-01 12:10:15 +01:00
Philipp Hagemeister
ca0f500ecf [mtv] Modernize and clean up test 2015-02-01 12:08:21 +01:00
Philipp Hagemeister
29aef5a33c [ard] Remove deleted video test case 2015-02-01 12:00:47 +01:00
Philipp Hagemeister
9158b2b301 [mpora] Modernize 2015-02-01 11:58:37 +01:00
Philipp Hagemeister
0196149c5b [compat] Correct compat_basestring definition 2015-02-01 11:37:00 +01:00
Philipp Hagemeister
8f9312c387 Appease pyflakes8-3 2015-02-01 11:30:56 +01:00
Sergey M․
439b9a9e9b Merge branch 'kinetoskombi-globo-fix' 2015-02-01 04:36:57 +06:00
Sergey M․
8c72beb25e [globo] Properly extract m3u8 formats (#4346 #4832) 2015-02-01 04:36:24 +06:00
kinetoskombi
1ee94db2d0 [globo] Fix error on some globo videos 2015-01-31 20:07:43 -02:00
light94
e77d2975af Handling Connection Reset by Peer Error 2015-02-01 00:10:58 +05:30
Jaime Marquínez Ferrándiz
e41b1f7385 Fix flake8 errors 2015-01-31 10:51:39 +01:00
Jaime Marquínez Ferrándiz
cd596028d6 [rtve] Recognize mobile urls (fixes #4823) 2015-01-30 23:46:55 +01:00
Jaime Marquínez Ferrándiz
cc57bd33a8 [rtve] Fix extraction
Skip live stream test, we can't use the f4m manifest yet
2015-01-30 23:46:55 +01:00
Sergey M․
6d593c3276 [YoutubeDL] Fix video+audio format_id (Closes #4824) 2015-01-31 03:50:11 +06:00
Philipp Hagemeister
91755ee384 [comedycentral:shows] Generate better IDs and add a test for that 2015-01-30 19:43:46 +01:00
Sergey M․
0692ef86ef [bbccouk] Improve _VALID_URL 2015-01-30 23:47:09 +06:00
Sergey M․
439d9be27d [mixcloud] Remove unused import 2015-01-30 23:21:58 +06:00
Sergey M․
b80505a409 [mixcloud] Fix extraction (Closes #4784) 2015-01-30 23:21:44 +06:00
Sergey M․
e4c17d7274 [nhl:news] Add extractor (Closes #4805) 2015-01-30 23:12:27 +06:00
Sergey M․
2c58674e0e [nhl] Improve _VALID_URL (#4805) 2015-01-30 22:46:53 +06:00
Sergey M․
ef1269fb07 [drtv] Improve _VALID_URL (#4814) 2015-01-30 22:42:11 +06:00
Sergey M․
e525d9a3df [mtv] Extract subtitles (Closes #4811) 2015-01-30 21:57:59 +06:00
Sergey M․
20b4492c71 [spike] Improve _VALID_URL 2015-01-30 21:54:48 +06:00
Sergey M․
dee3f73787 [spike] Modernize 2015-01-30 21:54:14 +06:00
Philipp Hagemeister
d543bdc351 [downloader/f4m] Clarify that we should eventually just implement the DRM scheme (#3000) 2015-01-30 16:06:55 +01:00
Philipp Hagemeister
c7ff0c6422 Merge remote-tracking branch 'rzhxeo/f4m-drm' 2015-01-30 16:00:47 +01:00
Philipp Hagemeister
01c46659c4 [washingtonpost] Catch more UUIDs 2015-01-30 15:53:58 +01:00
Philipp Hagemeister
b04b885271 [extractor/common] Document all protocol values 2015-01-30 15:53:16 +01:00
Philipp Hagemeister
dc35bfd2d5 [test/helper] Clarify which keys have to be added 2015-01-30 15:52:56 +01:00
Philipp Hagemeister
70fca8d694 [youtube] Remove unused import 2015-01-30 10:59:19 +01:00
Philipp Hagemeister
a52c633536 [cinchcast] Wrap overly long lines (#4820) 2015-01-30 10:59:07 +01:00
Philipp Hagemeister
7b6c60393e Merge remote-tracking branch 'codesparkle/master' 2015-01-30 10:56:53 +01:00
Philipp Hagemeister
83e7a314b4 dedup AUTHORS 2015-01-30 10:48:39 +01:00
codesparkle
749f2ca044 Smotri info extractor: removed unreachable code and updated old md5 for test video 2015-01-30 20:35:20 +11:00
codesparkle
5468ff4d91 Remove duplicate dictionary keys 2015-01-30 20:11:51 +11:00
codesparkle
1d2daaea63 Simplify list creation 2015-01-30 20:10:12 +11:00
codesparkle
52585fd6dc The opening curly brace { is a regex reserved [control character](http://stackoverflow.com/a/400316/1106367), so it needs to be escaped. 2015-01-30 18:41:40 +11:00
Philipp Hagemeister
c03844a4ec release 2015.01.30.2 2015-01-30 04:44:00 +01:00
Philipp Hagemeister
6449cd807e [youtube] Fall back to embed webpage when content is blocked on main (Fixes #4717) 2015-01-30 04:43:50 +01:00
Philipp Hagemeister
e2a08185c6 [README] Add an FAQ for YouTube IDs starting with a dash (Closes #4800) 2015-01-30 04:17:44 +01:00
rzhxeo
6ca85be6f8 Filter DRM protected media in f4m downloader 2015-01-26 20:44:48 +01:00
57 changed files with 801 additions and 277 deletions

View File

@@ -107,5 +107,4 @@ Yen Chi Hsuan
 Enam Mijbah Noor
 David Luhmer
 Shaya Goldberg
-Yen Chi Hsuan
 Paul Hartmann

View File

@@ -368,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
     --add-metadata                   write metadata to the video file
     --xattrs                         write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
-    --fixup POLICY                   (experimental) Automatically correct known
-                                     faults of the file. One of never (do
-                                     nothing), warn (only emit a warning),
-                                     detect_or_warn(check whether we can do
-                                     anything about it, warn otherwise
+    --fixup POLICY                   Automatically correct known faults of the
+                                     file. One of never (do nothing), warn (only
+                                     emit a warning), detect_or_warn(the
+                                     default; fix file if we can, warn
+                                     otherwise)
     --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
@@ -525,9 +525,16 @@ From then on, after restarting your shell, you will be able to access both youtu
 Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).

+### How do I download a video starting with a `-` ?
+
+Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+
+    youtube-dl -- -wNyEUrxzFU
+    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+
 ### How can I detect whether a given URL is supported by youtube-dl?

-For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.

 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
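A rough illustration of the point in that FAQ answer: one can still ask each extractor whether it *claims* a URL and treat a non-generic match as "probably supported". A minimal sketch against youtube-dl's extractor API (`gen_extractors()` and `suitable()`); illustrative only, not part of this changeset:

    from youtube_dl.extractor import gen_extractors

    def has_dedicated_extractor(url):
        for ie in gen_extractors():
            # The generic extractor matches every URL, so skip it; a match
            # here only means a dedicated extractor claims the URL, not
            # that extraction is guaranteed to succeed.
            if ie.suitable(url) and ie.IE_NAME != 'generic':
                return True
        return False

    print(has_dedicated_extractor('http://www.youtube.com/watch?v=-wNyEUrxzFU'))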

View File

@@ -148,9 +148,15 @@ def expect_info_dict(self, got_dict, expected_dict):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
else: else:
return repr(v) return repr(v)
info_dict_str = ''.join( info_dict_str = ''
' %s: %s,\n' % (_repr(k), _repr(v)) if len(missing_keys) != len(expected_dict):
for k, v in test_info_dict.items()) info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys)
info_dict_str += '\n'
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys)
write_string( write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
self.assertFalse( self.assertFalse(

View File

@@ -89,7 +89,7 @@ def generator(test_case):
     for tc in test_cases:
         info_dict = tc.get('info_dict', {})
-        if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+        if not (info_dict.get('id') and info_dict.get('ext')):
             raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')

     if 'skip' in test_case:
@@ -116,7 +116,7 @@ def generator(test_case):
         expect_warnings(ydl, test_case.get('expected_warnings', []))

         def get_tc_filename(tc):
-            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+            return ydl.prepare_filename(tc.get('info_dict', {}))

         res_dict = None

test/test_jsinterp.py Normal file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python

from __future__ import unicode_literals

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from youtube_dl.jsinterp import JSInterpreter


class TestJSInterpreter(unittest.TestCase):
    def test_basic(self):
        jsi = JSInterpreter('function x(){;}')
        self.assertEqual(jsi.call_function('x'), None)

        jsi = JSInterpreter('function x3(){return 42;}')
        self.assertEqual(jsi.call_function('x3'), 42)

    def test_calc(self):
        jsi = JSInterpreter('function x4(a){return 2*a+1;}')
        self.assertEqual(jsi.call_function('x4', 3), 7)

    def test_empty_return(self):
        jsi = JSInterpreter('function f(){return; y()}')
        self.assertEqual(jsi.call_function('f'), None)

    def test_morespace(self):
        jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
        self.assertEqual(jsi.call_function('x', 3), 7)

        jsi = JSInterpreter('function f () { x = 2 ; return x; }')
        self.assertEqual(jsi.call_function('f'), 2)

    def test_strange_chars(self):
        jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
        self.assertEqual(jsi.call_function('$_xY1', 20), 21)

    def test_operators(self):
        jsi = JSInterpreter('function f(){return 1 << 5;}')
        self.assertEqual(jsi.call_function('f'), 32)

        jsi = JSInterpreter('function f(){return 19 & 21;}')
        self.assertEqual(jsi.call_function('f'), 17)

        jsi = JSInterpreter('function f(){return 11 >> 2;}')
        self.assertEqual(jsi.call_function('f'), 2)

    def test_array_access(self):
        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
        self.assertEqual(jsi.call_function('f'), [5, 2, 7])

    def test_parens(self):
        jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
        self.assertEqual(jsi.call_function('f'), 7)

        jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
        self.assertEqual(jsi.call_function('f'), 9)

    def test_assignments(self):
        jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
        self.assertEqual(jsi.call_function('f'), 31)

        jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
        self.assertEqual(jsi.call_function('f'), 51)

        jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
        self.assertEqual(jsi.call_function('f'), -11)

    def test_comments(self):
        jsi = JSInterpreter('''
        function x() {
            var x = /* 1 + */ 2;
            var y = /* 30
            * 40 */ 50;
            return x + y;
        }
        ''')
        self.assertEqual(jsi.call_function('x'), 52)

    def test_precedence(self):
        jsi = JSInterpreter('''
        function x() {
            var a = [10, 20, 30, 40, 50];
            var b = 6;
            a[0]=a[b%a.length];
            return a;
        }''')
        self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])


if __name__ == '__main__':
    unittest.main()

View File

@@ -238,6 +238,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('5 s'), 5)
         self.assertEqual(parse_duration('3 min'), 180)
         self.assertEqual(parse_duration('2.5 hours'), 9000)
+        self.assertEqual(parse_duration('02:03:04'), 7384)
+        self.assertEqual(parse_duration('01:02:03:04'), 93784)

     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -371,6 +373,16 @@
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})

+        # Ignore JavaScript code as well
+        on = js_to_json('''{
+            "x": 1,
+            y: "a",
+            z: some.code
+        }''')
+        d = json.loads(on)
+        self.assertEqual(d['x'], 1)
+        self.assertEqual(d['y'], 'a')
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a: "b"')

View File

@@ -25,6 +25,7 @@ if os.name == 'nt':
     import ctypes

 from .compat import (
+    compat_basestring,
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
@@ -1074,8 +1075,8 @@ class YoutubeDL(object):
                     selected_format = {
                         'requested_formats': formats_info,
                         'format': rf,
-                        'format_id': rf,
-                        'ext': formats_info[0]['ext'],
+                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+                                                formats_info[1].get('format_id')),
                         'width': formats_info[0].get('width'),
                         'height': formats_info[0].get('height'),
                         'resolution': formats_info[0].get('resolution'),
@@ -1558,7 +1559,7 @@ class YoutubeDL(object):
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
-        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        req_is_string = isinstance(req, compat_basestring)
         url = req if req_is_string else req.get_full_url()
         url_escaped = escape_url(url)
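For illustration of the format_id fix above (invented example values; '137'/'140' are just plausible format ids), the merged video+audio entry now reports both ids joined with '+':

    # invented values, mirroring the '%s+%s' expression in the diff above
    formats_info = [{'format_id': '137', 'ext': 'mp4'},   # video part
                    {'format_id': '140', 'ext': 'm4a'}]   # audio part
    format_id = '%s+%s' % (formats_info[0].get('format_id'),
                           formats_info[1].get('format_id'))
    print(format_id)  # -> 137+140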

View File

@@ -114,6 +114,26 @@ except ImportError:
                 string += pct_sequence.decode(encoding, errors)
         return string

+try:
+    compat_str = unicode  # Python 2
+except NameError:
+    compat_str = str
+
+try:
+    compat_basestring = basestring  # Python 2
+except NameError:
+    compat_basestring = str
+
+try:
+    compat_chr = unichr  # Python 2
+except NameError:
+    compat_chr = chr
+
+try:
+    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError:  # Python 2.6
+    from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
 try:
     from urllib.parse import parse_qs as compat_parse_qs
@@ -123,7 +143,7 @@ except ImportError: # Python 2
     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, unicode
+        qs, _coerce_result = qs, compat_str
         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
         r = []
         for name_value in pairs:
@@ -162,21 +182,6 @@ except ImportError: # Python 2
             parsed_result[name] = [value]
         return parsed_result

-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
-try:
-    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError:  # Python 2.6
-    from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
 try:
     from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
@@ -362,6 +367,7 @@ def workaround_optparse_bug9161():
 __all__ = [
     'compat_HTTPError',
+    'compat_basestring',
     'compat_chr',
     'compat_cookiejar',
     'compat_expanduser',

View File

@@ -45,6 +45,12 @@ class ExternalFD(FileDownloader):
     def supports(cls, info_dict):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

+    def _source_address(self, command_option):
+        source_address = self.params.get('source_address')
+        if source_address is None:
+            return []
+        return [command_option, source_address]
+
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
         cmd = self._make_cmd(tmpfilename, info_dict)
@@ -72,6 +78,7 @@ class CurlFD(ExternalFD):
         cmd = [self.exe, '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
         cmd += ['--', info_dict['url']]
         return cmd
@@ -81,6 +88,7 @@ class WgetFD(ExternalFD):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--bind-address')
         cmd += ['--', info_dict['url']]
         return cmd
@@ -96,6 +104,7 @@ class Aria2cFD(ExternalFD):
         cmd += ['--out', os.path.basename(tmpfilename)]
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
         cmd += ['--', info_dict['url']]
         return cmd
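A standalone sketch of what `_source_address()` contributes to a curl command line (the address, filename and header below are made up; the None check mirrors the code above — per the commit message, an empty string would deliberately still be passed through):

    params = {'source_address': '192.0.2.10'}

    def _source_address(command_option):
        source_address = params.get('source_address')
        if source_address is None:
            return []
        return [command_option, source_address]

    cmd = ['curl', '-o', 'out.mp4']
    cmd += ['--header', 'User-Agent: youtube-dl']
    cmd += _source_address('--interface')
    cmd += ['--', 'http://example.com/video.mp4']
    print(cmd)
    # ['curl', '-o', 'out.mp4', '--header', 'User-Agent: youtube-dl',
    #  '--interface', '192.0.2.10', '--', 'http://example.com/video.mp4']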

View File

@@ -230,6 +230,23 @@ class F4mFD(FileDownloader):
     A downloader for f4m manifests or AdobeHDS.
     """

+    def _get_unencrypted_media(self, doc):
+        media = doc.findall(_add_ns('media'))
+        if not media:
+            self.report_error('No media found')
+        for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
+                  doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+            # If id attribute is missing it's valid for all media nodes
+            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+            if 'id' not in e.attrib:
+                self.report_error('Missing ID in f4m DRM')
+        media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+                                      'drmAdditionalHeaderSetId' not in e.attrib,
+                            media))
+        if not media:
+            self.report_error('Unsupported DRM')
+        return media
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
@@ -248,7 +265,8 @@ class F4mFD(FileDownloader):
         )
         doc = etree.fromstring(manifest)
-        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
+        formats = [(int(f.attrib.get('bitrate', -1)), f)
+                   for f in self._get_unencrypted_media(doc)]
         if requested_bitrate is None:
             # get the best format
             formats = sorted(formats, key=lambda f: f[0])
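A self-contained sketch of the filtering done by `_get_unencrypted_media` (the tiny manifest is invented, and the namespace is assumed to be the F4M 1.0 one that `_add_ns` expands to):

    import xml.etree.ElementTree as etree

    F4M_NS = '{http://ns.adobe.com/f4m/1.0}'

    manifest = '''<manifest xmlns="http://ns.adobe.com/f4m/1.0">
      <drmAdditionalHeader id="drm1">opaque-drm-data</drmAdditionalHeader>
      <media url="clear" bitrate="800"/>
      <media url="protected" bitrate="1500" drmAdditionalHeaderId="drm1"/>
    </manifest>'''

    doc = etree.fromstring(manifest)
    # keep only media nodes that reference no DRM header, as above
    media = [e for e in doc.findall(F4M_NS + 'media')
             if 'drmAdditionalHeaderId' not in e.attrib and
             'drmAdditionalHeaderSetId' not in e.attrib]
    print([e.get('url') for e in media])  # -> ['clear']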

View File

@@ -11,6 +11,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    encodeArgument,
     encodeFilename,
 )
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)

-        args = [
-            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc',
-            encodeFilename(tmpfilename, for_subprocess=True)]
-
         ffpp = FFmpegPostProcessor(downloader=self)
         program = ffpp._executable
         if program is None:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         ffpp.check_version()
-        cmd = [program] + args

-        retval = subprocess.call(cmd)
+        args = [
+            encodeArgument(opt)
+            for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+        args.append(encodeFilename(tmpfilename, True))
+
+        retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,

View File

@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import os
 import time

+from socket import error as SocketError
+import errno
+
 from .common import FileDownloader
 from ..compat import (
     compat_urllib_request,
@@ -99,6 +102,11 @@ class HttpFD(FileDownloader):
                     resume_len = 0
                     open_mode = 'wb'
                     break
+            except SocketError as e:
+                if e.errno != errno.ECONNRESET:
+                    # Connection reset is no problem, just retry
+                    raise

             # Retry
             count += 1
             if count <= retries:
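A minimal standalone sketch of the retry-on-reset pattern added above (the `download_chunk` helper is a made-up stand-in for the HTTP read loop):

    import errno
    from socket import error as SocketError

    def download_chunk(attempt):
        # made-up stand-in: reset on the first attempt, succeed afterwards
        if attempt == 0:
            raise SocketError(errno.ECONNRESET, 'Connection reset by peer')
        return b'data'

    retries = 3
    for count in range(retries + 1):
        try:
            data = download_chunk(count)
            break
        except SocketError as e:
            if e.errno != errno.ECONNRESET:
                raise  # anything other than a reset is fatal
            # connection reset: fall through and retry, as in HttpFD
    print(data)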

View File

@@ -182,6 +182,7 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
@@ -294,7 +295,11 @@ from .nextmedia import (
 )
 from .nfb import NFBIE
 from .nfl import NFLIE
-from .nhl import NHLIE, NHLVideocenterIE
+from .nhl import (
+    NHLIE,
+    NHLNewsIE,
+    NHLVideocenterIE,
+)
 from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
@@ -312,7 +317,8 @@ from .nrk import (
     NRKIE,
     NRKTVIE,
 )
-from .ntv import NTVIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oktoberfesttv import OktoberfestTVIE

View File

@@ -122,7 +122,6 @@ class AppleTrailersIE(InfoExtractor):
             playlist.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': title,
                 'formats': formats,
                 'title': title,
                 'duration': duration,

View File

@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
-        'file': '22429276.mp4',
-        'md5': '469751912f1de0816a9fc9df8336476c',
-        'info_dict': {
-            'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
-            'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
-        },
-        'skip': 'Blocked outside of Germany',
+        'only_matching': True,
     }, {
         'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
         'info_dict': {

View File

@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(SubtitlesInfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'

     _TESTS = [
         {
@@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         }, {
             'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
             'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
+            'only_matching': True,
         }
     ]

View File

@@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor):
             item, './{http://developer.longtailvideo.com/trac/}date')
         upload_date = unified_strdate(date_str, day_first=False)
         # duration is present but wrong
-        formats = []
-        formats.append({
-            'format_id': 'main',
-            'url': item.find(
-                './{http://search.yahoo.com/mrss/}content').attrib['url'],
-        })
+        formats = [{
+            'format_id': 'main',
+            'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
+        }]
         backup_url = xpath_text(
             item, './{http://developer.longtailvideo.com/trac/}backupContent')
         if backup_url:

View File

@@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
                          |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                      )|
                      (?P<interview>
-                         extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
+                         extended-interviews/(?P<interID>[0-9a-z]+)/
+                         (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
+                         (?:/[^/?#]?|[?#]|$))))
                     '''
     _TESTS = [{
         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
@@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
             'uploader': 'thedailyshow',
             'title': 'thedailyshow kristen-stewart part 1',
         }
+    }, {
+        'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
+        'info_dict': {
+            'id': 'sarah-chayes-extended-interview',
+            'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+            'title': 'thedailyshow Sarah Chayes Extended Interview',
+        },
+        'playlist': [
+            {
+                'info_dict': {
+                    'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
+                    'ext': 'mp4',
+                    'upload_date': '20150129',
+                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+                    'uploader': 'thedailyshow',
+                    'title': 'thedailyshow sarah-chayes-extended-interview part 1',
+                },
+            },
+            {
+                'info_dict': {
+                    'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
+                    'ext': 'mp4',
+                    'upload_date': '20150129',
+                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
+                    'uploader': 'thedailyshow',
+                    'title': 'thedailyshow sarah-chayes-extended-interview part 2',
+                },
+            },
+        ],
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
         'only_matching': True,
@@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
         return {
             '_type': 'playlist',
+            'id': epTitle,
             'entries': entries,
             'title': show_name + ' ' + title,
             'description': description,

View File

@@ -89,7 +89,8 @@ class InfoExtractor(object):
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
-                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
+                                 "http", "https", "rtsp", "rtmp", "rtmpe",
+                                 "m3u8", or "m3u8_native".
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.

View File

@@ -1,40 +1,38 @@
 from __future__ import unicode_literals

-import re
-import json
-
 from .common import InfoExtractor


 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
-    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
-                  r'ligthboxvideo/base-de-medias/webtv/(.*)')
+    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'

     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
-        'file': '11213.mp4',
         'md5': '75bba6124da7e63d2d60b5244ec9430c',
-        "info_dict": {
-            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+        'info_dict': {
+            'id': '11213',
+            'ext': 'mp4',
+            'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
         }
     }

     def _real_extract(self, url):
-        title = re.match(self._VALID_URL, url).group(1)
+        title = self._match_id(url)
         webpage = self._download_webpage(url, title)
         video_id = self._search_regex(
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')

         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                     + video_id)
-        info = self._download_webpage(json_url, title,
-                                      'Downloading JSON config')
-        video_url = json.loads(info)['renditions'][0]['url']
+        info = self._download_json(json_url, title, 'Downloading JSON config')
+        video_url = info['renditions'][0]['url']

-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }

View File

@@ -6,7 +6,7 @@ from ..utils import parse_iso8601
 class DRTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
+    _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'

     _TEST = {
         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
@@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)

-        programcard = self._download_json(
-            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+            webpage, 'video id')
+
+        programcard = self._download_json(
+            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
+            video_id, 'Downloading video JSON')

         data = programcard['Data'][0]
         title = data['Title']

View File

@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
 class GenerationQuoiIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-quoi'
-    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
+    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'

     _TEST = {
         'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
-        'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
         'info_dict': {
+            'id': 'k7FJX8VBcvvLmX4wA5Q',
+            'ext': 'mp4',
             'title': 'Génération Quoi - Garde à Vous',
             'uploader': 'Génération Quoi',
         },
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
             # It uses Dailymotion
             'skip_download': True,
         },
-        'skip': 'Only available from France',
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
-        info_json = self._download_webpage(info_url, name)
+        display_id = self._match_id(url)
+        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
+        info_json = self._download_webpage(info_url, display_id)
         info = json.loads(info_json)
         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
                                ie='Dailymotion')

View File

@@ -1073,7 +1073,7 @@ class GenericIE(InfoExtractor):
         found = filter_video(re.findall(r'''(?xs)
                         flowplayer\("[^"]+",\s*
                             \{[^}]+?\}\s*,
-                            \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+                            \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                                 ["']?url["']?\s*:\s*["']([^"']+)["']
                     ''', webpage))
         if not found:

View File

@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
             'like_count': int,
         }
     },
+    {
+        'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+        'md5': 'c1defca721ce25b2354e927d3e4b3dec',
+        'info_dict': {
+            'id': '3928201',
+            'ext': 'mp4',
+            'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
+            'duration': 1472.906,
+            'uploader': 'Canal Brasil',
+            'uploader_id': 705,
+            'like_count': int,
+        }
+    },
 ]

 class MD5():
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
             signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
             signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5

-            formats.append({
-                'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
-                'format_id': resource_id,
-                'height': resource['height']
-            })
+            resource_url = resource['url']
+            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
+            if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
+            else:
+                formats.append({
+                    'url': signed_url,
+                    'format_id': resource_id,
+                    'height': resource.get('height'),
+                })

         self._sort_formats(formats)

View File

@@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor):
         return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

     def _transform_bootstrap(self, js):
-        return re.split('(?m)^\s*try\s*{', js)[0] \
+        return re.split('(?m)^\s*try\s*\{', js)[0] \
             .split(' = ', 1)[1].strip().rstrip(';')

     def _transform_meta(self, js):

View File

@@ -0,0 +1,46 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import parse_duration


class HistoricFilmsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.historicfilms.com/tapes/4728',
        'md5': 'd4a437aec45d8d796a38a215db064e9a',
        'info_dict': {
            'id': '4728',
            'ext': 'mov',
            'title': 'Historic Films: GP-7',
            'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 2096,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        tape_id = self._search_regex(
            r'class="tapeId">([^<]+)<', webpage, 'tape id')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(
            'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))

        video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
        }

View File

@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
     _TEST = {
         'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
-        'file': '48863.flv',
         'md5': '29aca1e47ae68fc28804aca89f29507e',
         'info_dict': {
+            'id': '48863',
+            'ext': 'flv',
             'title': 'Ready To Go',
         },
         'skip': 'Only available from China',
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

         title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')

View File

@@ -7,10 +7,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
-)
-from ..aes import (
-    aes_decrypt_text
 )
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        'file': '1214711.mp4',
         'md5': '6e297b7e789329923fcf83abb67c9289',
         'info_dict': {
+            'id': '1214711',
+            'ext': 'mp4',
             'title': 'Petite Asian Lady Mai Playing In Bathtub',
             'age_limit': 18,
         }
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
             embedded_url = mobj.group(1)
             return self.url_result(embedded_url)

-        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
-        if 'encrypted=true' in webpage:
-            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_title = self._html_search_regex(
+            r'<h1 [^>]*>([^<]+)', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[4].split('_')[:2]

View File

@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
     _TEST = {
         'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'file': '50355319.mp4',
         'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
         'info_dict': {
+            'id': '50355319',
+            'ext': 'mp4',
             'title': 'IL DIVO',
             'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
             'duration': 6254,
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
         doc = self._download_xml(xml_url, video_id)

View File

@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
-        'file': '2450.m4v',
         'md5': '8649b8ea684b6666b4c5be736ecddc61',
         'info_dict': {
+            'id': '2450',
+            'ext': 'm4v',
             'title': 'Crow',
         }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
-
-        if re.search(r'>Missing Media<', webpage) is not None:
-            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading trailer page')
+
+        if '>Missing Media<' in webpage:
+            raise ExtractorError(
+                'Trailer %s does not exist' % video_id, expected=True)

         video_title = self._html_search_regex(
             r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')

View File

@@ -9,7 +9,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     HEADRequest,
-    int_or_none,
+    str_to_int,
     parse_iso8601,
 )
@@ -85,15 +85,17 @@ class MixcloudIE(InfoExtractor):
         uploader_id = self._search_regex(
             r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
         description = self._og_search_description(webpage)
-        like_count = int_or_none(self._search_regex(
-            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+        like_count = str_to_int(self._search_regex(
+            [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+             r'/favorites/?">([0-9]+)<'],
             webpage, 'like count', fatal=False))
-        view_count = int_or_none(self._search_regex(
-            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+        view_count = str_to_int(self._search_regex(
+            [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+             r'/listeners/?">([0-9,.]+)</a>'],
             webpage, 'play count', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
             r'<time itemprop="dateCreated" datetime="([^"]+)">',
-            webpage, 'upload date'))
+            webpage, 'upload date', default=None))

         return {
             'id': track_id,

View File

@@ -1,21 +1,19 @@
 from __future__ import unicode_literals

-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none


 class MporaIE(InfoExtractor):
-    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+    _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
     IE_NAME = 'MPORA'

     _TEST = {
         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
-        'file': 'AAdo8okx4wiz.mp4',
         'md5': 'a7a228473eedd3be741397cf452932eb',
         'info_dict': {
+            'id': 'AAdo8okx4wiz',
+            'ext': 'mp4',
             'title': 'Katy Curd - Winter in the Forest',
             'duration': 416,
             'uploader': 'Peter Newman Media',
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

         data_json = self._search_regex(
             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
-
-        data = json.loads(data_json)
+        data = self._parse_json(data_json, video_id)

         uploader = data['info_overlay'].get('username')
         duration = data['video']['duration'] // 1000

View File

@@ -2,10 +2,11 @@ from __future__ import unicode_literals

 import re

-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+    compat_str,
 )
 from ..utils import (
     ExtractorError,
@@ -22,7 +23,7 @@ def _media_xml_tag(tag):
     return '{http://search.yahoo.com/mrss/}%s' % tag


-class MTVServicesInfoExtractor(InfoExtractor):
+class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
     _MOBILE_TEMPLATE = None

     @staticmethod
@@ -78,17 +79,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
             try:
                 _, _, ext = rendition.attrib['type'].partition('/')
                 rtmp_video_url = rendition.find('./src').text
-                formats.append({'ext': ext,
-                                'url': self._transform_rtmp_url(rtmp_video_url),
-                                'format_id': rendition.get('bitrate'),
-                                'width': int(rendition.get('width')),
-                                'height': int(rendition.get('height')),
-                                })
+                if rtmp_video_url.endswith('siteunavail.png'):
+                    continue
+                formats.append({
+                    'ext': ext,
+                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'format_id': rendition.get('bitrate'),
+                    'width': int(rendition.get('width')),
+                    'height': int(rendition.get('height')),
+                })
             except (KeyError, TypeError):
                 raise ExtractorError('Invalid rendition field.')
         self._sort_formats(formats)
         return formats

+    def _extract_subtitles(self, mdoc, mtvn_id):
+        subtitles = {}
+        FORMATS = {
+            'scc': 'cea-608',
+            'eia-608': 'cea-608',
+            'xml': 'ttml',
+        }
+        subtitles_format = FORMATS.get(
+            self._downloader.params.get('subtitlesformat'), 'ttml')
+        for transcript in mdoc.findall('.//transcript'):
+            if transcript.get('kind') != 'captions':
+                continue
+            lang = transcript.get('srclang')
+            for typographic in transcript.findall('./typographic'):
+                captions_format = typographic.get('format')
+                if captions_format == subtitles_format:
+                    subtitles[lang] = compat_str(typographic.get('src'))
+                    break
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(mtvn_id, subtitles)
+        return self.extract_subtitles(mtvn_id, subtitles)
+
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
         video_id = self._id_from_uri(uri)
@@ -135,6 +161,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return {
             'title': title,
             'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
+            'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
             'id': video_id,
             'thumbnail': self._get_thumbnail_url(uri, itemdoc),
             'description': description,
@@ -167,7 +194,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
         mgid = self._search_regex(
             [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
             webpage, 'mgid')
-        return self._get_videos_info(mgid)
+
+        videos_info = self._get_videos_info(mgid)
+        if self._downloader.params.get('listsubtitles', False):
+            return
+        return videos_info


 class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
@@ -212,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            'file': '853555.mp4',
             'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
             'info_dict': {
+                'id': '853555',
+                'ext': 'mp4',
                 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
                 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
             },
         },
-        {
-            'add_ie': ['Vevo'],
-            'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            'file': 'USCJY1331283.mp4',
-            'md5': '73b4e7fcadd88929292fe52c3ced8caf',
-            'info_dict': {
-                'title': 'Everything Has Changed',
-                'upload_date': '20130606',
-                'uploader': 'Taylor Swift',
-            },
-            'skip': 'VEVO is only available in some countries',
-        },
     ]

     def _get_thumbnail_url(self, uri, itemdoc):
@@ -244,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
         webpage = self._download_webpage(url, video_id)

         # Some videos come from Vevo.com
-        m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
-                           webpage, re.DOTALL)
+        m_vevo = re.search(
+            r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
         if m_vevo:
             vevo_id = m_vevo.group(1)
             self.to_screen('Vevo video detected: %s' % vevo_id)
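Note: with MTVServicesInfoExtractor now deriving from SubtitlesInfoExtractor, caption selection plugs into the standard subtitle options. A minimal sketch of driving it through the embedding API (the option keys are the usual YoutubeDL parameters; the scc-to-cea-608 mapping comes from _extract_subtitles above):

    import youtube_dl

    ydl = youtube_dl.YoutubeDL({
        'writesubtitles': True,
        'subtitlesformat': 'scc',  # mapped to 'cea-608' by _extract_subtitles
        'skip_download': True,
    })
    ydl.download(['http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml'])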

View File

@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.nerdcubed.co.uk/feed.json',
         'info_dict': {
+            'id': 'nerdcubed-feed',
             'title': 'nerdcubed.co.uk feed',
         },
         'playlist_mincount': 1300,

View File

@@ -20,6 +20,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
     def _fix_json(json_string):
         return json_string.replace('\\\'', '\'')

+    def _real_extract_video(self, video_id):
+        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
+        data = self._download_json(
+            json_url, video_id, transform_source=self._fix_json)
+        return self._extract_video(data[0])
+
     def _extract_video(self, info):
         video_id = info['id']
         self.report_extraction(video_id)
@@ -54,7 +60,7 @@ class NHLBaseInfoExtractor(InfoExtractor):

 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'

     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -92,15 +98,41 @@ class NHLIE(NHLBaseInfoExtractor):
     }, {
         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
         'only_matching': True,
+    }, {
+        'url': 'http://video.nhl.com/videocenter/?id=736722',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        data = self._download_json(
-            json_url, video_id, transform_source=self._fix_json)
-        return self._extract_video(data[0])
+        video_id = self._match_id(url)
+        return self._real_extract_video(video_id)
+
+
+class NHLNewsIE(NHLBaseInfoExtractor):
+    IE_NAME = 'nhl.com:news'
+    IE_DESC = 'NHL news'
+    _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://www.nhl.com/ice/news.htm?id=750727',
+        'md5': '4b3d1262e177687a3009937bd9ec0be8',
+        'info_dict': {
+            'id': '736722',
+            'ext': 'mp4',
+            'title': 'Cal Clutterbuck has been fined $2,000',
+            'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
+            'duration': 37,
+            'upload_date': '20150128',
+        },
+    }

+    def _real_extract(self, url):
+        news_id = self._match_id(url)
+        webpage = self._download_webpage(url, news_id)
+        video_id = self._search_regex(
+            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
+            webpage, 'video id')
+        return self._real_extract_video(video_id)


 class NHLVideocenterIE(NHLBaseInfoExtractor):
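Note: both NHLIE and the new NHLNewsIE now funnel into the shared _real_extract_video helper. A rough sketch of the equivalent raw request against the playlist servlet (endpoint and quote fixup taken from the code above; network access assumed):

    import json

    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2

    video_id = '736722'
    json_url = ('http://video.nhl.com/videocenter/servlets/playlist'
                '?ids=%s&format=json' % video_id)
    raw = urlopen(json_url).read().decode('utf-8')
    data = json.loads(raw.replace("\\'", "'"))  # same fixup as _fix_json
    print(data[0]['id'])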

View File

@@ -0,0 +1,68 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_duration,
+)
+
+
+class NTVDeIE(InfoExtractor):
+    IE_NAME = 'n-tv.de'
+    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
+        'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
+        'info_dict': {
+            'id': '14438086',
+            'ext': 'mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
+            'alt_title': 'Winterchaos auf deutschen Straßen',
+            'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
+            'duration': 4020,
+            'timestamp': 1422892797,
+            'upload_date': '20150202',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        info = self._parse_json(self._search_regex(
+            r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
+            video_id, transform_source=js_to_json)
+        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
+        vdata = self._parse_json(self._search_regex(
+            r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
+            webpage, 'player data'),
+            video_id, transform_source=js_to_json)
+        duration = parse_duration(vdata.get('duration'))
+
+        formats = [{
+            'format_id': 'flash',
+            'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
+        }, {
+            'format_id': 'mobile',
+            'url': 'http://video.n-tv.de' + vdata['videoMp4'],
+            'tbr': 400,  # estimation
+        }]
+        m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
+        formats.extend(self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4',
+            entry_protocol='m3u8_native', preference=0))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['headline'],
+            'description': info.get('intro'),
+            'alt_title': info.get('kicker'),
+            'timestamp': timestamp,
+            'thumbnail': vdata.get('html5VideoPoster'),
+            'duration': duration,
+            'formats': formats,
+        }
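Note: a minimal sketch of exercising the new n-tv.de extractor end to end through the public API (assumes network access and that the article from the test above is still up):

    import youtube_dl

    ydl = youtube_dl.YoutubeDL({'quiet': True})
    info = ydl.extract_info(
        'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-'
        'fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
        download=False)
    print(info['id'], info['ext'])  # expected: 14438086 mp4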

View File

@@ -1,15 +1,14 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     unescapeHTML
 )


-class NTVIE(InfoExtractor):
+class NTVRuIE(InfoExtractor):
+    IE_NAME = 'ntv.ru'
     _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

     _TESTS = [
@@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
     ]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         page = self._download_webpage(url, video_id)

         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')

View File

@@ -6,12 +6,13 @@ from .common import InfoExtractor


 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
     _TEST = {
         "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
-        "file": "857645.mp4",
         "md5": "d25945f5df41cdca2d2587165ac28720",
         "info_dict": {
+            'id': '857645',
+            'ext': 'mp4',
             "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
             "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
         }

View File

@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
     _TEST = {
         'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
-        'file': '613340.mp4',
         'info_dict': {
+            'id': '613340',
+            'ext': 'mp4',
             'title': 'TOY STORY 3',
             'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
         },

View File

@@ -57,7 +57,7 @@ def _decrypt_url(png):
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

     _TESTS = [{
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -74,7 +74,11 @@ class RTVEALaCartaIE(InfoExtractor):
             'id': '1694255',
             'ext': 'flv',
             'title': 'TODO',
-        }
+        },
+        'skip': 'The f4m manifest can\'t be used yet',
+    }, {
+        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -86,6 +90,18 @@ class RTVEALaCartaIE(InfoExtractor):
         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
+        if not video_url.endswith('.f4m'):
+            auth_url = video_url.replace(
+                'resources/', 'auth/resources/'
+            ).replace('.net.rtve', '.multimedia.cdn.rtve')
+            video_path = self._download_webpage(
+                auth_url, video_id, 'Getting video url')
+            # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+            # the right Content-Length header and the mp4 format
+            video_url = (
+                'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
+                '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
+            )
+
         return {
             'id': video_id,
View File

@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
                     'vbr': int(quality),
                 }
             elif transport == 'm3u8':
-                fmt = {
-                    'url': url,
-                    'ext': 'mp4',
-                }
+                formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+                continue
             else:
                 fmt = {
                     'url': url

View File

@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
     _TEST = {
         'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
+        'info_dict': {
+            'id': '5349193',
+            'title': 'AdAPPter_Hyundai_demo',
+        },
         'playlist': [{
-            'file': '29955898.flv',
             'md5': 'baed851342df6846eb8677a60a011a0f',
             'info_dict': {
+                'id': '29955898',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (1)',
                 'duration': 74,
                 'tbr': 1378,
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
                 'height': 400,
             },
         }, {
-            'file': '29907998.flv',
             'md5': '979b4da2655c4bc2d81aeb915a8c5014',
             'info_dict': {
+                'id': '29907998',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (2)',
                 'duration': 34,
                 'width': 854,
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
         'params': {
             'playlistend': 2,
         },
-        'skip': 'Blocked in the US [sic]',
+        '_skip': 'Blocked in the US [sic]',
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        pl_id = mobj.group('id')
-
+        pl_id = self._match_id(url)
         vast_doc = self._download_xml(url, pl_id)
+
         title = vast_doc.find('.//AdTitle').text
         media = vast_doc.find('.//MediaFile').text
         info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')

View File

@@ -11,7 +11,7 @@ from ..compat import (

 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
                         (
                             (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
                             |
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
-            'file': '110028898.flv',
             'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
             'info_dict': {
+                'id': '110028898',
+                'ext': 'flv',
                 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
             }
         },
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
         },
     ]

-    @classmethod
-    def suitable(cls, url):
-        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
-
     def _extract_video(self, video_id):
         data = compat_urllib_parse.urlencode({'vid': video_id})
         url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
         }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         if mobj.group('token') is not None:
             # The video id is in the redirected url

View File

@@ -108,7 +108,7 @@ class SmotriIE(InfoExtractor):
         # swf player
         {
             'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
-            'md5': '4d47034979d9390d14acdf59c4935bc2',
+            'md5': '31099eeb4bc906712c5f40092045108d',
             'info_dict': {
                 'id': 'v9188090500',
                 'ext': 'mp4',
@@ -139,9 +139,6 @@ class SmotriIE(InfoExtractor):
     def _search_meta(self, name, html, display_name=None):
         if display_name is None:
             display_name = name
-        return self._html_search_regex(
-            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
-            html, display_name, fatal=False)
         return self._html_search_meta(name, html, display_name)

     def _real_extract(self, url):

View File

@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
         'info_dict': {
+            'id': '2284613',
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
         return {
             '_type': 'playlist',
             'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
-            'id': info['id'],
+            'id': '%s' % info['id'],
             'title': info['title'],
         }

View File

@@ -1,14 +1,12 @@
 from __future__ import unicode_literals

-import re
-
 from .mtv import MTVServicesInfoExtractor


 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (www\.spike\.com/(video-clips|episodes)/.+|
-         m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
+        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+         m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
         'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
@@ -25,8 +23,7 @@ class SpikeIE(MTVServicesInfoExtractor):
     _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'

     def _real_extract(self, url):
-        mobj = re.search(self._VALID_URL, url)
-        mobile_id = mobj.group('mobile_id')
-        if mobile_id is not None:
+        mobile_id = self._match_id(url)
+        if mobile_id:
             url = 'http://www.spike.com/video-clips/%s' % mobile_id
         return super(SpikeIE, self)._real_extract(url)

View File

@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-            'file': '80187.mp4',
             'md5': '3f7746aa0dc86de18df7539903d399ea',
             'info_dict': {
+                'id': '80187',
+                'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-            'file': '19705.mp4',
             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
             'info_dict': {
+                'id': '19705',
+                'ext': 'mp4',
                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
                 "title": "Louis C.K. Interview Pt. 1 11/3/11"
             }
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
         video_id = mobj.group("video_id")
         if not video_id:
             video_id = self._html_search_regex(
-                r'data-node-id="(\d+?)"',
+                r'<div\s+class="player".*?data-id="(\d+?)"',
                 webpage, 'video id')

         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id

View File

@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.tele-task.de/archive/video/html5/26168/',
         'info_dict': {
+            'id': '26168',
            'title': 'Duplicate Detection',
         },
         'playlist': [{
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
-
         webpage = self._download_webpage(url, lecture_id)

         title = self._html_search_regex(

View File

@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.tou.tv/30-vies/S04E41',
-        'file': '30-vies_S04E41.mp4',
         'info_dict': {
+            'id': '30-vies_S04E41',
+            'ext': 'mp4',
             'title': '30 vies Saison 4 / Épisode 41',
             'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
             'age_limit': 8,

View File

@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
     _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
-        'file': '75524534.mp4',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
         'info_dict': {
+            'id': '75524534',
+            'ext': 'mp4',
             'title': "DICK HARDWICK 'Comedian'",
             'uploader': 'Richard Hardwick',
         }

View File

@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals

 import re
@@ -11,9 +12,10 @@ from ..utils import (

 class WashingtonPostIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
+            'id': 'sinkhole-of-bureaucracy',
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
@@ -40,15 +42,38 @@ class WashingtonPostIE(InfoExtractor):
                 'upload_date': '20140322',
                 'uploader': 'The Washington Post',
             },
+        }],
+    }, {
+        'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
+        'info_dict': {
+            'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
+            'title': 'One airline figured out how to make sure its airplanes never disappear',
+        },
+        'playlist': [{
+            'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
+            'info_dict': {
+                'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
+                'ext': 'mp4',
+                'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
+                'upload_date': '20141230',
+                'uploader': 'The Washington Post',
+                'timestamp': 1419974765,
+                'title': 'Why black boxes dont transmit data in real time',
+            }
         }]
-    }
+    }]

     def _real_extract(self, url):
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)

         title = self._og_search_title(webpage)
-        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
+
+        uuids = re.findall(r'''(?x)
+            (?:
+                <div\s+class="posttv-video-embed[^>]*?data-uuid=|
+                data-video-uuid=
+            )"([^"]+)"''', webpage)
         entries = []
         for i, uuid in enumerate(uuids, start=1):
             vinfo_all = self._download_json(
@@ -75,10 +100,11 @@ class WashingtonPostIE(InfoExtractor):
                 'filesize': s.get('fileSize'),
                 'url': s.get('url'),
                 'ext': 'mp4',
+                'preference': -100 if s.get('type') == 'smil' else None,
                 'protocol': {
                     'MP4': 'http',
                     'F4F': 'f4m',
-                }.get(s.get('type'))
+                }.get(s.get('type')),
             } for s in vinfo.get('streams', [])]
             source_media_url = vinfo.get('sourceMediaURL')
             if source_media_url:
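Note: the widened UUID regex should pick up both the older data-video-uuid attributes and the newer posttv embed markup. A self-contained check (the HTML snippet is illustrative, not taken from the site):

    import re

    html = ('<div class="posttv-video-embed" data-uuid="0e4bb54c-9065-11e4">'
            '<span data-video-uuid="1234-abcd"></span>')
    uuids = re.findall(r'''(?x)
        (?:
            <div\s+class="posttv-video-embed[^>]*?data-uuid=|
            data-video-uuid=
        )"([^"]+)"''', html)
    print(uuids)  # ['0e4bb54c-9065-11e4', '1234-abcd']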

View File

@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
             'playlist_mincount': 146,
+            'info_dict': {
+                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+            }
         }
     ]

View File

@@ -809,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         player_url = None

         # Get video info
+        embed_webpage = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -1016,10 +1017,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     url += '&signature=' + url_data['sig'][0]
                 elif 's' in url_data:
                     encrypted_sig = url_data['s'][0]
+                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+
                     jsplayer_url_json = self._search_regex(
-                        r'"assets":.+?"js":\s*("[^"]+")',
-                        embed_webpage if age_gate else video_webpage, 'JS player URL')
+                        ASSETS_RE,
+                        embed_webpage if age_gate else video_webpage,
+                        'JS player URL (1)', default=None)
+                    if not jsplayer_url_json and not age_gate:
+                        # We need the embed website after all
+                        if embed_webpage is None:
+                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+                            embed_webpage = self._download_webpage(
+                                embed_url, video_id, 'Downloading embed webpage')
+                        jsplayer_url_json = self._search_regex(
+                            ASSETS_RE, embed_webpage, 'JS player URL')
+
                     player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
@@ -1148,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
         'info_dict': {
+            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
             'title': 'YDL_Empty_List',
         },
         'playlist_count': 0,
@@ -1156,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         'info_dict': {
             'title': '29C3: Not my department',
+            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         },
         'playlist_count': 95,
     }, {
@@ -1163,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'PLBB231211A4F62143',
         'info_dict': {
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
+            'id': 'PLBB231211A4F62143',
         },
         'playlist_mincount': 26,
     }, {
@@ -1170,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
         'info_dict': {
             'title': 'Uploads from Cauchemar',
+            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
         },
         'playlist_mincount': 799,
     }, {
         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         'info_dict': {
             'title': 'YDL_safe_search',
+            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         },
         'playlist_count': 2,
     }, {
@@ -1184,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA15',
+            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
         }
     }, {
         'note': 'Embedded SWF player',
@@ -1191,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA7',
+            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
         }
     }, {
         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
         'info_dict': {
             'title': 'Uploads from Interstellar Movie',
+            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincout': 21,
     }]
@@ -1302,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
         'note': 'paginated channel',
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
         'playlist_mincount': 91,
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+        }
     }]

     def extract_videos_from_page(self, page):

View File

@@ -1,59 +1,122 @@
 from __future__ import unicode_literals

 import json
+import operator
 import re

 from .utils import (
     ExtractorError,
 )

+_OPERATORS = [
+    ('|', operator.or_),
+    ('^', operator.xor),
+    ('&', operator.and_),
+    ('>>', operator.rshift),
+    ('<<', operator.lshift),
+    ('-', operator.sub),
+    ('+', operator.add),
+    ('%', operator.mod),
+    ('/', operator.truediv),
+    ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+

 class JSInterpreter(object):
-    def __init__(self, code):
-        self.code = code
+    def __init__(self, code, objects=None):
+        if objects is None:
+            objects = {}
+        self.code = self._remove_comments(code)
         self._functions = {}
-        self._objects = {}
+        self._objects = objects
+
+    def _remove_comments(self, code):
+        return re.sub(r'(?s)/\*.*?\*/', '', code)

-    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')

-        if stmt.startswith('var '):
-            stmt = stmt[len('var '):]
-        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                         r'=(?P<expr>.*)$', stmt)
-        if ass_m:
-            if ass_m.groupdict().get('index'):
-                def assign(val):
-                    lvar = local_vars[ass_m.group('out')]
-                    idx = self.interpret_expression(
-                        ass_m.group('index'), local_vars, allow_recursion)
-                    assert isinstance(idx, int)
-                    lvar[idx] = val
-                    return val
-                expr = ass_m.group('expr')
-            else:
-                def assign(val):
-                    local_vars[ass_m.group('out')] = val
-                    return val
-                expr = ass_m.group('expr')
-        elif stmt.startswith('return '):
-            assign = lambda v: v
-            expr = stmt[len('return '):]
+        should_abort = False
+        stmt = stmt.lstrip()
+        stmt_m = re.match(r'var\s', stmt)
+        if stmt_m:
+            expr = stmt[len(stmt_m.group(0)):]
         else:
-            # Try interpreting it as an expression
-            expr = stmt
-            assign = lambda v: v
+            return_m = re.match(r'return(?:\s+|$)', stmt)
+            if return_m:
+                expr = stmt[len(return_m.group(0)):]
+                should_abort = True
+            else:
+                # Try interpreting it as an expression
+                expr = stmt

         v = self.interpret_expression(expr, local_vars, allow_recursion)
-        return assign(v)
+        return v, should_abort

     def interpret_expression(self, expr, local_vars, allow_recursion):
+        expr = expr.strip()
+
+        if expr == '':  # Empty expression
+            return None
+
+        if expr.startswith('('):
+            parens_count = 0
+            for m in re.finditer(r'[()]', expr):
+                if m.group(0) == '(':
+                    parens_count += 1
+                else:
+                    parens_count -= 1
+                    if parens_count == 0:
+                        sub_expr = expr[1:m.start()]
+                        sub_result = self.interpret_expression(
+                            sub_expr, local_vars, allow_recursion)
+                        remaining_expr = expr[m.end():].strip()
+                        if not remaining_expr:
+                            return sub_result
+                        else:
+                            expr = json.dumps(sub_result) + remaining_expr
+                        break
+            else:
+                raise ExtractorError('Premature end of parens in %r' % expr)
+
+        for op, opfunc in _ASSIGN_OPERATORS:
+            m = re.match(r'''(?x)
+                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+                \s*%s
+                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+            if not m:
+                continue
+            right_val = self.interpret_expression(
+                m.group('expr'), local_vars, allow_recursion - 1)
+
+            if m.groupdict().get('index'):
+                lvar = local_vars[m.group('out')]
+                idx = self.interpret_expression(
+                    m.group('index'), local_vars, allow_recursion)
+                assert isinstance(idx, int)
+                cur = lvar[idx]
+                val = opfunc(cur, right_val)
+                lvar[idx] = val
+                return val
+            else:
+                cur = local_vars.get(m.group('out'))
+                val = opfunc(cur, right_val)
+                local_vars[m.group('out')] = val
+                return val
+
         if expr.isdigit():
             return int(expr)

-        if expr.isalpha():
-            return local_vars[expr]
+        var_m = re.match(
+            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+            expr)
+        if var_m:
+            return local_vars[var_m.group('name')]

         try:
             return json.loads(expr)
@@ -61,7 +124,7 @@ class JSInterpreter(object):
             pass

         m = re.match(
-            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
             expr)
         if m:
             variable = m.group('var')
@@ -114,23 +177,31 @@ class JSInterpreter(object):
             return obj[member](argvals)

         m = re.match(
-            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
         if m:
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(
                 m.group('idx'), local_vars, allow_recursion - 1)
             return val[idx]

-        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-        if m:
-            a = self.interpret_expression(
-                m.group('a'), local_vars, allow_recursion)
-            b = self.interpret_expression(
-                m.group('b'), local_vars, allow_recursion)
-            return a % b
+        for op, opfunc in _OPERATORS:
+            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+            if not m:
+                continue
+            x, abort = self.interpret_statement(
+                m.group('x'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature left-side return of %s in %r' % (op, expr))
+            y, abort = self.interpret_statement(
+                m.group('y'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature right-side return of %s in %r' % (op, expr))
+            return opfunc(x, y)

         m = re.match(
-            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
         if m:
             fname = m.group('func')
             argvals = tuple([
@@ -139,6 +210,7 @@ class JSInterpreter(object):
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
+
         raise ExtractorError('Unsupported JS expression %r' % expr)

     def extract_object(self, objname):
@@ -162,9 +234,11 @@ class JSInterpreter(object):
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
-                re.escape(funcname), re.escape(funcname))) +
-            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            r'''(?x)
+                (?:function\s+%s|[{;]%s\s*=\s*function)\s*
+                \((?P<args>[^)]*)\)\s*
+                \{(?P<code>[^}]+)\}''' % (
+                re.escape(funcname), re.escape(funcname)),
             self.code)
         if func_m is None:
             raise ExtractorError('Could not find JS function %r' % funcname)
@@ -172,10 +246,16 @@ class JSInterpreter(object):
         return self.build_function(argnames, func_m.group('code'))

+    def call_function(self, funcname, *args):
+        f = self.extract_function(funcname)
+        return f(args)
+
     def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
             for stmt in code.split(';'):
-                res = self.interpret_statement(stmt, local_vars)
+                res, abort = self.interpret_statement(stmt, local_vars)
+                if abort:
+                    break
             return res
         return resf
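Note: taken together, the interpreter now strips /* */ comments, understands var declarations, the full binary operator table and compound assignments, and exposes call_function as a convenience wrapper. A small self-check (the JS snippet is made up for illustration):

    from youtube_dl.jsinterp import JSInterpreter

    jsi = JSInterpreter('function f(x){ var y = x << 2; return y + 3; }')
    assert jsi.call_function('f', 5) == 23  # (5 << 2) + 3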

View File

@@ -698,10 +698,9 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--fixup',
         metavar='POLICY', dest='fixup', default='detect_or_warn',
-        help='(experimental) Automatically correct known faults of the file. '
+        help='Automatically correct known faults of the file. '
              'One of never (do nothing), warn (only emit a warning), '
-             'detect_or_warn(check whether we can do anything about it, warn '
-             'otherwise')
+             'detect_or_warn(the default; fix file if we can, warn otherwise)')
     postproc.add_option(
         '--prefer-avconv',
         action='store_false', dest='prefer_ffmpeg',
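Note: for embedders, the same now non-experimental policy is available as the fixup parameter of YoutubeDL; a one-line sketch:

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'fixup': 'warn'})  # equivalent to --fixup warn on the CLI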

View File

@@ -32,6 +32,7 @@ import xml.etree.ElementTree
 import zlib

 from .compat import (
+    compat_basestring,
     compat_chr,
     compat_getenv,
     compat_html_entities,
@@ -140,7 +141,7 @@ else:
     def find_xpath_attr(node, xpath, key, val):
         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
         # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, unicode):
+        if isinstance(xpath, compat_str):
             xpath = xpath.encode('ascii')

         for f in node.findall(xpath):
@@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):

 def parse_duration(s):
-    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
+    if not isinstance(s, compat_basestring):
         return None

     s = s.strip()
@@ -1274,7 +1275,10 @@ def parse_duration(s):
         (?P<only_hours>[0-9.]+)\s*(?:hours?)|

         (?:
-            (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+            (?:
+                (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
+                (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+            )?
             (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
         )?
         (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
@@ -1292,6 +1296,8 @@ def parse_duration(s):
         res += int(m.group('mins')) * 60
     if m.group('hours'):
         res += int(m.group('hours')) * 60 * 60
+    if m.group('days'):
+        res += int(m.group('days')) * 24 * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res
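Note: a quick check of the new day handling in parse_duration (both spellings hit the branch added above):

    from youtube_dl.utils import parse_duration

    assert parse_duration('2:03:04:05') == 183845  # 2*86400 + 3*3600 + 4*60 + 5
    assert parse_duration('2 days 3 hours 4 minutes 5 seconds') == 183845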
@@ -1426,7 +1432,7 @@ def uppercase_escape(s):

 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, unicode):
+    if sys.version_info < (3, 0) and isinstance(s, compat_str):
         s = s.encode('utf-8')
     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
@@ -1542,7 +1548,7 @@ def js_to_json(code):
     res = re.sub(r'''(?x)
         "(?:[^"\\]*(?:\\\\|\\")?)*"|
         '(?:[^'\\]*(?:\\\\|\\')?)*'|
-        [a-zA-Z_][a-zA-Z_0-9]*
+        [a-zA-Z_][.a-zA-Z_0-9]*
         ''', fix_kv, code)
     res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
     return res

View File

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.01.30.1'
+__version__ = '2015.02.02.2'