Compare commits
81 Commits
2015.03.15
...
2015.03.24
Author | SHA1 | Date | |
---|---|---|---|
![]() |
48c971e073 | ||
![]() |
f5e2efbbf0 | ||
![]() |
b0872c19ea | ||
![]() |
9f790b9901 | ||
![]() |
93f787070f | ||
![]() |
f9544f6e8f | ||
![]() |
336d19044c | ||
![]() |
7866c9e173 | ||
![]() |
1a4123de04 | ||
![]() |
cf2e2eb1c0 | ||
![]() |
2051acdeb2 | ||
![]() |
cefdf970cc | ||
![]() |
a1d0aa7b88 | ||
![]() |
49aeedb8cb | ||
![]() |
ef249a2cd7 | ||
![]() |
a09141548a | ||
![]() |
5379a2d40d | ||
![]() |
c9450c7ab1 | ||
![]() |
faa1b5c292 | ||
![]() |
393d9fc6d2 | ||
![]() |
4e6a228689 | ||
![]() |
179d6678b1 | ||
![]() |
85698c5086 | ||
![]() |
a7d9ded45d | ||
![]() |
531980d89c | ||
![]() |
1887ecd4d6 | ||
![]() |
cd32c2caba | ||
![]() |
1c9a1457fc | ||
![]() |
038b0eb1da | ||
![]() |
f20bf146e2 | ||
![]() |
01218f919b | ||
![]() |
2684871bc1 | ||
![]() |
ccf3960eec | ||
![]() |
eecc0685c9 | ||
![]() |
2ed849eccf | ||
![]() |
3378d67a18 | ||
![]() |
f3c0c667a6 | ||
![]() |
0ae8bbac2d | ||
![]() |
cbc3cfcab4 | ||
![]() |
b30ef07c6c | ||
![]() |
73900846b1 | ||
![]() |
d1dc7e3991 | ||
![]() |
3073a6d5e9 | ||
![]() |
aae53774f2 | ||
![]() |
7a757b7194 | ||
![]() |
fa8ce26904 | ||
![]() |
2c2c06e359 | ||
![]() |
ee580538fa | ||
![]() |
c3c5c31517 | ||
![]() |
ed9a25dd61 | ||
![]() |
9ef4f12b53 | ||
![]() |
84f8101606 | ||
![]() |
b1337948eb | ||
![]() |
98f02fdde2 | ||
![]() |
048fdc2292 | ||
![]() |
2ca1c5aa9f | ||
![]() |
674fb0fcc5 | ||
![]() |
00bfe40e4d | ||
![]() |
cd459b1d49 | ||
![]() |
92a4793b3c | ||
![]() |
dc03a42537 | ||
![]() |
219da6bb68 | ||
![]() |
0499cd866e | ||
![]() |
13047f4135 | ||
![]() |
af69cab21d | ||
![]() |
d41a3fa1b4 | ||
![]() |
733be371af | ||
![]() |
576904bce6 | ||
![]() |
cf47794f09 | ||
![]() |
c06a9f8730 | ||
![]() |
2e90dff2c2 | ||
![]() |
90183a46d8 | ||
![]() |
b68eedba23 | ||
![]() |
d5b559393b | ||
![]() |
2cb434e53e | ||
![]() |
cd65491c30 | ||
![]() |
a172d96292 | ||
![]() |
55969016e9 | ||
![]() |
5c7495a194 | ||
![]() |
5ee6fc974e | ||
![]() |
63fc800057 |
2
AUTHORS
2
AUTHORS
@@ -115,3 +115,5 @@ Leslie P. Polzer
|
|||||||
Duncan Keall
|
Duncan Keall
|
||||||
Alexander Mamay
|
Alexander Mamay
|
||||||
Devin J. Pohly
|
Devin J. Pohly
|
||||||
|
Eduardo Ferro Aldama
|
||||||
|
Jeff Buchbinder
|
||||||
|
42
devscripts/generate_aes_testdata.py
Normal file
42
devscripts/generate_aes_testdata.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.utils import intlist_to_bytes
|
||||||
|
from youtube_dl.aes import aes_encrypt, key_expansion
|
||||||
|
|
||||||
|
secret_msg = b'Secret message goes here'
|
||||||
|
|
||||||
|
|
||||||
|
def hex_str(int_list):
    """Return the 'hex' codec encoding of intlist_to_bytes(int_list)."""
    raw_bytes = intlist_to_bytes(int_list)
    return codecs.encode(raw_bytes, 'hex')
|
||||||
|
|
||||||
|
|
||||||
|
def openssl_encode(algo, key, iv):
    """Encrypt the module-level secret_msg with the openssl command-line tool.

    algo is appended to '-' to form the cipher option (callers in this script
    pass e.g. 'aes-128-cbc'); key and iv are passed through hex_str() and
    handed to openssl via -K and -iv.

    Returns whatever openssl wrote to stdout.
    Raises subprocess.CalledProcessError if openssl exits with a non-zero
    status.
    """
    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = prog.communicate(secret_msg)
    if prog.returncode != 0:
        # Fail loudly instead of letting empty/partial output be printed
        # and later pasted into the test suite as if it were valid data.
        raise subprocess.CalledProcessError(prog.returncode, cmd)
    return out
|
||||||
|
|
||||||
|
# One list object serves as both key and IV: 0x20, 0x15, then 14 zeros.
key = [0x20, 0x15] + 14 * [0]
iv = key

# Each section prints a label followed by the repr() of openssl's output.
r = openssl_encode('aes-128-cbc', key, iv)
print('aes_cbc_decrypt')
print(repr(r))

# 16-entry password: the CTR key is the password encrypted with its own
# expanded key.
password = key
new_key = aes_encrypt(password, key_expansion(password))
r = openssl_encode('aes-128-ctr', new_key, iv)
print('aes_decrypt_text 16')
print(repr(r))

# 32-entry password: key followed by 16 zeros; the aes_encrypt() result is
# repeated (32 // 16) times before being handed to aes-256-ctr.
password = key + 16 * [0]
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
r = openssl_encode('aes-256-ctr', new_key, iv)
print('aes_decrypt_text 32')
print(repr(r))
|
@@ -112,6 +112,7 @@
|
|||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **divxstage**: DivxStage
|
- **divxstage**: DivxStage
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
|
- **DouyuTV**
|
||||||
- **DRBonanza**
|
- **DRBonanza**
|
||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
@@ -230,6 +231,7 @@
|
|||||||
- **Letv**
|
- **Letv**
|
||||||
- **LetvPlaylist**
|
- **LetvPlaylist**
|
||||||
- **LetvTv**
|
- **LetvTv**
|
||||||
|
- **Libsyn**
|
||||||
- **lifenews**: LIFE | NEWS
|
- **lifenews**: LIFE | NEWS
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
@@ -309,6 +311,7 @@
|
|||||||
- **npo.nl:radio**
|
- **npo.nl:radio**
|
||||||
- **npo.nl:radio:fragment**
|
- **npo.nl:radio:fragment**
|
||||||
- **NRK**
|
- **NRK**
|
||||||
|
- **NRKPlaylist**
|
||||||
- **NRKTV**
|
- **NRKTV**
|
||||||
- **ntv.ru**
|
- **ntv.ru**
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
@@ -342,6 +345,7 @@
|
|||||||
- **PornHubPlaylist**
|
- **PornHubPlaylist**
|
||||||
- **Pornotube**
|
- **Pornotube**
|
||||||
- **PornoXO**
|
- **PornoXO**
|
||||||
|
- **PrimeShareTV**
|
||||||
- **PromptFile**
|
- **PromptFile**
|
||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
- **Puls4**
|
- **Puls4**
|
||||||
@@ -367,6 +371,7 @@
|
|||||||
- **RTP**
|
- **RTP**
|
||||||
- **RTS**: RTS.ch
|
- **RTS**: RTS.ch
|
||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
|
- **rtve.es:infantil**: RTVE infantil
|
||||||
- **rtve.es:live**: RTVE.es live streams
|
- **rtve.es:live**: RTVE.es live streams
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
@@ -487,6 +492,7 @@
|
|||||||
- **Ubu**
|
- **Ubu**
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
|
- **Ultimedia**
|
||||||
- **Unistra**
|
- **Unistra**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
- **ustream**
|
- **ustream**
|
||||||
|
@@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches
|
|||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
|
from youtube_dl.utils import match_filter_func
|
||||||
|
|
||||||
TEST_URL = 'http://localhost/sample.mp4'
|
TEST_URL = 'http://localhost/sample.mp4'
|
||||||
|
|
||||||
@@ -339,6 +340,8 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'G')
|
self.assertEqual(downloaded['format_id'], 'G')
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeDL(unittest.TestCase):
|
||||||
def test_subtitles(self):
|
def test_subtitles(self):
|
||||||
def s_formats(lang, autocaption=False):
|
def s_formats(lang, autocaption=False):
|
||||||
return [{
|
return [{
|
||||||
@@ -461,6 +464,73 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||||
os.unlink(audiofile)
|
os.unlink(audiofile)
|
||||||
|
|
||||||
|
def test_match_filter(self):
    # Exercise the 'match_filter' parameter with a hand-written callable and
    # with filters built by match_filter_func().
    class FilterYDL(YDL):
        def __init__(self, *args, **kwargs):
            super(FilterYDL, self).__init__(*args, **kwargs)
            self.params['simulate'] = True

        def process_info(self, info_dict):
            # Starts the lookup after YDL in the MRO, i.e. YDL's own
            # process_info is bypassed.
            super(YDL, self).process_info(info_dict)

        def _match_entry(self, info_dict, incomplete):
            verdict = super(FilterYDL, self)._match_entry(info_dict, incomplete)
            # None from the parent is treated as "accepted": record the entry.
            if verdict is None:
                self.downloaded_info_dicts.append(info_dict)
            return verdict

    # The same two dict objects are fed to every FilterYDL instance below.
    videos = [
        {
            'id': '1',
            'url': TEST_URL,
            'title': 'one',
            'extractor': 'TEST',
            'duration': 30,
            'filesize': 10 * 1024,
        },
        {
            'id': '2',
            'url': TEST_URL,
            'title': 'two',
            'extractor': 'TEST',
            'duration': 10,
            'description': 'foo',
            'filesize': 5 * 1024,
        },
    ]

    def get_videos(filter_=None):
        ydl = FilterYDL({'match_filter': filter_})
        for video in videos:
            ydl.process_ie_result(video, download=True)
        return [entry['id'] for entry in ydl.downloaded_info_dicts]

    # No filter: both videos are recorded.
    self.assertEqual(get_videos(), ['1', '2'])

    def only_first(v):
        return None if v['id'] == '1' else 'Video id is not 1'

    self.assertEqual(get_videos(only_first), ['1'])

    # Filters built from expression strings, checked in the original order.
    for expression, expected_ids in (
            ('duration < 30', ['2']),
            ('description = foo', ['2']),
            ('description =? foo', ['1', '2']),
            ('filesize > 5KiB', ['1'])):
        self.assertEqual(
            get_videos(match_filter_func(expression)), expected_ids)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
55
test/test_aes.py
Normal file
55
test/test_aes.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
||||||
|
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
|
||||||
|
|
||||||
|
|
||||||
|
class TestAES(unittest.TestCase):
    """Round-trip and fixed-ciphertext checks for the youtube_dl.aes functions."""

    def setUp(self):
        # key and iv intentionally refer to the very same list object
        shared = [0x20, 0x15] + 14 * [0]
        self.key = shared
        self.iv = shared
        self.secret_msg = b'Secret message goes here'

    def test_encrypt(self):
        # Encrypting then decrypting with the same key must give the input back
        plain = b'message'
        key = list(range(16))
        cipher_ints = aes_encrypt(bytes_to_intlist(plain), key)
        recovered = intlist_to_bytes(aes_decrypt(cipher_ints, key))
        self.assertEqual(recovered, plain)

    def test_cbc_decrypt(self):
        ciphertext = b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
        plain_ints = aes_cbc_decrypt(
            bytes_to_intlist(ciphertext), self.key, self.iv)
        recovered = intlist_to_bytes(plain_ints)
        # Trailing \x08 bytes are stripped before comparing
        self.assertEqual(recovered.rstrip(b'\x08'), self.secret_msg)

    def test_decrypt_text(self):
        # (key size argument, ciphertext) pairs, checked in order
        cases = (
            (16, b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'),
            (32, b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'),
        )
        for key_size, ciphertext in cases:
            password = intlist_to_bytes(self.key).decode('utf-8')
            # Input is base64 of the first 8 IV entries followed by the ciphertext
            encrypted = base64.b64encode(
                intlist_to_bytes(self.iv[:8]) + ciphertext)
            decrypted = aes_decrypt_text(encrypted, password, key_size)
            self.assertEqual(decrypted, self.secret_msg)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@@ -1,4 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
@@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
|
|||||||
def test_main_exec(self):
|
def test_main_exec(self):
|
||||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
|
||||||
|
def test_cmdline_umlauts(self):
    # A non-ASCII positional argument must not make the program write
    # anything to stderr.
    cmd = [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version']
    proc = subprocess.Popen(
        cmd, cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
    captured = proc.communicate()
    self.assertFalse(captured[1])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -8,7 +8,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||||
import ssl
|
import ssl
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
|
|||||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_proxy_handler(name):
    """Create a request-handler class whose GET replies are prefixed with *name*.

    Every GET is answered with status 200 and the UTF-8 text
    '<name>: <request path>'.
    """

    class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            # Overridden to do nothing
            pass

        def do_GET(self):
            body = '{self.proxy_name}: {self.path}'.format(self=self)
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(body.encode('utf-8'))

    return HTTPTestRequestHandler
|
||||||
|
|
||||||
|
|
||||||
|
class TestProxy(unittest.TestCase):
    """Check which local test server answers plain and per-request-proxy calls."""

    @staticmethod
    def _launch_server(name):
        """Start a local HTTP server tagged *name* on a free port.

        The server runs serve_forever() in a daemon thread.
        Returns a (server, port, thread) tuple.
        """
        server = compat_http_server.HTTPServer(
            ('localhost', 0), _build_proxy_handler(name))
        port = server.socket.getsockname()[1]
        worker = threading.Thread(target=server.serve_forever)
        worker.daemon = True
        worker.start()
        return server, port, worker

    def setUp(self):
        self.proxy, self.port, self.proxy_thread = self._launch_server('normal')
        self.cn_proxy, self.cn_port, self.cn_proxy_thread = self._launch_server('cn')

    def test_proxy(self):
        cn_proxy = 'localhost:{0}'.format(self.cn_port)
        params = {
            'proxy': 'localhost:{0}'.format(self.port),
            'cn_verification_proxy': cn_proxy,
        }
        ydl = YoutubeDL(params)
        url = 'http://foo.com/bar'

        # A bare URL is expected to be answered by the 'normal' server
        plain_reply = ydl.urlopen(url).read().decode('utf-8')
        self.assertEqual(plain_reply, 'normal: {0}'.format(url))

        # With the Ytdl-request-proxy header the 'cn' server must answer
        req = compat_urllib_request.Request(url)
        req.add_header('Ytdl-request-proxy', cn_proxy)
        cn_reply = ydl.urlopen(req).read().decode('utf-8')
        self.assertEqual(cn_reply, 'cn: {0}'.format(url))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
|
|||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
escape_url,
|
escape_url,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
@@ -39,6 +40,7 @@ from youtube_dl.utils import (
|
|||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
|
sanitize_url_path_consecutive_slashes,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@@ -53,6 +55,7 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
xpath_text,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
)
|
)
|
||||||
@@ -168,6 +171,26 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||||
|
|
||||||
|
def test_sanitize_url_path_consecutive_slashes(self):
    # (input URL, expected result) pairs, checked in order
    expectations = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//',
         'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/',
         'http://hostname/'),
        ('http://hostname/abc//',
         'http://hostname/abc/'),
    )
    for raw_url, cleaned_url in expectations:
        self.assertEqual(
            sanitize_url_path_consecutive_slashes(raw_url), cleaned_url)
|
||||||
|
|
||||||
def test_ordered_set(self):
|
def test_ordered_set(self):
|
||||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||||
self.assertEqual(orderedSet([]), [])
|
self.assertEqual(orderedSet([]), [])
|
||||||
@@ -229,6 +252,17 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||||
|
|
||||||
|
def test_xpath_text(self):
    # Found element -> its text; missing element -> None, or ExtractorError
    # when fatal=True is passed.
    testxml = '''<root>
<div>
<p>Foo</p>
</div>
</root>'''
    root = xml.etree.ElementTree.fromstring(testxml)
    found_text = xpath_text(root, 'div/p')
    self.assertEqual(found_text, 'Foo')
    missing_text = xpath_text(root, 'div/bar')
    self.assertTrue(missing_text is None)
    self.assertRaises(ExtractorError, xpath_text, root, 'div/bar', fatal=True)
|
||||||
|
|
||||||
def test_smuggle_url(self):
|
def test_smuggle_url(self):
|
||||||
data = {"ö": "ö", "abc": [3]}
|
data = {"ö": "ö", "abc": [3]}
|
||||||
url = 'https://foo.bar/baz?x=y#a'
|
url = 'https://foo.bar/baz?x=y#a'
|
||||||
|
@@ -328,9 +328,6 @@ class YoutubeDL(object):
|
|||||||
'Parameter outtmpl is bytes, but should be a unicode string. '
|
'Parameter outtmpl is bytes, but should be a unicode string. '
|
||||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||||
|
|
||||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
|
||||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
|
||||||
|
|
||||||
self._setup_opener()
|
self._setup_opener()
|
||||||
|
|
||||||
if auto_init:
|
if auto_init:
|
||||||
@@ -1218,9 +1215,6 @@ class YoutubeDL(object):
|
|||||||
if len(info_dict['title']) > 200:
|
if len(info_dict['title']) > 200:
|
||||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||||
|
|
||||||
# Keep for backwards compatibility
|
|
||||||
info_dict['stitle'] = info_dict['title']
|
|
||||||
|
|
||||||
if 'format' not in info_dict:
|
if 'format' not in info_dict:
|
||||||
info_dict['format'] = info_dict['ext']
|
info_dict['format'] = info_dict['ext']
|
||||||
|
|
||||||
|
@@ -107,6 +107,7 @@ from .dctp import DctpTvIE
|
|||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
|
from .douyutv import DouyuTVIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
@@ -249,6 +250,7 @@ from .letv import (
|
|||||||
LetvTvIE,
|
LetvTvIE,
|
||||||
LetvPlaylistIE
|
LetvPlaylistIE
|
||||||
)
|
)
|
||||||
|
from .libsyn import LibsynIE
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
@@ -345,6 +347,7 @@ from .npo import (
|
|||||||
)
|
)
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
NRKIE,
|
NRKIE,
|
||||||
|
NRKPlaylistIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
)
|
)
|
||||||
from .ntvde import NTVDeIE
|
from .ntvde import NTVDeIE
|
||||||
@@ -380,6 +383,7 @@ from .pornhub import (
|
|||||||
)
|
)
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
|
from .primesharetv import PrimeShareTVIE
|
||||||
from .promptfile import PromptFileIE
|
from .promptfile import PromptFileIE
|
||||||
from .prosiebensat1 import ProSiebenSat1IE
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
from .puls4 import Puls4IE
|
from .puls4 import Puls4IE
|
||||||
@@ -405,7 +409,7 @@ from .rtlnow import RTLnowIE
|
|||||||
from .rtl2 import RTL2IE
|
from .rtl2 import RTL2IE
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
@@ -536,6 +540,7 @@ from .udemy import (
|
|||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
|
from .ultimedia import UltimediaIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
|
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
|
@@ -23,7 +23,6 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
inc,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
|
|||||||
|
|
||||||
key = obfuscate_key(id)
|
key = obfuscate_key(id)
|
||||||
|
|
||||||
class Counter:
|
|
||||||
__value = iv
|
|
||||||
|
|
||||||
def next_value(self):
|
|
||||||
temp = self.__value
|
|
||||||
self.__value = inc(self.__value)
|
|
||||||
return temp
|
|
||||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||||
return zlib.decompress(decrypted_data)
|
return zlib.decompress(decrypted_data)
|
||||||
|
|
||||||
|
77
youtube_dl/extractor/douyutv.py
Normal file
77
youtube_dl/extractor/douyutv.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class DouyuTVIE(InfoExtractor):
    """Extractor for douyutv.com room pages (always reported as live)."""

    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
    _TEST = {
        'url': 'http://www.douyutv.com/iseven',
        'info_dict': {
            'id': 'iseven',
            'ext': 'flv',
            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'md5:9e525642c25a0a24302869937cf69d17',
            # Raw string: same value, but no invalid '\.' escape in the literal
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': '7师傅',
            'uploader_id': '431925',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Fetch the room's JSON description and build the info dict.

        Raises ExtractorError (expected=True) when the API reports a
        non-zero 'error' value or when 'show_status' is '2' (offline).
        """
        video_id = self._match_id(url)

        config = self._download_json(
            'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)

        # Check the reported error before touching the payload, so a failed
        # request surfaces as ExtractorError even when 'data' is missing or
        # is not a dict. Compare by value: identity against an int literal
        # is not a reliable equality test.
        error_code = config.get('error', 0)
        if error_code != 0:
            raise ExtractorError(
                'Server reported error %i' % error_code, expected=True)

        data = config['data']

        # 1 = live, 2 = offline
        if data.get('show_status') == '2':
            raise ExtractorError(
                'Live stream is offline', expected=True)

        base_url = data['rtmp_url']
        live_path = data['rtmp_live']

        title = self._live_title(data['room_name'])
        description = data.get('show_details')
        thumbnail = data.get('room_src')

        uploader = data.get('nickname')
        uploader_id = data.get('owner_uid')

        # Anything other than a dict here is replaced by an empty mapping;
        # the main live path is then always added under the 'live' id.
        multi_formats = data.get('rtmp_multi_bitrate')
        if not isinstance(multi_formats, dict):
            multi_formats = {}
        multi_formats['live'] = live_path

        formats = [{
            'url': '%s/%s' % (base_url, format_path),
            'format_id': format_id,
            # The 'live' entry gets the higher preference
            'preference': 1 if format_id == 'live' else 0,
        } for format_id, format_path in multi_formats.items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
            'is_live': True,
        }
|
@@ -527,6 +527,17 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Viddler'],
|
'add_ie': ['Viddler'],
|
||||||
},
|
},
|
||||||
|
# Libsyn embed
|
||||||
|
{
|
||||||
|
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3377616',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||||
|
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||||
|
'upload_date': '20150220',
|
||||||
|
},
|
||||||
|
},
|
||||||
# jwplayer YouTube
|
# jwplayer YouTube
|
||||||
{
|
{
|
||||||
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||||
@@ -1006,6 +1017,19 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
|
# Look for NYTimes player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
|
# Look for Libsyn player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||||
@@ -1268,10 +1292,16 @@ class GenericIE(InfoExtractor):
|
|||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
found = re.search(
|
found = re.search(
|
||||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||||
webpage)
|
webpage)
|
||||||
|
if not found:
|
||||||
|
# Look also in Refresh HTTP header
|
||||||
|
refresh_header = head_response.headers.get('Refresh')
|
||||||
|
if refresh_header:
|
||||||
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = found.group(1)
|
new_url = found.group(1)
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
|
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
|
|||||||
|
|
||||||
if webpage is not None:
|
if webpage is not None:
|
||||||
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
||||||
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
|
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
|
||||||
|
|
||||||
return (webpage, None)
|
return webpage, None
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self.ts = int(time.time() * 1000) # timestamp in millis
|
self.ts = int(time.time() * 1000) # timestamp in millis
|
||||||
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
|
|||||||
swf_referer = None
|
swf_referer = None
|
||||||
if self.do_playerpage_request:
|
if self.do_playerpage_request:
|
||||||
(_, player_objs) = self._get_playerpage(url)
|
(_, player_objs) = self._get_playerpage(url)
|
||||||
if player_objs is not None:
|
if player_objs:
|
||||||
swf_referer = self._build_swf_referer(url, player_objs[0])
|
swf_referer = self._build_swf_referer(url, player_objs[0])
|
||||||
self.to_screen('SWF Referer: %s' % swf_referer)
|
self.to_screen('SWF Referer: %s' % swf_referer)
|
||||||
|
|
||||||
|
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
|
|||||||
description = self._og_search_description(webpage, default=None)
|
description = self._og_search_description(webpage, default=None)
|
||||||
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||||
duration = int_or_none(flashvars.get('duration'))
|
duration = int_or_none(flashvars.get('duration'))
|
||||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
width = int_or_none(self._og_search_property(
|
||||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
'video:width', webpage, 'video width', default=None))
|
||||||
|
height = int_or_none(self._og_search_property(
|
||||||
|
'video:height', webpage, 'video height', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
|
|||||||
play_json_req = compat_urllib_request.Request(
|
play_json_req = compat_urllib_request.Request(
|
||||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||||
)
|
)
|
||||||
play_json_req.add_header(
|
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||||
'Ytdl-request-proxy',
|
if cn_verification_proxy:
|
||||||
self._downloader.params.get('cn_verification_proxy'))
|
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||||
|
|
||||||
play_json = self._download_json(
|
play_json = self._download_json(
|
||||||
play_json_req,
|
play_json_req,
|
||||||
media_id, 'playJson data')
|
media_id, 'Downloading playJson data')
|
||||||
|
|
||||||
# Check for errors
|
# Check for errors
|
||||||
playstatus = play_json['playstatus']
|
playstatus = play_json['playstatus']
|
||||||
|
59
youtube_dl/extractor/libsyn.py
Normal file
59
youtube_dl/extractor/libsyn.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class LibsynIE(InfoExtractor):
    """Extractor for episodes served by the Libsyn HTML5 embed player."""

    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
        },
    }

    def _real_extract(self, url):
        """Download the embed page and build the info dict for one episode.

        The media URLs are read from the ``mediaURL`` / ``mediaURLLibsyn``
        JavaScript variables; title, description, thumbnail and release
        date are scraped from the page markup.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Raw string: ``\s`` must reach the regex engine untouched.
        # set() drops duplicates when both variables hold the same URL.
        formats = [{
            'url': media_url,
        } for media_url in set(re.findall(
            r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

        # The podcast title is mandatory (extraction fails without it) ...
        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'title')
        # ... while the episode title is optional and may come back as None.
        episode_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)

        # Only join the two parts when the optional one was actually found;
        # otherwise the title would end in a literal " - None".
        if episode_title:
            title = '%s - %s' % (podcast_title, episode_title)
        else:
            title = podcast_title

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -10,7 +11,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
parse_iso8601,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'Daniel Holbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'upload_date': '20111115',
|
|
||||||
'timestamp': 1321359578,
|
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'm4a',
|
'ext': 'mp3',
|
||||||
'title': 'Electric Relaxation vol. 3',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
'uploader': 'Daniel Drumz',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*/images/',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_url(self, track_id, template_url):
|
def _get_url(self, track_id, template_url, server_number):
|
||||||
server_count = 30
|
boundaries = (1, 30)
|
||||||
for i in range(server_count):
|
for nr in server_numbers(server_number, boundaries):
|
||||||
url = template_url % i
|
url = template_url % nr
|
||||||
try:
|
try:
|
||||||
# We only want to know if the request succeed
|
# We only want to know if the request succeed
|
||||||
# don't download the whole file
|
# don't download the whole file
|
||||||
self._request_webpage(
|
self._request_webpage(
|
||||||
HEADRequest(url), track_id,
|
HEADRequest(url), track_id,
|
||||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
'Checking URL %d/%d ...' % (nr, boundaries[-1]))
|
||||||
return url
|
return url
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
|
|||||||
preview_url = self._search_regex(
|
preview_url = self._search_regex(
|
||||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||||
|
server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
|
||||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
self.to_screen('Trying with m4a extension')
|
self.to_screen('Trying with m4a extension')
|
||||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
raise ExtractorError('Unable to extract track url')
|
raise ExtractorError('Unable to extract track url')
|
||||||
|
|
||||||
PREFIX = (
|
PREFIX = (
|
||||||
r'<span class="play-button[^"]*?"'
|
r'm-play-on-spacebar[^>]+'
|
||||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||||
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
|
|||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
like_count = str_to_int(self._search_regex(
|
like_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
|
||||||
r'/favorites/?">([0-9]+)<'],
|
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
r'/listeners/?">([0-9,.]+)</a>'],
|
r'/listeners/?">([0-9,.]+)</a>'],
|
||||||
webpage, 'play count', fatal=False))
|
webpage, 'play count', fatal=False))
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
|
||||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
|
||||||
webpage, 'upload date', default=None))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'timestamp': timestamp,
|
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def server_numbers(first, boundaries):
    """ Server numbers to try in descending order of probable availability.
    Starting from first (i.e. the number of the server hosting the preview file)
    and going further and further up to the higher boundary and down to the
    lower one in an alternating fashion. Namely:

        server_numbers(2, (1, 5))

        # Where the preview server is 2, min number is 1 and max is 5.
        # Yields: 2, 3, 1, 4, 5

    Why not random numbers or increasing sequences? Since from what I've seen,
    full length files seem to be hosted on servers whose number is closer to
    that of the preview; to be confirmed.

    This is a generator: the ValueError for a malformed ``boundaries``
    argument is raised when the first value is requested, not at call time.
    """
    zip_longest = getattr(itertools, 'zip_longest', None)
    if zip_longest is None:
        # python 2.x
        zip_longest = itertools.izip_longest

    if len(boundaries) != 2:
        raise ValueError("boundaries should be a two-element tuple")
    # Named so that the builtins min() and max() are not shadowed.
    lowest, highest = boundaries
    highs = range(first + 1, highest + 1)
    lows = range(first - 1, lowest - 1, -1)

    yield first
    for n in itertools.chain.from_iterable(zip_longest(highs, lows)):
        # zip_longest pads the shorter side with None. Test for None
        # explicitly so that a legitimate server number 0 is not dropped.
        if n is not None:
            yield n
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MLBIE(InfoExtractor):
|
class MLBIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||||
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
|
|||||||
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
IE_NAME = 'niconico'
|
IE_NAME = 'niconico'
|
||||||
IE_DESC = 'ニコニコ動画'
|
IE_DESC = 'ニコニコ動画'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -39,7 +39,24 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'username': 'ydl.niconico@gmail.com',
|
'username': 'ydl.niconico@gmail.com',
|
||||||
'password': 'youtube-dl',
|
'password': 'youtube-dl',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||||
|
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nm14296458',
|
||||||
|
'ext': 'swf',
|
||||||
|
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||||
|
'description': 'md5:',
|
||||||
|
'uploader': 'りょうた',
|
||||||
|
'uploader_id': '18822557',
|
||||||
|
'upload_date': '20110429',
|
||||||
|
'duration': 209,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'username': 'ydl.niconico@gmail.com',
|
||||||
|
'password': 'youtube-dl',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
@@ -89,7 +106,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if self._AUTHENTICATED:
|
if self._AUTHENTICATED:
|
||||||
# Get flv info
|
# Get flv info
|
||||||
flv_info_webpage = self._download_webpage(
|
flv_info_webpage = self._download_webpage(
|
||||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
video_id, 'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
else:
|
else:
|
||||||
# Get external player info
|
# Get external player info
|
||||||
|
@@ -231,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
stream_url = self._download_json(
|
stream_url = self._download_json(
|
||||||
stream_info['stream'], display_id,
|
stream_info['stream'], display_id,
|
||||||
'Downloading %s URL' % stream_type,
|
'Downloading %s URL' % stream_type,
|
||||||
transform_source=strip_jsonp)
|
'Unable to download %s URL' % stream_type,
|
||||||
|
transform_source=strip_jsonp, fatal=False)
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
if stream_type == 'hds':
|
if stream_type == 'hds':
|
||||||
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
||||||
# f4m downloader downloads only piece of live stream
|
# f4m downloader downloads only piece of live stream
|
||||||
|
@@ -14,46 +14,48 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NRKIE(InfoExtractor):
|
class NRKIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150533',
|
'id': '150533',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
|
'duration': 263,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
'url': 'http://www.nrk.no/video/PS*154915',
|
||||||
'md5': '3471f2a51718195164e88f46bf427668',
|
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '154915',
|
'id': '154915',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
'title': 'Slik høres internett ut når du er blind',
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||||
|
'duration': 20,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||||
|
video_id, 'Downloading media JSON')
|
||||||
|
|
||||||
if data['usageRights']['isGeoBlocked']:
|
if data['usageRights']['isGeoBlocked']:
|
||||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
raise ExtractorError(
|
||||||
|
'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
|
||||||
|
|
||||||
|
duration = parse_duration(data.get('duration'))
|
||||||
|
|
||||||
images = data.get('images')
|
images = data.get('images')
|
||||||
if images:
|
if images:
|
||||||
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NRKPlaylistIE(InfoExtractor):
    """Extractor for nrk.no article pages that embed several videos."""

    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
        'info_dict': {
            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
            'title': 'Gjenopplev den historiske solformørkelsen',
            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
        'info_dict': {
            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
            'title': 'Rivertonprisen til Karin Fossum',
            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Return a playlist with one ``nrk:<id>`` entry per embedded video.

        Video ids are taken from the ``data-video-id`` attribute of elements
        whose class list contains ``rich``; playlist title and description
        come from the page's Open Graph metadata.
        """
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        embedded_ids = re.findall(
            r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
            page)

        # Each id is handed over to the NRK extractor through the nrk: scheme.
        entries = []
        for embedded_id in embedded_ids:
            entries.append(self.url_result('nrk:%s' % embedded_id, 'NRK'))

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
class NRKTVIE(InfoExtractor):
|
class NRKTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||||
|
|
||||||
|
@@ -1,15 +1,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NYTimesIE(InfoExtractor):
|
class NYTimesIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
|
|||||||
'uploader': 'Brett Weiner',
|
'uploader': 'Brett Weiner',
|
||||||
'duration': 419,
|
'duration': 419,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = video_data['headline']
|
title = video_data['headline']
|
||||||
description = video_data['summary']
|
description = video_data.get('summary')
|
||||||
duration = video_data['duration'] / 1000.0
|
duration = float_or_none(video_data.get('duration'), 1000)
|
||||||
|
|
||||||
uploader = video_data['byline']
|
uploader = video_data['byline']
|
||||||
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
||||||
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
|
|||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': video['url'],
|
'url': video['url'],
|
||||||
'format_id': video['type'],
|
'format_id': video.get('type'),
|
||||||
'vcodec': video['video_codec'],
|
'vcodec': video.get('video_codec'),
|
||||||
'width': video['width'],
|
'width': int_or_none(video.get('width')),
|
||||||
'height': video['height'],
|
'height': int_or_none(video.get('height')),
|
||||||
'filesize': get_file_size(video['fileSize']),
|
'filesize': get_file_size(video.get('fileSize')),
|
||||||
} for video in video_data['renditions']
|
} for video in video_data['renditions']
|
||||||
]
|
]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
|
|||||||
thumbnails = [
|
thumbnails = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||||
'resolution': '%dx%d' % (image['width'], image['height']),
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
} for image in video_data['images']
|
} for image in video_data['images']
|
||||||
]
|
]
|
||||||
|
|
||||||
|
69
youtube_dl/extractor/primesharetv.py
Normal file
69
youtube_dl/extractor/primesharetv.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class PrimeShareTVIE(InfoExtractor):
    """Extractor for primeshare.tv download pages."""

    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'

    _TEST = {
        'url': 'http://primeshare.tv/download/238790B611',
        'md5': 'b92d9bf5461137c36228009f31533fbc',
        'info_dict': {
            'id': '238790B611',
            'ext': 'mp4',
            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
        },
    }

    def _real_extract(self, url):
        """Submit the page's hidden form after the wait time and scrape the result.

        Raises ExtractorError (expected) when the page reports that the file
        does not exist.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if '>File not exist<' in webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Collect the hidden form inputs as name -> value pairs; they are
        # posted back unchanged below.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        headers = {
            'Referer': url,
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        # Wait one second longer than the countdown announced by the page
        # (falling back to 7 when the variable is not found).
        wait_time = int(self._search_regex(
            r'var\s+cWaitTime\s*=\s*(\d+)',
            webpage, 'wait time', default=7)) + 1
        self._sleep(wait_time, video_id)

        # urlencode() returns text on Python 3, but the request body has to
        # be bytes there. The encoded form is pure ASCII, so encoding it is
        # safe on Python 2 as well.
        post_data = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post_data, headers)
        video_page = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
            video_page, 'video url')

        # NOTE(review): the optional groups are written as the literal
        # &nbsp; entity on the assumption that the page markup contains it
        # - confirm against a live page.
        title = self._html_search_regex(
            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
            video_page, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'mp4',
        }
|
@@ -127,6 +127,47 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
for s in subs)
|
for s in subs)
|
||||||
|
|
||||||
|
|
||||||
|
class RTVEInfantilIE(InfoExtractor):
    """Extractor for videos in the "infantil" section of rtve.es."""

    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
        'md5': '915319587b33720b8e0357caaa6617e6',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
            'duration': 357.958,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the video's JSON config and its page.

        Metadata (title, image, duration) comes from the first item of the
        JSON config; the media URL is obtained by passing the downloaded
        thumbnail PNG for the page's internal player id to _decrypt_url.
        """
        video_id = self._match_id(url)

        config = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)
        item = config['page']['items'][0]

        page = self._download_webpage(url, video_id)
        # The player element carries an internal id that differs from video_id.
        player_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', page, 'internal video ID')

        encoded_png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % player_id,
            video_id, 'Downloading url information')
        video_url = _decrypt_url(encoded_png)

        result = {
            'id': video_id,
            'ext': 'mp4',
            'title': item['title'],
            'url': video_url,
            'thumbnail': item.get('image'),
            # The scale argument divides the reported duration by 1000.
            'duration': float_or_none(item.get('duration'), scale=1000),
        }
        return result
|
||||||
|
|
||||||
|
|
||||||
class RTVELiveIE(InfoExtractor):
|
class RTVELiveIE(InfoExtractor):
|
||||||
IE_NAME = 'rtve.es:live'
|
IE_NAME = 'rtve.es:live'
|
||||||
IE_DESC = 'RTVE.es live streams'
|
IE_DESC = 'RTVE.es live streams'
|
||||||
|
@@ -4,22 +4,87 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .common import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request
|
||||||
|
)
|
||||||
|
from ..utils import sanitize_url_path_consecutive_slashes
|
||||||
|
|
||||||
|
|
||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
'note': 'This video is available only in Mainland China',
|
||||||
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||||
'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
|
'md5': '29175c8cadd8b5cc4055001e85d6b372',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '382479172',
|
'id': '382479172',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MV:Far East Movement《The Illest》',
|
'title': 'MV:Far East Movement《The Illest》',
|
||||||
},
|
},
|
||||||
'skip': 'Only available from China',
|
'params': {
|
||||||
}
|
'cn_verification_proxy': 'proxy.uku.im:8888'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||||
|
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '409385080',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||||
|
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78693464',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '【爱范品】第31期:MWC见不到的奇葩手机',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Multipart video',
|
||||||
|
'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78910339',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78910339_part1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 294,
|
||||||
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78910339_part2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 300,
|
||||||
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': '8407e634175fdac706766481b9443450',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78910339_part3',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 150,
|
||||||
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}, {
|
||||||
|
'note': 'Video with title containing dash',
|
||||||
|
'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '78932792',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'youtube-dl testing video',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
||||||
@@ -29,8 +94,14 @@ class SohuIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(base_data_url + vid_id)
|
||||||
|
|
||||||
|
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||||
|
if cn_verification_proxy:
|
||||||
|
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||||
|
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
base_data_url + vid_id, video_id,
|
req, video_id,
|
||||||
'Downloading JSON data for %s' % vid_id)
|
'Downloading JSON data for %s' % vid_id)
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -38,10 +109,8 @@ class SohuIE(InfoExtractor):
|
|||||||
mytv = mobj.group('mytv') is not None
|
mytv = mobj.group('mytv') is not None
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
raw_title = self._html_search_regex(
|
|
||||||
r'(?s)<title>(.+?)</title>',
|
title = self._og_search_title(webpage)
|
||||||
webpage, 'video title')
|
|
||||||
title = raw_title.partition('-')[0].strip()
|
|
||||||
|
|
||||||
vid = self._html_search_regex(
|
vid = self._html_search_regex(
|
||||||
r'var vid ?= ?["\'](\d+)["\']',
|
r'var vid ?= ?["\'](\d+)["\']',
|
||||||
@@ -77,7 +146,9 @@ class SohuIE(InfoExtractor):
|
|||||||
% (format_id, i + 1, part_count))
|
% (format_id, i + 1, part_count))
|
||||||
|
|
||||||
part_info = part_str.split('|')
|
part_info = part_str.split('|')
|
||||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
|
||||||
|
video_url = sanitize_url_path_consecutive_slashes(
|
||||||
|
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@@ -23,6 +23,7 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
_API_BASE = 'https://api.twitch.tv'
|
_API_BASE = 'https://api.twitch.tv'
|
||||||
_USHER_BASE = 'http://usher.twitch.tv'
|
_USHER_BASE = 'http://usher.twitch.tv'
|
||||||
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
||||||
|
_LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
|
||||||
_NETRC_MACHINE = 'twitch'
|
_NETRC_MACHINE = 'twitch'
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
@@ -67,14 +68,14 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
'authenticity_token': authenticity_token,
|
'authenticity_token': authenticity_token,
|
||||||
'redirect_on_login': '',
|
'redirect_on_login': '',
|
||||||
'embed_form': 'false',
|
'embed_form': 'false',
|
||||||
'mp_source_action': '',
|
'mp_source_action': 'login-button',
|
||||||
'follow': '',
|
'follow': '',
|
||||||
'user[login]': username,
|
'login': username,
|
||||||
'user[password]': password,
|
'password': password,
|
||||||
}
|
}
|
||||||
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
request.add_header('Referer', self._LOGIN_URL)
|
request.add_header('Referer', self._LOGIN_URL)
|
||||||
response = self._download_webpage(
|
response = self._download_webpage(
|
||||||
request, None, 'Logging in as %s' % username)
|
request, None, 'Logging in as %s' % username)
|
||||||
@@ -148,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
|
|||||||
|
|
||||||
class TwitchVideoIE(TwitchItemBaseIE):
|
class TwitchVideoIE(TwitchItemBaseIE):
|
||||||
IE_NAME = 'twitch:video'
|
IE_NAME = 'twitch:video'
|
||||||
_VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
_VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||||
_ITEM_TYPE = 'video'
|
_ITEM_TYPE = 'video'
|
||||||
_ITEM_SHORTCUT = 'a'
|
_ITEM_SHORTCUT = 'a'
|
||||||
|
|
||||||
@@ -164,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
|
|||||||
|
|
||||||
class TwitchChapterIE(TwitchItemBaseIE):
|
class TwitchChapterIE(TwitchItemBaseIE):
|
||||||
IE_NAME = 'twitch:chapter'
|
IE_NAME = 'twitch:chapter'
|
||||||
_VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
_VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||||
_ITEM_TYPE = 'chapter'
|
_ITEM_TYPE = 'chapter'
|
||||||
_ITEM_SHORTCUT = 'c'
|
_ITEM_SHORTCUT = 'c'
|
||||||
|
|
||||||
@@ -183,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
|
|||||||
|
|
||||||
class TwitchVodIE(TwitchItemBaseIE):
|
class TwitchVodIE(TwitchItemBaseIE):
|
||||||
IE_NAME = 'twitch:vod'
|
IE_NAME = 'twitch:vod'
|
||||||
_VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
|
_VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
|
||||||
_ITEM_TYPE = 'vod'
|
_ITEM_TYPE = 'vod'
|
||||||
_ITEM_SHORTCUT = 'v'
|
_ITEM_SHORTCUT = 'v'
|
||||||
|
|
||||||
|
104
youtube_dl/extractor/ultimedia.py
Normal file
104
youtube_dl/extractor/ultimedia.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UltimediaIE(InfoExtractor):
    """Extractor for video pages hosted on ultimedia.com.

    The video page embeds an ``<iframe>`` pointing at a ``/deliver/`` URL;
    that iframe page carries a ``jwplayer(...).setup({...})`` call whose
    JSON argument lists the available playback modes.
    """

    # NOTE(review): the '+' inside the character class is matched
    # literally; kept as-is to avoid changing which URLs are accepted.
    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150317',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': "Two - C'est la vie (Clip)",
            'description': 'Two',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150224',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        If a playback mode points at YouTube, the extraction is delegated
        to the 'Youtube' extractor through ``url_result``.

        Raises ExtractorError (expected=True) when the iframe page states
        that the video is currently not available.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player lives in an iframe served from /deliver/.
        deliver_url = self._search_regex(
            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
            webpage, 'deliver URL')

        deliver_page = self._download_webpage(
            deliver_url, video_id, 'Downloading iframe page')

        if '>This video is currently not available' in deliver_page:
            raise ExtractorError(
                'Video %s is currently not available' % video_id, expected=True)

        # JSON argument of the jwplayer setup call.
        player = self._parse_json(
            self._search_regex(
                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
            video_id)

        # Later entries rank higher: 'html5' is preferred over 'flash'.
        quality = qualities(['flash', 'html5'])
        formats = []
        for mode in player['modes']:
            video_url = mode.get('config', {}).get('file')
            if not video_url:
                # Mode without a media file; nothing to download.
                continue
            if re.match(r'https?://www\.youtube\.com/.+?', video_url):
                # The video is hosted on YouTube; hand over entirely.
                return self.url_result(video_url, 'Youtube')
            formats.append({
                'url': video_url,
                'format_id': mode.get('type'),
                'quality': quality(mode.get('type')),
            })
        self._sort_formats(formats)

        thumbnail = player.get('image')

        # Prefer the heading on the main page; fall back to the
        # ``nameVideo`` JavaScript variable of the iframe page.
        title = clean_html((
            self._html_search_regex(
                r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
                webpage, 'title', default=None)
            or self._search_regex(
                r"var\s+nameVideo\s*=\s*'([^']+)'",
                deliver_page, 'title')))

        # Description and upload date are optional (fatal=False).
        description = clean_html(self._html_search_regex(
            r'(?s)<span>Description</span>(.+?)</p>', webpage,
            'description', fatal=False))

        upload_date = unified_strdate(self._search_regex(
            r'Ajouté le\s*<span>([^<]+)', webpage,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }
|
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_request
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_request,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
remove_start,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VideoMegaIE(InfoExtractor):
|
class VideoMegaIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:www\.)?videomega\.tv/
|
(?:www\.)?videomega\.tv/
|
||||||
(?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
(?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
|
'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
|
||||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'QR0HCUHI1661IHUCH0RQ',
|
'id': '4GNA688SU99US886ANG4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny',
|
'title': 'BigBuckBunny_320x180',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
|
iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
|
||||||
req = compat_urllib_request.Request(iframe_url)
|
req = compat_urllib_request.Request(iframe_url)
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
try:
|
title = self._html_search_regex(
|
||||||
escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
|
r'<title>(.*?)</title>', webpage, 'title')
|
||||||
except IndexError:
|
title = re.sub(
|
||||||
raise ExtractorError('Unable to extract escaped data')
|
r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
|
||||||
|
|
||||||
playlist = compat_urllib_parse.unquote(escaped_data)
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
|
r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||||
video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
|
video_url = self._search_regex(
|
||||||
title = remove_start(self._html_search_regex(
|
r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
|
||||||
r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'format_id': 'sd',
|
|
||||||
'url': video_url,
|
|
||||||
}]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'url': video_url,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': iframe_url,
|
'Referer': iframe_url,
|
||||||
|
@@ -33,14 +33,13 @@ class VineIE(InfoExtractor):
|
|||||||
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': data['videoLowURL'],
|
'format_id': '%(format)s-%(rate)s' % f,
|
||||||
'ext': 'mp4',
|
'vcodec': f['format'],
|
||||||
'format_id': 'low',
|
'quality': f['rate'],
|
||||||
}, {
|
'url': f['videoUrl'],
|
||||||
'url': data['videoUrl'],
|
} for f in data['videoUrls'] if f.get('rate')]
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': 'standard',
|
self._sort_formats(formats)
|
||||||
}]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -794,6 +794,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
|
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||||
else:
|
else:
|
||||||
command_line_conf = sys.argv[1:]
|
command_line_conf = sys.argv[1:]
|
||||||
|
# Workaround for Python 2.x, where argv is a byte list
|
||||||
|
if sys.version_info < (3,):
|
||||||
|
command_line_conf = [
|
||||||
|
a.decode('utf-8', 'replace') for a in command_line_conf]
|
||||||
|
|
||||||
if '--ignore-config' in command_line_conf:
|
if '--ignore-config' in command_line_conf:
|
||||||
system_conf = []
|
system_conf = []
|
||||||
user_conf = []
|
user_conf = []
|
||||||
|
@@ -326,6 +326,13 @@ def sanitize_path(s):
|
|||||||
return os.path.join(*sanitized_path)
|
return os.path.join(*sanitized_path)
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path

    Only the path component is rewritten: every run of two or more
    slashes in it becomes a single slash.  The scheme separator ('://')
    and the other parsed components (netloc, params, query, fragment)
    are passed through unchanged.
    """
    parsed_url = compat_urlparse.urlparse(url)
    # Address the path by field name rather than by positional index.
    collapsed_path = re.sub(r'/{2,}', '/', parsed_url.path)
    return compat_urlparse.urlunparse(
        parsed_url._replace(path=collapsed_path))
|
||||||
|
|
||||||
|
|
||||||
def orderedSet(iterable):
|
def orderedSet(iterable):
|
||||||
""" Remove all duplicates from the input iterable """
|
""" Remove all duplicates from the input iterable """
|
||||||
res = []
|
res = []
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.03.15'
|
__version__ = '2015.03.24'
|
||||||
|
Reference in New Issue
Block a user