Compare commits
137 Commits
2015.01.23
...
2015.01.30
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c03844a4ec | ||
|
|
6449cd807e | ||
|
|
e2a08185c6 | ||
|
|
5d6677ca28 | ||
|
|
5a8a29cfea | ||
|
|
c1708b89c0 | ||
|
|
83fddfd493 | ||
|
|
1798791df1 | ||
|
|
6ebb0dca9f | ||
|
|
cf8d6ec865 | ||
|
|
f452f72c6b | ||
|
|
3198291f26 | ||
|
|
02c1d5e285 | ||
|
|
ec4161a57d | ||
|
|
03d8d4df38 | ||
|
|
03d2d6d51b | ||
|
|
83fda3c000 | ||
|
|
4fe8495a23 | ||
|
|
a16f6643f0 | ||
|
|
adc0ae3ceb | ||
|
|
7bb3ceb4c7 | ||
|
|
75a4fc5b72 | ||
|
|
87673cd438 | ||
|
|
f345fe9db7 | ||
|
|
e683a48d0e | ||
|
|
a7a14d9586 | ||
|
|
219337990b | ||
|
|
376a770cc4 | ||
|
|
7e500dbd93 | ||
|
|
affd04a45d | ||
|
|
c84130e865 | ||
|
|
4f264c02c7 | ||
|
|
d205476103 | ||
|
|
367cc95aa7 | ||
|
|
206dba27a4 | ||
|
|
dcf53d4408 | ||
|
|
63be3b8989 | ||
|
|
18b4e9e79d | ||
|
|
cb454b333d | ||
|
|
e0d9f85aee | ||
|
|
b04fbd789c | ||
|
|
aad9556414 | ||
|
|
48a1e5141a | ||
|
|
0865f397ae | ||
|
|
796df3c631 | ||
|
|
a28383834b | ||
|
|
3a0d2f520a | ||
|
|
6348ad12a0 | ||
|
|
fe7710cbcc | ||
|
|
2103d038b3 | ||
|
|
9f0df77ab1 | ||
|
|
e72c7e4123 | ||
|
|
2b1bd292ae | ||
|
|
71e7da6533 | ||
|
|
80a49d3d7b | ||
|
|
d862a4f94f | ||
|
|
a57e8ce658 | ||
|
|
96a53167fa | ||
|
|
6d2749aac4 | ||
|
|
b1b0b1ca30 | ||
|
|
3dee7826e7 | ||
|
|
c9326b38b8 | ||
|
|
d4f64cabf4 | ||
|
|
fe41ddbb28 | ||
|
|
ee69b99af6 | ||
|
|
767ff0a2d1 | ||
|
|
8604e882a8 | ||
|
|
cc1237f484 | ||
|
|
37f4ce538a | ||
|
|
7d346331b5 | ||
|
|
e1ccc04e9f | ||
|
|
881e6a1f5c | ||
|
|
baeaeffce5 | ||
|
|
c14e88f0f5 | ||
|
|
8940b8608e | ||
|
|
ec82d85acd | ||
|
|
cfb56d1af3 | ||
|
|
1e10802990 | ||
|
|
6695916045 | ||
|
|
7906d199a1 | ||
|
|
1070711d60 | ||
|
|
4b405cfc6e | ||
|
|
e5660ee6ae | ||
|
|
8011fba3ae | ||
|
|
587a9c2749 | ||
|
|
e1554a407d | ||
|
|
3fcfb8e9fa | ||
|
|
384b62028a | ||
|
|
b95aab8482 | ||
|
|
fc2d6abfe7 | ||
|
|
27de5625d4 | ||
|
|
6aa4f54d66 | ||
|
|
222516d97d | ||
|
|
a055469faf | ||
|
|
fdaaaaa878 | ||
|
|
12d1fb5aa9 | ||
|
|
48f00d15b1 | ||
|
|
3e055aa5c3 | ||
|
|
6896a52721 | ||
|
|
5779b3e1fe | ||
|
|
62cd676c74 | ||
|
|
0c17278843 | ||
|
|
d229ee70da | ||
|
|
26e274666d | ||
|
|
ebd46aed51 | ||
|
|
e793f7671c | ||
|
|
c2e64f71d0 | ||
|
|
0920e5830f | ||
|
|
bf7fa94ec7 | ||
|
|
6f58db8982 | ||
|
|
aa42e87340 | ||
|
|
649f7966f7 | ||
|
|
5f0d813d93 | ||
|
|
501f13fbf3 | ||
|
|
ba55168157 | ||
|
|
d79323136f | ||
|
|
08ff6ab07e | ||
|
|
ba655a0e4c | ||
|
|
b59c17e543 | ||
|
|
61ca9a80b3 | ||
|
|
bd3cbe0716 | ||
|
|
3d5f7a3947 | ||
|
|
5a000b45b3 | ||
|
|
40b1cbafac | ||
|
|
4231235cda | ||
|
|
ca7a9c1bf7 | ||
|
|
247a5da704 | ||
|
|
d1b4617e1d | ||
|
|
74dcf42a85 | ||
|
|
a42c921598 | ||
|
|
f96252b913 | ||
|
|
04b89c9026 | ||
|
|
0c72eb9060 | ||
|
|
f9f86b0c64 | ||
|
|
0aed8df2bf | ||
|
|
2f61fe4ccc | ||
|
|
03359e9864 |
@@ -4,6 +4,9 @@ python:
|
||||
- "2.7"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -yqq rtmpdump
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
||||
5
AUTHORS
5
AUTHORS
@@ -104,3 +104,8 @@ Ondřej Caletka
|
||||
Dinesh S
|
||||
Johan K. Jensen
|
||||
Yen Chi Hsuan
|
||||
Enam Mijbah Noor
|
||||
David Luhmer
|
||||
Shaya Goldberg
|
||||
Yen Chi Hsuan
|
||||
Paul Hartmann
|
||||
|
||||
40
README.md
40
README.md
@@ -93,6 +93,14 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC playlist video items to download. Specify
|
||||
indices of the videos in the playlist
|
||||
separated by commas like: "--playlist-items
|
||||
1,2,5,8" if you want to download videos
|
||||
indexed 1, 2, 5, 8 in the playlist. You can
|
||||
specify range: "--playlist-items
|
||||
1-3,7,10-13", it will download the videos
|
||||
at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX download only matching titles (regex or
|
||||
caseless sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or
|
||||
@@ -124,7 +132,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES number of retries (default is 10)
|
||||
-R, --retries RETRIES number of retries (default is 10), or
|
||||
"infinite".
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer
|
||||
@@ -132,6 +141,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize (experimental) set file xattribute
|
||||
ytdl.filesize with expected filesize
|
||||
--external-downloader COMMAND (experimental) Use the specified external
|
||||
downloader. Currently supports
|
||||
aria2c,curl,wget
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
@@ -191,7 +205,6 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
--write-annotations write video annotations to a .annotation
|
||||
file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-info-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
@@ -206,6 +219,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail images:
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--write-all-thumbnails write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail
|
||||
formats
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
@@ -259,6 +278,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
@@ -271,9 +292,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
video results by putting a condition in
|
||||
brackets, as in -f "best[height=720]" (or
|
||||
-f "[filesize>10M]"). This works for
|
||||
filesize, height, width, tbr, abr, and vbr
|
||||
and the comparisons <, <=, >, >=, =, != .
|
||||
Formats for which the value is not known
|
||||
filesize, height, width, tbr, abr, vbr, and
|
||||
fps and the comparisons <, <=, >, >=, =, !=
|
||||
. Formats for which the value is not known
|
||||
are excluded unless you put a question mark
|
||||
(?) after the operator. You can combine
|
||||
format filters, so -f "[height <=?
|
||||
@@ -504,6 +525,13 @@ From then on, after restarting your shell, you will be able to access both youtu
|
||||
|
||||
Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
|
||||
|
||||
### How do I download a video starting with a `-` ?
|
||||
|
||||
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
|
||||
|
||||
youtube-dl -- -wNyEUrxzFU
|
||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
@@ -584,7 +612,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
||||
ignore = E501
|
||||
|
||||
@@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
|
||||
72
test/test_http.py
Normal file
72
test/test_http.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Request handler serving the two fixture resources used by the HTTP tests.

    ``/video.html`` answers with a small HTML page that embeds ``/vid.mp4``;
    ``/vid.mp4`` answers with a few placeholder bytes declared as
    ``video/mp4``. Any other path is answered with a 404.
    """

    def log_message(self, format, *args):
        # Intentionally a no-op so that requests produce no log output.
        pass

    def do_GET(self):
        if self.path == '/video.html':
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
        elif self.path == '/vid.mp4':
            self.send_response(200)
            self.send_header('Content-Type', 'video/mp4')
            self.end_headers()
            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
        else:
            # An `assert False` here would be stripped under `python -O`,
            # leaving the client without any response; send a proper error.
            self.send_error(404)
|
||||
|
||||
|
||||
class FakeLogger(object):
    """Logger stand-in whose methods discard every message.

    It exposes the ``debug``, ``warning`` and ``error`` methods so it can be
    passed as the 'logger' option without producing any output.
    """

    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
    """Exercises the 'nocheckcertificate' option against a local HTTPS server."""

    def setUp(self):
        # Serve HTTPTestRequestHandler over TLS on an OS-assigned port
        # (port 0), using the certificate file stored next to this module.
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        self.httpd = compat_http_server.HTTPServer(
            ('localhost', 0), HTTPTestRequestHandler)
        self.httpd.socket = ssl.wrap_socket(
            self.httpd.socket, certfile=certfn, server_side=True)
        self.port = self.httpd.socket.getsockname()[1]
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def tearDown(self):
        # Stop the serve_forever() loop and release the listening socket so
        # the server does not outlive the test that started it.
        self.httpd.shutdown()
        self.httpd.server_close()
        self.server_thread.join()

    def test_nocheckcertificate(self):
        # The failure case is only asserted from 2.7.9 on; per the original
        # note, earlier versions perform no certificate checking anyway.
        if sys.version_info >= (2, 7, 9):
            ydl = YoutubeDL({'logger': FakeLogger()})
            self.assertRaises(
                Exception,
                ydl.extract_info, 'https://localhost:%d/video.html' % self.port)

        ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
        r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
        self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
|
||||
fix_xml_ampersands,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
OnDemandPagedList,
|
||||
@@ -51,6 +52,7 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
render_table,
|
||||
)
|
||||
|
||||
|
||||
@@ -417,5 +419,31 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
self.assertTrue(age_restricted(18, 14))
|
||||
self.assertFalse(age_restricted(18, 18))
|
||||
|
||||
def test_is_html(self):
    """Check is_html() on non-HTML data and on HTML in several encodings."""
    # Leading non-markup bytes before the tag: not HTML.
    self.assertFalse(is_html(b'\x49\x44\x43<html'))
    self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
    self.assertTrue(is_html(  # UTF-8 with BOM
        b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
    self.assertTrue(is_html(  # UTF-16-LE
        b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
    ))
    self.assertTrue(is_html(  # UTF-16-BE
        b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
    ))
    self.assertTrue(is_html(  # UTF-32-BE
        b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
    self.assertTrue(is_html(  # UTF-32-LE
        b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
|
||||
|
||||
def test_render_table(self):
    """Check that render_table() lays out a header row and data rows."""
    # NOTE(review): the column padding inside the expected strings was
    # collapsed to single spaces in this copy; it is restored here on the
    # assumption that each column is padded to its widest cell plus one
    # separating space -- confirm against render_table().
    self.assertEqual(
        render_table(
            ['a', 'bcd'],
            [[123, 4], [9999, 51]]),
        'a    bcd\n'
        '123  4\n'
        '9999 51')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
52
test/testcert.pem
Normal file
52
test/testcert.pem
Normal file
@@ -0,0 +1,52 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
|
||||
HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
|
||||
Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
|
||||
A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
|
||||
mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
|
||||
aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
|
||||
tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
|
||||
BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
|
||||
Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
|
||||
63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
|
||||
ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
|
||||
8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
|
||||
XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
|
||||
G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
|
||||
zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
|
||||
8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
|
||||
gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
|
||||
XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
|
||||
Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
|
||||
98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
|
||||
1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
|
||||
DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
|
||||
Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
|
||||
FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
|
||||
pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
|
||||
VJua1Wn8HKxnXMI61DhTCSo=
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
|
||||
VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
|
||||
A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
|
||||
c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
|
||||
aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
|
||||
CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
|
||||
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
|
||||
IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
|
||||
Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
|
||||
zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
|
||||
YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
|
||||
O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
|
||||
A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
|
||||
KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
|
||||
Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
|
||||
ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
|
||||
AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
|
||||
5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
|
||||
PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
|
||||
cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
|
||||
SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
|
||||
Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
|
||||
-----END CERTIFICATE-----
|
||||
@@ -54,8 +54,10 @@ from .utils import (
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
render_table,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
UnavailableVideoError,
|
||||
@@ -73,6 +75,7 @@ from .extractor import get_info_extractor, gen_extractors
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
FFmpegFixupM4aPP,
|
||||
FFmpegFixupStretchedPP,
|
||||
FFmpegMergerPP,
|
||||
FFmpegPostProcessor,
|
||||
@@ -134,6 +137,7 @@ class YoutubeDL(object):
|
||||
nooverwrites: Prevent overwriting files.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
playlistreverse: Download playlist items in reverse order.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
@@ -143,6 +147,7 @@ class YoutubeDL(object):
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
writeannotations: Write the video annotations to a .annotations.xml file
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
write_all_thumbnails: Write all thumbnail formats to files
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatic subtitles to a file
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
@@ -193,11 +198,12 @@ class YoutubeDL(object):
|
||||
postprocessor.
|
||||
progress_hooks: A list of functions that get called on download
|
||||
progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
The dict may also have some of the following entries:
|
||||
* status: One of "downloading" and "finished".
|
||||
Check this first and ignore unknown values.
|
||||
|
||||
If status is one of "downloading" or "finished", the
|
||||
following properties may also be present:
|
||||
* filename: The final filename (always present)
|
||||
* downloaded_bytes: Bytes on disk
|
||||
* total_bytes: Size of the whole file, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
@@ -213,16 +219,21 @@ class YoutubeDL(object):
|
||||
- "never": do nothing
|
||||
- "warn": only emit a warning
|
||||
- "detect_or_warn": check whether we can do anything
|
||||
about it, warn otherwise
|
||||
about it, warn otherwise (default)
|
||||
source_address: (Experimental) Client-side IP address to bind to.
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@@ -532,6 +543,11 @@ class YoutubeDL(object):
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return filename
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
@@ -695,24 +711,51 @@ class YoutubeDL(object):
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items', None)
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = iter_playlistitems(playlistitems_str)
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
if playlistitems:
|
||||
entries = [ie_entries[i - 1] for i in playlistitems]
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
else: # iterable
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
if playlistitems:
|
||||
entry_list = list(ie_entries)
|
||||
entries = [entry_list[i - 1] for i in playlistitems]
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
@@ -782,7 +825,7 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*\[
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize)
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
\]$
|
||||
@@ -862,6 +905,42 @@ class YoutubeDL(object):
|
||||
return matches[-1]
|
||||
return None
|
||||
|
||||
def _calc_headers(self, info_dict):
    """Return the HTTP headers to send when requesting info_dict['url'].

    The result starts as a copy of std_headers, is overlaid with
    info_dict['http_headers'] when that entry is present and non-empty, and
    gets a 'Cookie' entry when _calc_cookies() yields a value.
    """
    # Copy so that the shared std_headers mapping is never modified.
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        res['Cookie'] = cookies

    return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
    """Return the Cookie header value for info_dict['url'], or None.

    A minimal request-like object is handed to
    self.cookiejar.add_cookie_header(), and the 'Cookie' header the jar
    sets on it (if any) is returned.
    """
    try:
        from urllib.parse import urlparse
    except ImportError:  # Python 2
        from urlparse import urlparse

    class _PseudoRequest(object):
        """Just enough of the request interface for the cookie jar."""

        def __init__(self, url):
            parsed = urlparse(url)
            self.url = url
            self.headers = {}
            self.unverifiable = False
            # The cookie policy reads these for secure and port-restricted
            # cookies; without them such cookies raise AttributeError.
            self.type = parsed.scheme
            self.host = parsed.netloc
            self.origin_req_host = parsed.hostname or ''

        def add_unredirected_header(self, k, v):
            self.headers[k] = v

        def get_full_url(self):
            return self.url

        def is_unverifiable(self):
            return self.unverifiable

        def has_header(self, h):
            return h in self.headers

        def get_header(self, header_name, default=None):
            return self.headers.get(header_name, default)

        # Method-style accessors, as used by the Python 2 cookie jar.
        def get_type(self):
            return self.type

        def get_host(self):
            return self.host

        def get_origin_req_host(self):
            return self.origin_req_host

    pr = _PseudoRequest(info_dict['url'])
    self.cookiejar.add_cookie_header(pr)
    return pr.headers.get('Cookie')
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
|
||||
@@ -876,9 +955,14 @@ class YoutubeDL(object):
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
if thumbnails is None:
|
||||
thumbnail = info_dict.get('thumbnail')
|
||||
if thumbnail:
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('width'), t.get('height'), t.get('url')))
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
for t in thumbnails:
|
||||
if 'width' in t and 'height' in t:
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
@@ -930,6 +1014,11 @@ class YoutubeDL(object):
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Add HTTP headers, so that external programs can use them from the
|
||||
# json output
|
||||
full_format_info = info_dict.copy()
|
||||
full_format_info.update(format)
|
||||
format['http_headers'] = self._calc_headers(full_format_info)
|
||||
|
||||
format_limit = self.params.get('format_limit', None)
|
||||
if format_limit:
|
||||
@@ -945,9 +1034,12 @@ class YoutubeDL(object):
|
||||
# element in the 'formats' field in info_dict is info_dict itself,
|
||||
# which can't be exported to json
|
||||
info_dict['formats'] = formats
|
||||
if self.params.get('listformats', None):
|
||||
if self.params.get('listformats'):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
if self.params.get('list_thumbnails'):
|
||||
self.list_thumbnails(info_dict)
|
||||
return
|
||||
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
@@ -982,6 +1074,7 @@ class YoutubeDL(object):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'format_id': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
@@ -1043,7 +1136,7 @@ class YoutubeDL(object):
|
||||
|
||||
self._num_downloads += 1
|
||||
|
||||
filename = self.prepare_filename(info_dict)
|
||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
@@ -1068,10 +1161,7 @@ class YoutubeDL(object):
|
||||
if self.params.get('forceformat', False):
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
info_dict['_filename'] = filename
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
if self.params.get('dump_single_json', False):
|
||||
info_dict['_filename'] = filename
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -1154,35 +1244,18 @@ class YoutubeDL(object):
|
||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
||||
return
|
||||
|
||||
if self.params.get('writethumbnail', False):
|
||||
if info_dict.get('thumbnail') is not None:
|
||||
thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
|
||||
thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen('[%s] %s: Thumbnail is already present' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
else:
|
||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
try:
|
||||
uf = self.urlopen(info_dict['thumbnail'])
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning('Unable to download thumbnail "%s": %s' %
|
||||
(info_dict['thumbnail'], compat_str(err)))
|
||||
self._write_thumbnails(info_dict, filename)
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
try:
|
||||
def dl(name, info):
|
||||
fd = get_suitable_downloader(info)(self, self.params)
|
||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
return fd.download(name, info)
|
||||
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
success = True
|
||||
@@ -1218,11 +1291,12 @@ class YoutubeDL(object):
|
||||
|
||||
if success:
|
||||
# Fixup content
|
||||
fixup_policy = self.params.get('fixup')
|
||||
if fixup_policy is None:
|
||||
fixup_policy = 'detect_or_warn'
|
||||
|
||||
stretched_ratio = info_dict.get('stretched_ratio')
|
||||
if stretched_ratio is not None and stretched_ratio != 1:
|
||||
fixup_policy = self.params.get('fixup')
|
||||
if fixup_policy is None:
|
||||
fixup_policy = 'detect_or_warn'
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
@@ -1236,7 +1310,23 @@ class YoutubeDL(object):
|
||||
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
else:
|
||||
assert fixup_policy == 'ignore'
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
|
||||
info_dict['id']))
|
||||
elif fixup_policy == 'detect_or_warn':
|
||||
fixup_pp = FFmpegFixupM4aPP(self)
|
||||
if fixup_pp.available:
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
info_dict['__postprocessors'].append(fixup_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id']))
|
||||
else:
|
||||
assert fixup_policy in ('ignore', 'never')
|
||||
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
@@ -1438,8 +1528,26 @@ class YoutubeDL(object):
|
||||
header_line = line({
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||
self.to_screen('[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
self.to_screen(
|
||||
'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
|
||||
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Uses info_dict['thumbnails'] when it is non-empty; otherwise falls back
    to a single entry (ID '0') built from info_dict['thumbnail'].  When
    neither is available, a "No thumbnails present" message is printed
    instead of the table.
    """
    entries = info_dict.get('thumbnails')
    if not entries:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        entries = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])

    # One row per thumbnail; missing dimensions are shown as 'unknown'.
    rows = []
    for thumb in entries:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
@@ -1585,3 +1693,39 @@ class YoutubeDL(object):
|
||||
if encoding is None:
|
||||
encoding = preferredencoding()
|
||||
return encoding
|
||||
|
||||
def _write_thumbnails(self, info_dict, filename):
    """Download the video's thumbnail image(s) next to the media file.

    With the 'writethumbnail' parameter only the last entry of
    info_dict['thumbnails'] is written; with 'write_all_thumbnails' every
    entry is.  Nothing happens when neither parameter is set or when there
    are no thumbnails.

    Each file is named after ``filename`` with its extension replaced by
    the thumbnail's extension ('jpg' when it cannot be determined); when
    more than one thumbnail is written, '_<id>' is inserted before the
    extension.  Download failures are reported as warnings, not raised.
    """
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        if thumbnails:
            # Only the last entry of the list is written in this mode.
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    else:
        return

    if not thumbnails:
        # No thumbnails present, so return immediately
        return

    several = len(thumbnails) > 1
    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '_%s' % t['id'] if several else ''
        thumb_display_id = '%s ' % t['id'] if several else ''
        thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            continue

        self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                       (info_dict['extractor'], info_dict['id'], thumb_display_id))
        try:
            uf = self.urlopen(t['url'])
            try:
                # Open with the same encoded name the existence check above
                # uses, so both refer to the same file on disk.
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
            finally:
                # Release the connection even when writing fails.
                uf.close()
            self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_warning('Unable to download thumbnail "%s": %s' %
                                (t['url'], compat_str(err)))
|
||||
|
||||
@@ -143,10 +143,13 @@ def _real_main(argv=None):
|
||||
parser.error('invalid max_filesize specified')
|
||||
opts.max_filesize = numeric_limit
|
||||
if opts.retries is not None:
|
||||
try:
|
||||
opts.retries = int(opts.retries)
|
||||
except (TypeError, ValueError):
|
||||
parser.error('invalid retry count specified')
|
||||
if opts.retries in ('inf', 'infinite'):
|
||||
opts_retries = float('inf')
|
||||
else:
|
||||
try:
|
||||
opts_retries = int(opts.retries)
|
||||
except (TypeError, ValueError):
|
||||
parser.error('invalid retry count specified')
|
||||
if opts.buffersize is not None:
|
||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||
if numeric_buffersize is None:
|
||||
@@ -238,6 +241,12 @@ def _real_main(argv=None):
|
||||
'verboseOutput': opts.verbose,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
try:
|
||||
import xattr
|
||||
xattr # Confuse flake8
|
||||
except ImportError:
|
||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||
|
||||
ydl_opts = {
|
||||
'usenetrc': opts.usenetrc,
|
||||
@@ -268,7 +277,7 @@ def _real_main(argv=None):
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts.retries,
|
||||
'retries': opts_retries,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
@@ -286,6 +295,7 @@ def _real_main(argv=None):
|
||||
'writeannotations': opts.writeannotations,
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'write_all_thumbnails': opts.write_all_thumbnails,
|
||||
'writesubtitles': opts.writesubtitles,
|
||||
'writeautomaticsub': opts.writeautomaticsub,
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
@@ -329,6 +339,11 @@ def _real_main(argv=None):
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
'external_downloader': opts.external_downloader,
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
@@ -346,7 +361,9 @@ def _real_main(argv=None):
|
||||
sys.exit()
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error('you must provide at least one URL')
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type youtube-dl --help to see a list of all options.')
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
|
||||
@@ -71,6 +71,11 @@ try:
|
||||
except ImportError:
|
||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||
|
||||
try:
|
||||
import http.server as compat_http_server
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
except ImportError:
|
||||
@@ -365,6 +370,7 @@ __all__ = [
|
||||
'compat_html_entities',
|
||||
'compat_html_parser',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_parse_qs',
|
||||
|
||||
@@ -1,35 +1,41 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .external import get_external_downloader
|
||||
from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
from .f4m import F4mFD
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': NativeHlsFD,
|
||||
'm3u8': HlsFD,
|
||||
'mms': MplayerFD,
|
||||
'rtsp': MplayerFD,
|
||||
'f4m': F4mFD,
|
||||
}
|
||||
|
||||
def get_suitable_downloader(info_dict, params=None):
    """Get the downloader class that can handle the info dict.

    The protocol returned by determine_protocol() is stored back into
    info_dict['protocol'].  When params names an 'external_downloader' and
    that downloader reports that it supports info_dict, its class is
    returned; otherwise the class is looked up in PROTOCOL_MAP, with HttpFD
    as the fallback.

    params may be omitted (or None), in which case no external downloader
    is considered.
    """
    # A None default avoids sharing one mutable dict between calls.
    if params is None:
        params = {}

    protocol = determine_protocol(info_dict)
    info_dict['protocol'] = protocol

    external_downloader = params.get('external_downloader')
    if external_downloader is not None:
        ed = get_external_downloader(external_downloader)
        if ed.supports(info_dict):
            return ed

    return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
__all__ = [
|
||||
'get_suitable_downloader',
|
||||
|
||||
@@ -25,21 +25,23 @@ class FileDownloader(object):
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
(experimental)
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
@@ -284,6 +286,7 @@ class FileDownloader(object):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
self.params.get('nooverwrites', False)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
@@ -305,6 +308,11 @@ class FileDownloader(object):
|
||||
})
|
||||
return True
|
||||
|
||||
sleep_interval = self.params.get('sleep_interval')
|
||||
if sleep_interval:
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
return self.real_download(filename, info_dict)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
@@ -319,3 +327,24 @@ class FileDownloader(object):
|
||||
# See YoutubeDL.py (search for progress_hooks) for a description of
|
||||
# this interface
|
||||
self._progress_hooks.append(ph)
|
||||
|
||||
def _debug_cmd(self, args, subprocess_encoding, exe=None):
    """Print the command line of an external program in verbose mode.

    Does nothing unless the 'verbose' parameter is set.

    args is the argument list about to be executed.  When
    subprocess_encoding is given, bytes arguments are decoded with it
    before display.  exe is the program name shown in the message; it
    defaults to the base name of the first argument.
    """
    if not self.params.get('verbose', False):
        return

    if subprocess_encoding:
        str_args = [
            a.decode(subprocess_encoding) if isinstance(a, bytes) else a
            for a in args]
    else:
        str_args = args

    if exe is None:
        # Use the decoded argument so a bytes executable path is displayed
        # as text rather than going through an implicit conversion.
        exe = os.path.basename(str_args[0])

    # shlex.quote is the Python 3 home of the quoting helper; pipes.quote
    # covers Python 2.  repr() is the last resort when neither exists.
    try:
        from shlex import quote
    except ImportError:
        try:
            from pipes import quote
        except ImportError:
            quote = None

    if quote is None:
        cmdline = repr(str_args)
    else:
        cmdline = ' '.join(map(quote, str_args))

    self.to_screen('[debug] %s command line: %s' % (exe, cmdline))
|
||||
|
||||
117
youtube_dl/downloader/external.py
Normal file
117
youtube_dl/downloader/external.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class ExternalFD(FileDownloader):
    """Base class for downloaders that run an external program.

    Subclasses provide _make_cmd(tmpfilename, info_dict) returning the
    argument list to execute, or override _call_downloader entirely.
    """

    def real_download(self, filename, info_dict):
        """Download to a temporary name and rename it on success.

        Returns True when the external program exits with status 0 (a
        'finished' progress hook is fired with the resulting file size),
        and False after reporting an error otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        retval = self._call_downloader(tmpfilename, info_dict)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        """Return the lowercased class name without its last two
        characters (e.g. 'CurlFD' -> 'curl')."""
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        """The value of the 'external_downloader' parameter."""
        return self.params.get('external_downloader')

    @classmethod
    def supports(cls, info_dict):
        """Return True when info_dict['protocol'] is http, https, ftp or ftps."""
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd

        Runs the command built by _make_cmd and returns its exit status.
        On a non-zero status, the program's stderr output is forwarded to
        to_stderr.
        """
        cmd = self._make_cmd(tmpfilename, info_dict)

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
        else:
            subprocess_encoding = None
        self._debug_cmd(cmd, subprocess_encoding)

        p = subprocess.Popen(
            cmd, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        if p.returncode != 0:
            # communicate() returns bytes; decode leniently so the message
            # is text and undecodable output cannot raise.
            self.to_stderr(stderr.decode('utf-8', 'replace'))
        return p.returncode
|
||||
|
||||
|
||||
class CurlFD(ExternalFD):
    """External downloader that builds a curl command line."""

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the argument list: output file, one --header per entry
        of info_dict['http_headers'], then the URL after '--'."""
        argv = [self.exe, '-o', tmpfilename]
        for header, value in info_dict['http_headers'].items():
            argv.extend(('--header', '%s: %s' % (header, value)))
        argv.extend(('--', info_dict['url']))
        return argv
|
||||
|
||||
|
||||
class WgetFD(ExternalFD):
    """External downloader that builds a wget command line."""

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the argument list: output file, '-nv' and
        '--no-cookies', one --header per entry of
        info_dict['http_headers'], then the URL after '--'."""
        argv = [self.exe, '-O', tmpfilename]
        argv.extend(('-nv', '--no-cookies'))
        for header, value in info_dict['http_headers'].items():
            argv.extend(('--header', '%s: %s' % (header, value)))
        argv.extend(('--', info_dict['url']))
        return argv
|
||||
|
||||
|
||||
class Aria2cFD(ExternalFD):
    """External downloader that builds an aria2c command line."""

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the argument list for aria2c.

        The target is passed as separate --dir / --out options (--dir is
        omitted when tmpfilename has no directory part), followed by one
        --header per entry of info_dict['http_headers'] and the URL after
        '--'.
        """
        argv = [self.exe, '-c']
        argv.extend(('--min-split-size', '1M', '--max-connection-per-server', '4'))

        directory, basename = os.path.split(tmpfilename)
        if directory:
            argv.extend(('--dir', directory))
        argv.extend(('--out', basename))

        for header, value in info_dict['http_headers'].items():
            argv.extend(('--header', '%s: %s' % (header, value)))
        argv.extend(('--', info_dict['url']))
        return argv
|
||||
|
||||
def _collect_external_downloaders(namespace):
    """Map get_basename() to the class for every entry of namespace whose
    name ends in 'FD', except 'ExternalFD' itself."""
    registry = {}
    for name, klass in namespace.items():
        if name == 'ExternalFD' or not name.endswith('FD'):
            continue
        registry[klass.get_basename()] = klass
    return registry


# Registry of the external downloader classes defined in this module.
_BY_NAME = _collect_external_downloaders(globals())
|
||||
|
||||
|
||||
def list_external_downloaders():
    """Return the registered external downloader names in sorted order."""
    names = list(_BY_NAME)
    names.sort()
    return names
|
||||
|
||||
|
||||
def get_external_downloader(external_downloader):
    """Return the downloader class for the given executable.

    external_downloader is the name of, or path to, the executable.  Its
    directory part and file extension are ignored, so 'wget',
    '/usr/bin/wget' and 'wget.exe' all select the same class.

    Raises KeyError when no downloader is registered under that name.
    """
    # Drop the directory and any extension (e.g. '.exe' on Windows).
    bn = os.path.splitext(os.path.basename(external_downloader))[0]
    return _BY_NAME[bn]
|
||||
@@ -177,13 +177,12 @@ def build_fragments_list(boot_info):
|
||||
""" Return a list of (segment, fragment) for each fragment in the video """
|
||||
res = []
|
||||
segment_run_table = boot_info['segments'][0]
|
||||
# I've only found videos with one segment
|
||||
segment_run_entry = segment_run_table['segment_run'][0]
|
||||
n_frags = segment_run_entry[1]
|
||||
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
|
||||
first_frag_number = fragment_run_entry_table[0]['first']
|
||||
for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
|
||||
res.append((1, frag_number))
|
||||
fragments_counter = itertools.count(first_frag_number)
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
for _ in range(fragments_count):
|
||||
res.append((segment, next(fragments_counter)))
|
||||
return res
|
||||
|
||||
|
||||
|
||||
@@ -24,10 +24,6 @@ class HttpFD(FileDownloader):
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
if 'http_referer' in info_dict:
|
||||
headers['Referer'] = info_dict['http_referer']
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
@@ -161,6 +157,14 @@ class HttpFD(FileDownloader):
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
import xattr
|
||||
xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
|
||||
except(OSError, IOError, ImportError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
|
||||
@@ -104,6 +104,9 @@ class RtmpFD(FileDownloader):
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = info_dict.get('continuedl', False)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
@@ -141,7 +144,14 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
if real_time:
|
||||
basic_args += ['--realtime']
|
||||
|
||||
args = basic_args
|
||||
if not no_resume and continue_dl and not live:
|
||||
args += ['--resume']
|
||||
if not live and continue_dl:
|
||||
args += ['--skip', '1']
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
@@ -152,19 +162,7 @@ class RtmpFD(FileDownloader):
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
|
||||
|
||||
RD_SUCCESS = 0
|
||||
RD_FAILED = 1
|
||||
|
||||
@@ -29,7 +29,6 @@ from .arte import (
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .auengine import AUEngineIE
|
||||
from .azubu import AzubuIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
@@ -83,6 +82,7 @@ from .crunchyroll import (
|
||||
CrunchyrollShowPlaylistIE
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@@ -90,6 +90,7 @@ from .dailymotion import (
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
@@ -285,6 +286,12 @@ from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyRealtimeNewsIE,
|
||||
AppleDailyAnimationNewsIE
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
@@ -350,6 +357,7 @@ from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
@@ -467,6 +475,7 @@ from .twitch import (
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .ubu import UbuIE
|
||||
@@ -545,6 +554,7 @@ from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
|
||||
@@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'user_agent': 'QuickTime compatible (youtube-dl)',
|
||||
'http_headers': {
|
||||
'User-Agent': 'QuickTime compatible (youtube-dl)',
|
||||
},
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import time
|
||||
import hmac
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -95,7 +95,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
for fmt in ['windows', 'android_tablet']:
|
||||
request = compat_urllib_request.Request(
|
||||
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
||||
request.add_header('Youtubedl-user-agent', self._USER_AGENT)
|
||||
request.add_header('User-Agent', self._USER_AGENT)
|
||||
|
||||
fmt_json = self._download_json(
|
||||
request, video_id, 'Downloading %s video JSON' % fmt)
|
||||
@@ -105,13 +105,22 @@ class AtresPlayerIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||
|
||||
for _, video_url in fmt_json['resultObject'].items():
|
||||
for format_id, video_url in fmt_json['resultObject'].items():
|
||||
if format_id == 'token' or not video_url.startswith('http'):
|
||||
continue
|
||||
if video_url.endswith('/Manifest'):
|
||||
formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
|
||||
if 'geodeswowsmpra3player' in video_url:
|
||||
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||
# these videos are protected by DRM, the f4m downloader doesn't support them
|
||||
continue
|
||||
else:
|
||||
f4m_url = video_url[:-9] + '/manifest.f4m'
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'android',
|
||||
'format_id': 'android-%s' % format_id,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
@@ -134,6 +143,15 @@ class AtresPlayerIE(InfoExtractor):
|
||||
description = xpath_text(art, './description', 'description')
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle:
|
||||
subtitles['es'] = subtitle
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -141,4 +159,5 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||
}
|
||||
|
||||
@@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project)',
|
||||
'id': '837572',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||
'id': '837577',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 8,
|
||||
'playlistend': 8,
|
||||
'playliststart': 9,
|
||||
'playlistend': 9,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class AUEngineIE(InfoExtractor):
    """Extractor for auengine.com embed pages."""

    _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'

    _TEST = {
        'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
        'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
        'info_dict': {
            'id': 'lfvlytY6',
            'ext': 'mp4',
            'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded at url.

        Raises ExtractorError when the page contains no video URL.  The
        thumbnail is None when the page contains no thumbnail URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')

        video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
        # Check before indexing: an empty match list must surface as an
        # extractor error, not as an IndexError.
        if not video_urls:
            raise ExtractorError('Could not find video URL')
        video_url = compat_urllib_parse.unquote(video_urls[0])
        if not video_url:
            raise ExtractorError('Could not find video URL')

        thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
        # A missing thumbnail should not make the whole extraction fail.
        thumbnail = compat_urllib_parse.unquote(thumbnails[0]) if thumbnails else None

        # The page title carries the file extension; strip it.
        ext = '.' + determine_ext(video_url)
        title = remove_end(title, ext)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
        }
|
||||
@@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
return self._download_webpage(req, None, note=False)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
_translation_table = {
|
||||
@@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor):
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'md5': 'a2ba71eebf523859fe527a61018f723e',
|
||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||
'info_dict': {
|
||||
'id': '1012420',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
@@ -44,39 +42,31 @@ class CliphunterIE(InfoExtractor):
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
pl_fiji = self._search_regex(
|
||||
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
|
||||
pl_c_qual = self._search_regex(
|
||||
r'pl_c_qual = "(.)"', webpage, 'video quality')
|
||||
video_url = _decode(pl_fiji)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': 'default-%s' % pl_c_qual,
|
||||
}]
|
||||
fmts = {}
|
||||
for fmt in ('mp4', 'flv'):
|
||||
fmt_list = self._parse_json(self._search_regex(
|
||||
r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
|
||||
for f in fmt_list:
|
||||
fmts[f['fname']] = _decode(f['sUrl'])
|
||||
|
||||
qualities_json = self._search_regex(
|
||||
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
|
||||
qualities_data = json.loads(qualities_json)
|
||||
qualities = self._parse_json(self._search_regex(
|
||||
r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
|
||||
|
||||
for i, t in enumerate(
|
||||
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
|
||||
quality_id, crypted_url = t
|
||||
video_url = _decode(crypted_url)
|
||||
formats = []
|
||||
for fname, url in fmts.items():
|
||||
f = {
|
||||
'format_id': quality_id,
|
||||
'url': video_url,
|
||||
'quality': i,
|
||||
'url': url,
|
||||
}
|
||||
if quality_id in qualities_data:
|
||||
qd = qualities_data[quality_id]
|
||||
m = re.match(
|
||||
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
|
||||
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
|
||||
if m:
|
||||
f['width'] = int(m.group('width'))
|
||||
f['height'] = int(m.group('height'))
|
||||
f['tbr'] = int(m.group('tbr'))
|
||||
if fname in qualities:
|
||||
qual = qualities[fname]
|
||||
f.update({
|
||||
'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
|
||||
'width': qual['w'],
|
||||
'height': qual['h'],
|
||||
'tbr': qual['br'],
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
|
||||
@@ -14,6 +14,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -26,6 +27,7 @@ from ..utils import (
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@@ -108,15 +110,17 @@ class InfoExtractor(object):
|
||||
(quality takes higher priority)
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
* http_referer HTTP Referer header value to set.
|
||||
* http_method HTTP method to use for the download.
|
||||
* http_headers A dictionary of additional HTTP headers
|
||||
to add to the request.
|
||||
* http_post_data Additional data to send with a POST
|
||||
request.
|
||||
* stretched_ratio If given and not 1, indicates that the
|
||||
video's pixels are not square.
|
||||
width : height ratio as float.
|
||||
video's pixels are not square.
|
||||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
@@ -130,7 +134,9 @@ class InfoExtractor(object):
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "id" (optional, string) - Thumbnail format ID
|
||||
* "url"
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height}"),
|
||||
@@ -712,6 +718,27 @@ class InfoExtractor(object):
|
||||
)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
def _check_formats(self, formats, video_id):
|
||||
if formats:
|
||||
formats[:] = filter(
|
||||
lambda f: self._is_valid_url(
|
||||
f['url'], video_id,
|
||||
item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
|
||||
formats)
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
    """Probe `url` with a HEAD request and report whether it went through.

    Returns True when the request succeeds. When it fails with an
    ExtractorError whose cause is an HTTP error, a warning naming `item` is
    reported and False is returned. Any other ExtractorError is re-raised.
    """
    note = 'Checking %s URL' % item
    try:
        self._request_webpage(HEADRequest(url), video_id, note)
    except ExtractorError as err:
        # Only HTTP-level failures mean "this URL is bad"; everything else
        # is a real error and must reach the caller.
        if not isinstance(err.cause, compat_HTTPError):
            raise
        self.report_warning(
            '%s URL is invalid, skipping' % item, video_id)
        return False
    return True
|
||||
|
||||
def http_scheme(self):
|
||||
""" Either "http:" or "https:", depending on the user's preferences """
|
||||
return (
|
||||
|
||||
93
youtube_dl/extractor/ctsnews.py
Normal file
93
youtube_dl/extractor/ctsnews.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
    """Extractor for news.cts.com.tw article pages.

    An article either embeds the site's own CTSPlayer2 player, whose media
    URL is fetched from the mp4feed endpoint referenced in the page, or a
    YouTube player, in which case a url-type result for the Youtube
    extractor is returned.
    """

    # https connection failed (Connection reset)
    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
        'md5': 'a9875cb790252b08431186d741beaabe',
        'info_dict': {
            'id': '201501291578109',
            'ext': 'mp4',
            'title': '以色列.真主黨交火 3人死亡',
            'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
            'timestamp': 1422528540,
            'upload_date': '20150129',
        }
    }, {
        # The news count does not appear on the page but is still available
        # in the database
        'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
        'md5': '3aee7e0df7cdff94e43581f54c22619e',
        'info_dict': {
            'id': '201309031304098',
            'ext': 'mp4',
            'title': '韓國31歲童顏男 貌如十多歲小孩',
            'description': 'md5:f183feeba3752b683827aab71adad584',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1378205880,
            'upload_date': '20130903',
        }
    }, {
        # With Youtube embedded video
        'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
        'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': 'OVbfO7d0_hQ',
            'ext': 'mp4',
            'title': 'iPhone6熱銷 蘋果財報亮眼',
            'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150128',
            'uploader_id': 'TBSCTS',
            'uploader': '中華電視公司',
        }
    }]

    def _real_extract(self, url):
        """Extract the video of a CTS news article.

        Returns an info dict for CTSPlayer2 articles, or a url-type result
        pointing at the embedded YouTube video. Raises an expected
        ExtractorError when the page contains neither player.
        """
        news_id = self._match_id(url)
        page = self._download_webpage(url, news_id)

        if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
            feed_url = self._html_search_regex(
                r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
                page, 'feed url')
            # The body of the feed response is used as the media URL.
            video_url = self._download_webpage(
                feed_url, news_id, note='Fetching feed')
        else:
            self.to_screen('Not CTSPlayer video, trying Youtube...')
            youtube_url = self._search_regex(
                r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
                default=None)
            if not youtube_url:
                raise ExtractorError('The news includes no videos!', expected=True)

            return {
                '_type': 'url',
                'url': youtube_url,
                'ie_key': 'Youtube',
            }

        description = self._html_search_meta('description', page)
        title = self._html_search_meta('title', page)
        thumbnail = self._html_search_meta('image', page)

        datetime_str = self._html_search_regex(
            r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
        # Transform into ISO 8601 format with timezone info
        datetime_str = datetime_str.replace('/', '-') + ':00+0800'
        timestamp = parse_iso8601(datetime_str, delimiter=' ')

        return {
            'id': news_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
        }
|
||||
57
youtube_dl/extractor/dctp.py
Normal file
57
youtube_dl/extractor/dctp.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
    """Extractor for films on www.dctp.tv.

    The film slug from the URL is resolved through the site's JSON REST API
    (current API version -> object id -> media metadata). The stream is
    returned as an RTMP format built from the first entry of the site's
    streaming server list.
    """

    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://www\.dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
    _TEST = {
        'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
        'info_dict': {
            'id': '1324',
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'flv',
            'title': 'Videoinstallation für eine Kaufhausfassade'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the film whose slug is in `url`."""
        # The URL carries a slug, not the numeric id; it is used as the
        # display id and as the id for progress messages.
        display_id = self._match_id(url)
        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'

        version_json = self._download_json(
            base_url + 'version.json',
            display_id, note='Determining file version')
        version = version_json['version_name']

        info_json = self._download_json(
            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, display_id),
            display_id, note='Fetching object ID')
        object_id = compat_str(info_json['object_id'])

        meta_json = self._download_json(
            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
            display_id, note='Downloading metadata')
        uuid = meta_json['uuid']
        title = meta_json['title']
        ratio = '16x9' if meta_json['is_wide'] else '4x3'
        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)

        servers_json = self._download_json(
            'http://www.dctp.tv/streaming_servers/',
            display_id, note='Downloading server list')
        # Kept in its own name so the `url` parameter is not overwritten.
        rtmp_url = servers_json[0]['endpoint']

        return {
            'id': object_id,
            'title': title,
            'format': 'rtmp',
            'url': rtmp_url,
            'play_path': play_path,
            'rtmp_real_time': True,
            'ext': 'flv',
            'display_id': display_id
        }
|
||||
@@ -48,14 +48,20 @@ class DRTVIE(SubtitlesInfoExtractor):
|
||||
elif asset['Kind'] == 'VideoResource':
|
||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||
for link in asset['Links']:
|
||||
target = link['Target']
|
||||
uri = link['Uri']
|
||||
format_id = target
|
||||
preference = -1 if target == 'HDS' else -2
|
||||
if spoken_subtitles:
|
||||
preference -= 2
|
||||
format_id += '-spoken-subtitles'
|
||||
formats.append({
|
||||
'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
|
||||
'format_id': target,
|
||||
'format_id': format_id,
|
||||
'ext': link['FileFormat'],
|
||||
'preference': -1 if target == 'HDS' else -2,
|
||||
'preference': preference,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
|
||||
@@ -5,6 +5,7 @@ import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -16,7 +17,8 @@ from ..utils import (
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_TEST = {
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
|
||||
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
|
||||
'info_dict': {
|
||||
@@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Boxing again with Puff',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
|
||||
'info_dict': {
|
||||
'id': '20150125cEva0hDn',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {
|
||||
'username': 'ytdl@yt-dl.org',
|
||||
'password': '(snip)',
|
||||
'skip': 'requires actual password'
|
||||
}
|
||||
}]
|
||||
|
||||
def _login(self):
    """Log in to FC2 with the configured credentials.

    Returns False when no username or password is available, or when the
    login response lacks the success marker (a warning is reported in that
    case). Returns True once the follow-up redirect page has been fetched.
    """
    username, password = self._get_login_info()
    if username is None or password is None:
        return False

    # Log in
    form_fields = {
        'email': username,
        'password': password,
        'done': 'video',
        'Submit': ' Login ',
    }

    # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
    # chokes on unicode
    encoded_form = dict(
        (key.encode('utf-8'), value.encode('utf-8'))
        for key, value in form_fields.items())
    post_data = compat_urllib_parse.urlencode(encoded_form).encode('utf-8')
    login_request = compat_urllib_request.Request(
        'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', post_data)

    response = self._download_webpage(
        login_request, None, note='Logging in', errnote='Unable to log in')
    if 'mode=redirect&login=done' not in response:
        self.report_warning('unable to log in: bad username or password')
        return False

    # this is also needed
    redirect_request = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
    self._download_webpage(
        redirect_request, None, note='Login redirect', errnote='Login redirect failed')

    return True
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._login()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
@@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
|
||||
info = compat_urlparse.parse_qs(info_webpage)
|
||||
|
||||
if 'err_code' in info:
|
||||
raise ExtractorError('Error code: %s' % info['err_code'][0])
|
||||
# most of the time we can still download video even if err_code is 403 or 602
|
||||
self.report_warning(
|
||||
'Error code was: %s... but still trying' % info['err_code'][0])
|
||||
|
||||
if 'filepath' not in info:
|
||||
raise ExtractorError('Cannot download file. Are you logged in?')
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
title_info = info.get('title')
|
||||
|
||||
@@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
|
||||
_TEST = {
|
||||
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
|
||||
'md5': '6269e8626fa1a891bf5369b386ae996a',
|
||||
'info_dict': {
|
||||
'id': '1165642',
|
||||
'ext': 'mp4',
|
||||
@@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
|
||||
'upload_date': '20141120',
|
||||
'duration': 3960,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'rtmpdump required',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -29,9 +27,7 @@ class GameStarIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
is_html,
|
||||
orderedSet,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
@@ -361,7 +362,7 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||
'title': 'Zero Punctuation',
|
||||
'description': 're:'
|
||||
'description': 're:.*groundbreaking video review series.*'
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
},
|
||||
@@ -488,6 +489,29 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
|
||||
}
|
||||
},
|
||||
# Cinerama player
|
||||
{
|
||||
'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
|
||||
'info_dict': {
|
||||
'id': '730m_DandD_1901_512k',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'www.abc.net.au',
|
||||
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
|
||||
}
|
||||
},
|
||||
# embedded viddler video
|
||||
{
|
||||
'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'deadspin',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'timestamp': 1422285291,
|
||||
'upload_date': '20150126',
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
}
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -647,7 +671,7 @@ class GenericIE(InfoExtractor):
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
first_bytes = full_response.read(512)
|
||||
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
|
||||
if not is_html(first_bytes):
|
||||
self._downloader.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
@@ -849,9 +873,16 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Viddler player
|
||||
mobj = re.search(
|
||||
r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
@@ -1045,6 +1076,10 @@ class GenericIE(InfoExtractor):
|
||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||
''', webpage))
|
||||
if not found:
|
||||
# Cinerama player
|
||||
found = re.findall(
|
||||
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
found = filter_video(re.findall(
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
@@ -63,29 +63,34 @@ class IviIE(InfoExtractor):
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {'method': 'da.content.get',
|
||||
'params': [video_id, {'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
data = {
|
||||
'method': 'da.content.get',
|
||||
'params': [
|
||||
video_id, {
|
||||
'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
|
||||
video_json_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if 'error' in video_json:
|
||||
error = video_json['error']
|
||||
if error['origin'] == 'NoRedisValidData':
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
|
||||
raise ExtractorError(
|
||||
'Unable to download video %s: %s' % (video_id, error['message']),
|
||||
expected=True)
|
||||
|
||||
result = video_json['result']
|
||||
|
||||
|
||||
@@ -2,18 +2,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class KrasViewIE(InfoExtractor):
|
||||
IE_DESC = 'Красвью'
|
||||
_VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://krasview.ru/video/512228',
|
||||
@@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
flashvars = json.loads(self._search_regex(
|
||||
r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
|
||||
flashvars = json.loads(js_to_json(self._search_regex(
|
||||
r'video_Init\(({.+?})', webpage, 'flashvars')))
|
||||
|
||||
video_url = flashvars['url']
|
||||
title = unescapeHTML(flashvars['title'])
|
||||
description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
|
||||
thumbnail = flashvars['image']
|
||||
duration = int(flashvars['duration'])
|
||||
filesize = int(flashvars['size'])
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(flashvars.get('duration'))
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
||||
|
||||
@@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
@@ -8,20 +8,20 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'md5': '50f79e05ba149149c1b4ea961223d5b3',
|
||||
'info_dict': {
|
||||
'id': '757_1364311680',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'md5': 'b13a29626183c9d33944e6a04f41aafc',
|
||||
'info_dict': {
|
||||
'id': 'f93_1390833151',
|
||||
'ext': 'mp4',
|
||||
@@ -43,8 +43,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||
@@ -81,9 +80,19 @@ class LiveLeakIE(InfoExtractor):
|
||||
sources = json.loads(sources_json)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s' % i,
|
||||
'format_note': s.get('label'),
|
||||
'url': s['file'],
|
||||
} for s in sources]
|
||||
} for i, s in enumerate(sources)]
|
||||
for i, s in enumerate(sources):
|
||||
orig_url = s['file'].replace('.h264_base.mp4', '')
|
||||
if s['file'] != orig_url:
|
||||
formats.append({
|
||||
'format_id': 'original-%s' % i,
|
||||
'format_note': s.get('label'),
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -6,13 +6,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||
'info_dict': {
|
||||
@@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading player webpage')
|
||||
@@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor):
|
||||
r'data-ep="([^"]+)"', webpage, 'video ID')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
|
||||
thumbnail_w = int_or_none(
|
||||
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
|
||||
@@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor):
|
||||
'height': thumbnail_h,
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
|
||||
|
||||
pg_rating = self._search_regex(
|
||||
r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
|
||||
age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
|
||||
if config.get('pGeo'):
|
||||
self.report_warning(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
|
||||
sources_js = self._search_regex(
|
||||
r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
|
||||
sources = self._parse_json(
|
||||
sources_js, video_id, transform_source=js_to_json)
|
||||
formats = [{
|
||||
'format_id': 'hls',
|
||||
'ext': 'mp4',
|
||||
'url': config['EpisodeVideoLink_HLS'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source.get('provider') == 'rtmp':
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
|
||||
if not m:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
elif source.get('file').endswith('.m3u8'):
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'ext': source.get('type', 'mp4'),
|
||||
'url': source['file'],
|
||||
})
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
|
||||
if m:
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': [thumbnail],
|
||||
'duration': duration,
|
||||
'duration': int_or_none(config.get('VideoTime')),
|
||||
'description': description,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
} for format_id, video_url in prioritized_streams['0'].items()
|
||||
])
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
|
||||
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
# Otherwise we get a webpage that would execute some javascript
|
||||
req.add_header('Youtubedl-user-agent', 'curl/7')
|
||||
req.add_header('User-Agent', 'curl/7')
|
||||
webpage = self._download_webpage(req, mtvn_id,
|
||||
'Downloading mobile page')
|
||||
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||
|
||||
163
youtube_dl/extractor/nextmedia.py
Normal file
163
youtube_dl/extractor/nextmedia.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
    """Extractor for hk.apple.nextmedia.com news pages.

    The page-scraping steps are split into small `_fetch_*` hooks and a
    `_URL_PATTERN` attribute so that subclasses for sibling sites can
    override only the parts that differ.
    """

    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
        'md5': 'dff9fad7009311c421176d1ac90bfe4f',
        'info_dict': {
            'id': '53109199',
            'ext': 'mp4',
            'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:28222b9912b6665a21011b034c70fcc7',
            'timestamp': 1415456273,
            'upload_date': '20141108',
        }
    }]

    # Regex whose first group captures the media URL inside the page script.
    _URL_PATTERN = r'\{ url: \'(.+)\' \}'

    def _real_extract(self, url):
        """Download the article page and extract the video from it."""
        news_id = self._match_id(url)
        page = self._download_webpage(url, news_id)
        return self._extract_from_nextmedia_page(news_id, url, page)

    def _extract_from_nextmedia_page(self, news_id, url, page):
        """Build the info dict from an already downloaded article `page`.

        `url` is the URL matched by _VALID_URL; its date component is used
        as upload_date when the page yields no timestamp.
        """
        title = self._fetch_title(page)
        video_url = self._search_regex(self._URL_PATTERN, page, 'video url')

        attrs = {
            'id': news_id,
            'title': title,
            'url': video_url,  # ext can be inferred from url
            'thumbnail': self._fetch_thumbnail(page),
            'description': self._fetch_description(page),
        }

        timestamp = self._fetch_timestamp(page)
        if timestamp:
            attrs['timestamp'] = timestamp
        else:
            attrs['upload_date'] = self._fetch_upload_date(url)

        return attrs

    def _fetch_title(self, page):
        """Return the article title (Open Graph title)."""
        return self._og_search_title(page)

    def _fetch_thumbnail(self, page):
        """Return the thumbnail URL (Open Graph image)."""
        return self._og_search_thumbnail(page)

    def _fetch_timestamp(self, page):
        """Return the UNIX timestamp parsed from the page's "dateCreated"
        value, or None when the page does not carry one.

        The search is non-fatal so that a missing value lets the caller
        fall back to the date in the URL instead of aborting extraction.
        """
        date_created = self._search_regex(
            '"dateCreated":"([^"]+)"', page, 'created time', fatal=False)
        if not date_created:
            return None
        return parse_iso8601(date_created)

    def _fetch_upload_date(self, url):
        """Return the date component of `url` as matched by _VALID_URL."""
        return self._search_regex(self._VALID_URL, url, 'upload date', group='date')

    def _fetch_description(self, page):
        """Return the article description (Open Graph description)."""
        return self._og_search_property('description', page)
|
||||
|
||||
|
||||
class NextMediaActionNewsIE(NextMediaIE):
    """Extractor for hk.dv.nextmedia.com "actionnews" pages.

    The actionnews page only links to the real article (via its Open Graph
    URL); that article is then handled by the NextMediaIE page extraction.
    """

    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
    _TESTS = [{
        'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
        'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
        'info_dict': {
            'id': '19009428',
            'ext': 'mp4',
            'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
            'timestamp': 1421791200,
            'upload_date': '20150120',
        }
    }]

    def _real_extract(self, url):
        """Follow the actionnews page to its article and extract from that."""
        news_id = self._match_id(url)
        actionnews_page = self._download_webpage(url, news_id)
        article_url = self._og_search_url(actionnews_page)
        article_page = self._download_webpage(article_url, news_id)
        # The original `url` is passed on: its date part is the upload_date
        # fallback used by the page extraction.
        return self._extract_from_nextmedia_page(news_id, url, article_page)
|
||||
|
||||
|
||||
class AppleDailyRealtimeNewsIE(NextMediaIE):
    """Extractor for appledaily.com.tw ``realtimenews`` and ``enews`` pages.

    Overrides the title/thumbnail/timestamp hooks of the base class with
    patterns specific to these pages.
    """

    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'http://(www|ent)\.appledaily\.com\.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
    _TESTS = [{
        'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
        'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
        'info_dict': {
            'id': '36354694',
            'ext': 'mp4',
            'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:b23787119933404ce515c6356a8c355c',
            'upload_date': '20150128',
        }
    }, {
        'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
        'md5': '86b4e9132d158279c7883822d94ccc49',
        'info_dict': {
            'id': '550549',
            'ext': 'mp4',
            'title': '不滿被踩腳 山東兩大媽一路打下車',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
            'upload_date': '20150128',
        }
    }]

    # NOTE(review): presumably read by the NextMediaIE base class to locate
    # the video URL in the page -- confirm against the base class.
    _URL_PATTERN = r'\{url: \'(.+)\'\}'

    def _fetch_title(self, page):
        """Return the text of the ``<h1 id="h1">`` element of *page*."""
        return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')

    def _fetch_thumbnail(self, page):
        """Return the argument of ``setInitialImage('...')`` found in *page*.

        The search is non-fatal (``fatal=False``), so a page without a
        thumbnail does not abort the extraction.
        """
        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)

    def _fetch_timestamp(self, page):
        """Always return None: no timestamp is extracted for these pages."""
        return None
|
||||
|
||||
|
||||
class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
    """Extractor for ``www.appledaily.com.tw/animation/...`` pages.

    Both the title and the description are read from the page's
    ``description`` meta tag.
    """

    # Raw string with escaped dots: the original literal relied on '\d'
    # surviving in a non-raw string and let '.' match any character.
    _VALID_URL = r'http://www\.appledaily\.com\.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
    _TESTS = [{
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
        'md5': '03df296d95dedc2d5886debbb80cb43f',
        'info_dict': {
            'id': '5003671',
            'ext': 'mp4',
            'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
            'upload_date': '20150128',
        }
    }, {
        # No thumbnail
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
        'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
        'info_dict': {
            'id': '5003673',
            'ext': 'mp4',
            'title': '半夜尿尿 好像會看到___',
            'description': 'md5:61d2da7fe117fede148706cdb85ac066',
            'upload_date': '20150128',
        },
        'expected_warnings': [
            'video thumbnail',
        ]
    }]

    def _fetch_title(self, page):
        """Return the content of the ``description`` meta tag as the title."""
        return self._html_search_meta('description', page, 'news title')

    def _fetch_description(self, page):
        """Return the content of the ``description`` meta tag."""
        return self._html_search_meta('description', page, 'news description')
|
||||
72
youtube_dl/extractor/rtl2.py
Normal file
72
youtube_dl/extractor/rtl2.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RTL2IE(InfoExtractor):
    """Extractor for videos on rtl2.de.

    The video page carries two numeric ids (``vico_id`` and ``vivi_id``)
    that are passed to ``get_video.php``; its JSON answer contains the
    title, description, image and stream URL of the video.
    """

    # 'https?' (optional 's'); the previous 'http?' made the 'p' optional
    # and therefore never matched https URLs.
    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'Matthias, Det und Helge treten gegeneinander an.'
        },
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
        'info_dict': {
            'id': '21040-anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, description, formats) for *url*."""
        # Some rtl2 urls have no slash at the end, so append it.
        if not url.endswith('/'):
            url += '/'

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vico_id = self._html_search_regex(
            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
        vivi_id = self._html_search_regex(
            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
        info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id

        # The info URL is fetched exactly once, as JSON; an earlier extra
        # download of the same URL only produced an unused page.
        info = self._download_json(info_url, video_id)
        video_info = info['video']
        title = video_info['titel']
        description = video_info.get('beschreibung')
        thumbnail = video_info.get('image')

        download_url = video_info['streamurl']
        # Drop the backslashes contained in the stream URL.
        download_url = download_url.replace('\\', '')
        # The play path is everything after 'ondemand/' in the stream URL.
        stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
        rtmp_conn = ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"]

        formats = [{
            'url': download_url,
            'play_path': stream_url,
            'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
            'page_url': url,
            'flash_version': 'LNX 11,2,202,429',
            'rtmp_conn': rtmp_conn,
            'no_resume': True,
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
        }
|
||||
@@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor):
|
||||
'uploader_id': 'mopeder',
|
||||
'duration': 71,
|
||||
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
|
||||
'upload_date': '20150114',
|
||||
},
|
||||
},
|
||||
# swf player
|
||||
|
||||
@@ -4,14 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
|
||||
|
||||
@@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor):
|
||||
if n.tag.startswith('type') and n.tag != 'type6':
|
||||
format_id = n.tag.rpartition('type')[2]
|
||||
video_url = base_url + n.find('./filename').text
|
||||
# Test video URLs beforehand as some of them are invalid
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(video_url), video_id,
|
||||
'Checking %s video URL' % format_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.report_warning(
|
||||
'%s video URL is invalid, skipping' % format_id, video_id)
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
@@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor):
|
||||
})
|
||||
duration = float(idoc[0].findall('./duration')[0].text)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import js_to_json
|
||||
|
||||
|
||||
class SRMediathekIE(InfoExtractor):
|
||||
IE_DESC = 'Süddeutscher Rundfunk'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TestTubeIE(InfoExtractor):
|
||||
@@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(info.get('duration'))
|
||||
images = info.get('images')
|
||||
thumbnails = None
|
||||
preference = qualities(['mini', 'small', 'medium', 'large'])
|
||||
if images:
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': img_url,
|
||||
'preference': preference(thumbnail_id)
|
||||
} for thumbnail_id, img_url in images.items()]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info.get('summary'),
|
||||
'thumbnail': info.get('images', {}).get('large'),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': info.get('show', {}).get('name'),
|
||||
'uploader_id': info.get('show', {}).get('slug'),
|
||||
'duration': duration,
|
||||
|
||||
@@ -220,12 +220,18 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
response = self._download_json(
|
||||
self._PLAYLIST_URL % (channel_id, offset, limit),
|
||||
channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
|
||||
videos = response['videos']
|
||||
if not videos:
|
||||
page_entries = self._extract_playlist_page(response)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend([self.url_result(video['url']) for video in videos])
|
||||
entries.extend(page_entries)
|
||||
offset += limit
|
||||
return self.playlist_result(entries, channel_id, channel_name)
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry) for entry in set(entries)],
|
||||
channel_id, channel_name)
|
||||
|
||||
def _extract_playlist_page(self, response):
|
||||
videos = response.get('videos')
|
||||
return [video['url'] for video in videos] if videos else []
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_playlist(self._match_id(url))
|
||||
@@ -262,6 +268,31 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class TwitchBookmarksIE(TwitchPlaylistBaseIE):
    """Playlist extractor for ``<user>/profile/bookmarks`` pages."""

    IE_NAME = 'twitch:bookmarks'
    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
    _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
    _PLAYLIST_TYPE = 'bookmarks'

    _TEST = {
        'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
        'info_dict': {
            'id': 'ognos',
            'title': 'Ognos',
        },
        'playlist_mincount': 3,
    }

    def _extract_playlist_page(self, response):
        """Return the video URLs of all bookmarks listed in *response*.

        Bookmarks without a (truthy) ``video`` entry are ignored.
        """
        bookmarked_videos = (
            bookmark.get('video')
            for bookmark in response.get('bookmarks', []))
        return [video['url'] for video in bookmarked_videos if video]
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
|
||||
@@ -3,50 +3,51 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class UbuIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://ubu.com/film/her_noise.html',
|
||||
'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
|
||||
'md5': '138d5652618bf0f03878978db9bef1ee',
|
||||
'info_dict': {
|
||||
'id': 'her_noise',
|
||||
'ext': 'mp4',
|
||||
'ext': 'm4v',
|
||||
'title': 'Her Noise - The Making Of (2007)',
|
||||
'duration': 3600,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
|
||||
if duration:
|
||||
duration *= 60
|
||||
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
|
||||
invscale=60)
|
||||
|
||||
formats = []
|
||||
|
||||
FORMAT_REGEXES = [
|
||||
['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
|
||||
['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
|
||||
('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
|
||||
('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
|
||||
]
|
||||
|
||||
preference = qualities([fid for fid, _ in FORMAT_REGEXES])
|
||||
for format_id, format_regex in FORMAT_REGEXES:
|
||||
m = re.search(format_regex, webpage)
|
||||
if m:
|
||||
formats.append({
|
||||
'url': m.group(1),
|
||||
'format_id': format_id,
|
||||
'preference': preference(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -5,27 +5,58 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urllib_request
|
||||
)
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
"url": "http://www.viddler.com/v/43903784",
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
"title": "Video Made Easy",
|
||||
'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
|
||||
"uploader": "viddler",
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
'timestamp': 1335371429,
|
||||
'upload_date': '20120425',
|
||||
"duration": 100.89,
|
||||
'duration': 100.89,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
'timestamp': 1422285291,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
'uploader': 'BCLETeens',
|
||||
'timestamp': 1411997190,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -33,14 +64,17 @@ class ViddlerIE(InfoExtractor):
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
data = self._download_json(json_url, video_id)['video']
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = compat_urllib_request.Request(json_url, None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
for filed in data['files']:
|
||||
if filed.get('status', 'ready') != 'ready':
|
||||
continue
|
||||
format_id = filed.get('profile_id') or filed['profile_name']
|
||||
f = {
|
||||
'format_id': filed['profile_id'],
|
||||
'format_id': format_id,
|
||||
'format_note': filed['profile_name'],
|
||||
'url': self._proto_relative_url(filed['url']),
|
||||
'width': int_or_none(filed.get('width')),
|
||||
@@ -53,16 +87,15 @@ class ViddlerIE(InfoExtractor):
|
||||
|
||||
if filed.get('cdn_url'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'])
|
||||
f['format_id'] = filed['profile_id'] + '-cdn'
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
|
||||
f['format_id'] = format_id + '-cdn'
|
||||
f['source_preference'] = 1
|
||||
formats.append(f)
|
||||
|
||||
if filed.get('html5_video_source'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(
|
||||
filed['html5_video_source'])
|
||||
f['format_id'] = filed['profile_id'] + '-html5'
|
||||
f['url'] = self._proto_relative_url(filed['html5_video_source'])
|
||||
f['format_id'] = format_id + '-html5'
|
||||
f['source_preference'] = 0
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
@@ -71,7 +104,6 @@ class ViddlerIE(InfoExtractor):
|
||||
t.get('text') for t in data.get('tags', []) if 'text' in t]
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
@@ -81,5 +113,6 @@ class ViddlerIE(InfoExtractor):
|
||||
'uploader': data.get('author'),
|
||||
'duration': float_or_none(data.get('length')),
|
||||
'view_count': int_or_none(data.get('view_count')),
|
||||
'comment_count': int_or_none(data.get('comment_count')),
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
@@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'http_referer': iframe_url,
|
||||
'http_headers': {
|
||||
'Referer': iframe_url,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ from ..utils import (
|
||||
class VideoTtIE(InfoExtractor):
|
||||
ID_NAME = 'video.tt'
|
||||
IE_DESC = 'video.tt - Your True Tube'
|
||||
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
|
||||
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
|
||||
'md5': 'b13aa9e2f267effb5d1094443dff65ba',
|
||||
'info_dict': {
|
||||
@@ -26,7 +26,10 @@ class VideoTtIE(InfoExtractor):
|
||||
'upload_date': '20130827',
|
||||
'uploader': 'joseph313',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.tt/embed/amd5YujV8',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
@@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor):
|
||||
'title': mobj.group('title'),
|
||||
'age_limit': int(mobj.group('age_limit')),
|
||||
'url': url,
|
||||
'user_agent': 'mobile',
|
||||
'http_headers': {
|
||||
'User-Agent': 'mobile',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
142
youtube_dl/extractor/xuite.py
Normal file
142
youtube_dl/extractor/xuite.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class XuiteIE(InfoExtractor):
    """Extractor for media on vlog.xuite.net (``/play/`` and ``/embed/`` URLs).

    The metadata comes from the XML "flv config" of the flash player,
    whose property ids and values are base64 encoded.
    """

    _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
    _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
    _TESTS = [{
        # Audio
        'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
        'md5': '63a42c705772aa53fd4c1a0027f86adf',
        'info_dict': {
            'id': '3860914',
            'ext': 'mp3',
            'title': '孤單南半球-歐德陽',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 247.246,
            'timestamp': 1314932940,
            'upload_date': '20110902',
            'uploader': '阿能',
            'uploader_id': '15973816',
            'categories': ['個人短片'],
        },
    }, {
        # Video with only one format
        'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
        'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
        'info_dict': {
            'id': '3441629',
            'ext': 'mp4',
            'title': '孫燕姿 - 眼淚成詩',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 217.399,
            'timestamp': 1299383640,
            'upload_date': '20110306',
            'uploader': 'Valen',
            'uploader_id': '10400126',
            'categories': ['影視娛樂'],
        },
    }, {
        # Video with two formats
        'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
        'md5': '1166e0f461efe55b62e26a2d2a68e6de',
        'info_dict': {
            'id': '21301170',
            'ext': 'mp4',
            'title': '暗殺教室 02',
            'description': '字幕:【極影字幕社】',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1384.907,
            'timestamp': 1421481240,
            'upload_date': '20150117',
            'uploader': '我只是想認真點',
            'uploader_id': '242127761',
            'categories': ['電玩動漫'],
        },
    }, {
        'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
        'only_matching': True,
    }]

    def _extract_flv_config(self, media_id):
        """Download and decode the flash player config for *media_id*.

        Returns a dict mapping each decoded property id to its decoded
        (base64, then URL-unquoted) value.  Properties whose content is
        empty are left out of the dict.
        """
        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
        flv_config = self._download_xml(
            'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
            'flv config')
        prop_dict = {}
        for prop in flv_config.findall('./property'):
            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
            # CDATA may be empty in flv config
            if not prop.text:
                continue
            encoded_content = base64.b64decode(prop.text).decode('utf-8')
            prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
        return prop_dict

    def _real_extract(self, url):
        """Return the info dict for the media page at *url*.

        Raises ExtractorError (``expected=True``) when the page shows an
        error message instead of the media.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        error_msg = self._search_regex(
            r'<div id="error-message-content">([^<]+)',
            webpage, 'error message', default=None)
        if error_msg:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_msg),
                expected=True)

        # From here on the numeric media id replaces the id taken from the URL.
        video_id = self._html_search_regex(
            r'data-mediaid="(\d+)"', webpage, 'media id')
        flv_config = self._extract_flv_config(video_id)

        FORMATS = {
            'audio': 'mp3',
            'video': 'mp4',
        }
        # The extension is the same for every format, so compute it once.
        # .get() is used because an empty 'type' property is dropped by
        # _extract_flv_config; 'mp4' is the fallback in that case.
        ext = FORMATS.get(flv_config.get('type'), 'mp4')

        formats = []
        for format_tag in ('src', 'hq_src'):
            video_url = flv_config.get(format_tag)
            if not video_url:
                continue
            # Use the value of the 'q' parameter as format id when the
            # URL has one, the property name otherwise.
            format_id = self._search_regex(
                r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
            formats.append({
                'url': video_url,
                'ext': ext,
                'format_id': format_id,
                'height': int(format_id) if format_id.isnumeric() else None,
            })
        self._sort_formats(formats)

        timestamp = flv_config.get('publish_datetime')
        if timestamp:
            # The value carries no UTC offset; it is parsed as +0800.
            timestamp = parse_iso8601(timestamp + ' +0800', ' ')

        category = flv_config.get('category')
        categories = [category] if category else []

        return {
            'id': video_id,
            'title': flv_config['title'],
            'description': flv_config.get('description'),
            'thumbnail': flv_config.get('thumb'),
            'timestamp': timestamp,
            'uploader': flv_config.get('author_name'),
            'uploader_id': flv_config.get('author_id'),
            'duration': parse_duration(flv_config.get('duration')),
            'categories': categories,
            'formats': formats,
        }
|
||||
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
@@ -264,9 +265,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
@@ -809,6 +810,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_url = None
|
||||
|
||||
# Get video info
|
||||
embed_webpage = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
@@ -1016,10 +1018,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
||||
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
embed_webpage if age_gate else video_webpage, 'JS player URL')
|
||||
ASSETS_RE,
|
||||
embed_webpage if age_gate else video_webpage,
|
||||
'JS player URL (1)', default=None)
|
||||
if not jsplayer_url_json and not age_gate:
|
||||
# We need the embed website after all
|
||||
if embed_webpage is None:
|
||||
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||
embed_webpage = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed webpage')
|
||||
jsplayer_url_json = self._search_regex(
|
||||
ASSETS_RE, embed_webpage, 'JS player URL')
|
||||
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
@@ -1682,11 +1695,18 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
IE_NAME = 'youtube:truncated_url'
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?[^/]+/watch\?(?:
|
||||
(?:https?://)?
|
||||
(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
|
||||
(?:watch\?(?:
|
||||
feature=[a-z_]+|
|
||||
annotation_id=annotation_[^&]+
|
||||
)?$|
|
||||
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
|
||||
annotation_id=annotation_[^&]+|
|
||||
x-yt-cl=[0-9]+|
|
||||
hl=[^&]*|
|
||||
)?
|
||||
|
|
||||
attribution_link\?a=[^&]+
|
||||
)
|
||||
$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
@@ -1695,6 +1715,15 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.youtube.com/watch?',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?feature=foo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1710,7 +1739,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
class YoutubeTruncatedIDIE(InfoExtractor):
|
||||
IE_NAME = 'youtube:truncated_id'
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
|
||||
|
||||
@@ -5,6 +5,7 @@ import optparse
|
||||
import shlex
|
||||
import sys
|
||||
|
||||
from .downloader.external import list_external_downloaders
|
||||
from .compat import (
|
||||
compat_expanduser,
|
||||
compat_getenv,
|
||||
@@ -199,6 +200,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--playlist-end',
|
||||
dest='playlistend', metavar='NUMBER', default=None, type=int,
|
||||
help='playlist video to end at (default is last)')
|
||||
selection.add_option(
|
||||
'--playlist-items',
|
||||
dest='playlist_items', metavar='ITEM_SPEC', default=None,
|
||||
help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
|
||||
selection.add_option(
|
||||
'--match-title',
|
||||
dest='matchtitle', metavar='REGEX',
|
||||
@@ -292,7 +297,7 @@ def parseOpts(overrideArguments=None):
|
||||
' You can filter the video results by putting a condition in'
|
||||
' brackets, as in -f "best[height=720]"'
|
||||
' (or -f "[filesize>10M]"). '
|
||||
' This works for filesize, height, width, tbr, abr, and vbr'
|
||||
' This works for filesize, height, width, tbr, abr, vbr, and fps'
|
||||
' and the comparisons <, <=, >, >=, =, != .'
|
||||
' Formats for which the value is not known are excluded unless you'
|
||||
' put a question mark (?) after the operator.'
|
||||
@@ -372,7 +377,7 @@ def parseOpts(overrideArguments=None):
|
||||
downloader.add_option(
|
||||
'-R', '--retries',
|
||||
dest='retries', metavar='RETRIES', default=10,
|
||||
help='number of retries (default is %default)')
|
||||
help='number of retries (default is %default), or "infinite".')
|
||||
downloader.add_option(
|
||||
'--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', default='1024',
|
||||
@@ -389,6 +394,15 @@ def parseOpts(overrideArguments=None):
|
||||
'--playlist-reverse',
|
||||
action='store_true',
|
||||
help='Download playlist videos in reverse order')
|
||||
downloader.add_option(
|
||||
'--xattr-set-filesize',
|
||||
dest='xattr_set_filesize', action='store_true',
|
||||
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
|
||||
downloader.add_option(
|
||||
'--external-downloader',
|
||||
dest='external_downloader', metavar='COMMAND',
|
||||
help='(experimental) Use the specified external downloader. '
|
||||
'Currently supports %s' % ','.join(list_external_downloaders()))
|
||||
|
||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||
workarounds.add_option(
|
||||
@@ -421,6 +435,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--bidi-workaround',
|
||||
dest='bidi_workaround', action='store_true',
|
||||
help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
workarounds.add_option(
|
||||
'--sleep-interval', metavar='SECONDS',
|
||||
dest='sleep_interval', type=float,
|
||||
help='Number of seconds to sleep before each download.')
|
||||
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
verbosity.add_option(
|
||||
@@ -604,10 +622,6 @@ def parseOpts(overrideArguments=None):
|
||||
'--write-annotations',
|
||||
action='store_true', dest='writeannotations', default=False,
|
||||
help='write video annotations to a .annotation file')
|
||||
filesystem.add_option(
|
||||
'--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail', default=False,
|
||||
help='write thumbnail image to disk')
|
||||
filesystem.add_option(
|
||||
'--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
@@ -627,6 +641,20 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='rm_cachedir',
|
||||
help='Delete all filesystem cache files')
|
||||
|
||||
thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
|
||||
thumbnail.add_option(
|
||||
'--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail', default=False,
|
||||
help='write thumbnail image to disk')
|
||||
thumbnail.add_option(
|
||||
'--write-all-thumbnails',
|
||||
action='store_true', dest='write_all_thumbnails', default=False,
|
||||
help='write all thumbnail image formats to disk')
|
||||
thumbnail.add_option(
|
||||
'--list-thumbnails',
|
||||
action='store_true', dest='list_thumbnails', default=False,
|
||||
help='Simulate and list all available thumbnail formats')
|
||||
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
postproc.add_option(
|
||||
'-x', '--extract-audio',
|
||||
@@ -692,6 +720,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(selection)
|
||||
parser.add_option_group(downloader)
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(thumbnail)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(workarounds)
|
||||
parser.add_option_group(video_format)
|
||||
|
||||
@@ -7,6 +7,7 @@ from .ffmpeg import (
|
||||
FFmpegEmbedSubtitlePP,
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegFixupStretchedPP,
|
||||
FFmpegFixupM4aPP,
|
||||
FFmpegMergerPP,
|
||||
FFmpegMetadataPP,
|
||||
FFmpegVideoConvertorPP,
|
||||
@@ -25,6 +26,7 @@ __all__ = [
|
||||
'FFmpegAudioFixPP',
|
||||
'FFmpegEmbedSubtitlePP',
|
||||
'FFmpegExtractAudioPP',
|
||||
'FFmpegFixupM4aPP',
|
||||
'FFmpegFixupStretchedPP',
|
||||
'FFmpegMergerPP',
|
||||
'FFmpegMetadataPP',
|
||||
|
||||
@@ -509,6 +509,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
metadata['artist'] = info['uploader']
|
||||
elif info.get('uploader_id') is not None:
|
||||
metadata['artist'] = info['uploader_id']
|
||||
if info.get('description') is not None:
|
||||
metadata['description'] = info['description']
|
||||
if info.get('webpage_url') is not None:
|
||||
metadata['comment'] = info['webpage_url']
|
||||
|
||||
if not metadata:
|
||||
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
|
||||
@@ -560,7 +564,7 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
||||
def run(self, info):
|
||||
stretched_ratio = info.get('stretched_ratio')
|
||||
if stretched_ratio is None or stretched_ratio == 1:
|
||||
return
|
||||
return True, info
|
||||
|
||||
filename = info['filepath']
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
@@ -573,3 +577,21 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
return True, info
|
||||
|
||||
|
||||
class FFmpegFixupM4aPP(FFmpegPostProcessor):
    """Post-processor that rewrites 'm4a_dash' downloads into an mp4 container."""

    def run(self, info):
        """Re-mux the downloaded file when its container is 'm4a_dash'.

        info is the info dict of the download; 'container' and 'filepath'
        are the keys read here. Any other container value leaves the file
        untouched.

        Returns the tuple (True, info) in every case.
        """
        if info.get('container') != 'm4a_dash':
            return True, info

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

        # Stream copy: the data is not re-encoded, only the output format
        # is forced to mp4.
        options = ['-c', 'copy', '-f', 'mp4']
        self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
        self.run_ffmpeg(filename, temp_filename, options)

        # Swap the corrected file into the place of the original one.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, info
|
||||
|
||||
@@ -411,25 +411,9 @@ def make_HTTPS_handler(params, **kwargs):
|
||||
pass
|
||||
|
||||
if sys.version_info < (3, 2):
|
||||
import httplib
|
||||
|
||||
class HTTPSConnectionV3(httplib.HTTPSConnection):
|
||||
def __init__(self, *args, **kwargs):
|
||||
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
|
||||
|
||||
def connect(self):
|
||||
sock = socket.create_connection((self.host, self.port), self.timeout)
|
||||
if getattr(self, '_tunnel_host', False):
|
||||
self.sock = sock
|
||||
self._tunnel()
|
||||
try:
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
|
||||
except ssl.SSLError:
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
|
||||
return YoutubeDLHTTPSHandler(params, https_conn_class=HTTPSConnectionV3, **kwargs)
|
||||
return YoutubeDLHTTPSHandler(params, **kwargs)
|
||||
else: # Python < 3.4
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
|
||||
context.verify_mode = (ssl.CERT_NONE
|
||||
if opts_no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
@@ -560,7 +544,9 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||
sock = compat_socket_create_connection(
|
||||
(self.host, self.port), self.timeout, sa)
|
||||
if is_https:
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
|
||||
self.sock = ssl.wrap_socket(
|
||||
sock, self.key_file, self.cert_file,
|
||||
ssl_version=ssl.PROTOCOL_TLSv1)
|
||||
else:
|
||||
self.sock = sock
|
||||
hc.connect = functools.partial(_hc_connect, hc)
|
||||
@@ -612,17 +598,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
|
||||
def http_request(self, req):
|
||||
for h, v in std_headers.items():
|
||||
if h not in req.headers:
|
||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||
# The dict keys are capitalized because of this bug by urllib
|
||||
if h.capitalize() not in req.headers:
|
||||
req.add_header(h, v)
|
||||
if 'Youtubedl-no-compression' in req.headers:
|
||||
if 'Accept-encoding' in req.headers:
|
||||
del req.headers['Accept-encoding']
|
||||
del req.headers['Youtubedl-no-compression']
|
||||
if 'Youtubedl-user-agent' in req.headers:
|
||||
if 'User-agent' in req.headers:
|
||||
del req.headers['User-agent']
|
||||
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
|
||||
del req.headers['Youtubedl-user-agent']
|
||||
|
||||
if sys.version_info < (2, 7) and '#' in req.get_full_url():
|
||||
# Python 2.6 is brain-dead when it comes to fragments
|
||||
@@ -671,9 +654,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
self._params = params
|
||||
|
||||
def https_open(self, req):
|
||||
kwargs = {}
|
||||
if hasattr(self, '_context'): # python > 2.6
|
||||
kwargs['context'] = self._context
|
||||
if hasattr(self, '_check_hostname'): # python 3.x
|
||||
kwargs['check_hostname'] = self._check_hostname
|
||||
return self.do_open(functools.partial(
|
||||
_create_http_connection, self, self._https_conn_class, True),
|
||||
req)
|
||||
req, **kwargs)
|
||||
|
||||
|
||||
def parse_iso8601(date_str, delimiter='T'):
|
||||
@@ -875,6 +863,9 @@ def _windows_write_string(s, out):
|
||||
except AttributeError:
|
||||
# If the output stream doesn't have a fileno, it's virtual
|
||||
return False
|
||||
except io.UnsupportedOperation:
|
||||
# Some strange Windows pseudo files?
|
||||
return False
|
||||
if fileno not in WIN_OUTPUT_IDS:
|
||||
return False
|
||||
|
||||
@@ -1631,3 +1622,53 @@ def age_restricted(content_limit, age_limit):
|
||||
if content_limit is None:
|
||||
return False # Content available for everyone
|
||||
return age_limit < content_limit
|
||||
|
||||
|
||||
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    first_bytes is a bytes object holding the beginning of the file. A
    leading byte order mark selects the encoding used to decode it;
    without one, UTF-8 is used. Undecodable bytes are replaced rather
    than raising.

    Returns a match object (truthy) if the decoded text starts with
    optional whitespace followed by '<', None otherwise.
    """

    # The 4-byte UTF-32 marks must be tried before the 2-byte UTF-16 ones:
    # the UTF-32-LE mark starts with the UTF-16-LE mark.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            # Strip the mark itself so it does not end up in the text.
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No byte order mark found
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
|
||||
|
||||
|
||||
def determine_protocol(info_dict):
    """ Return the protocol name to use for the given info dict.

    An explicit, non-None 'protocol' entry is returned as is. Otherwise
    the value is derived from info_dict['url'] (which must then be
    present): first from a streaming prefix (rtmp, mms, rtsp), then from
    the extension (m3u8, f4m), and finally from the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Prefix checks, so variants such as 'rtmpe://' map to 'rtmp' as well
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    # Manifest formats are identified by the extension, not the scheme
    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
|
||||
|
||||
|
||||
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values

    header_row is the first row of the table and data is the list of the
    remaining rows. Each column except the last is left-aligned and
    padded to one character more than its longest cell; columns are
    joined by a single space and rows by newlines.

    Returns the rendered table as one string.
    NOTE(review): rows are presumably expected to have as many cells as
    header_row - confirm against callers.
    """
    table = [header_row] + data
    # zip(*table) transposes the rows into columns
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    # The last column gets a plain '%s', so it has no trailing padding
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.01.23.1'
|
||||
__version__ = '2015.01.30.2'
|
||||
|
||||
Reference in New Issue
Block a user