Compare commits
90 Commits
2014.07.22
...
2014.08.02
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c97797a737 | ||
![]() |
8d7d9d3452 | ||
![]() |
7a5e7b303c | ||
![]() |
61aabb9d70 | ||
![]() |
62af3a0eb5 | ||
![]() |
60064c53f1 | ||
![]() |
98eb1c3fa2 | ||
![]() |
201e9eaa0e | ||
![]() |
9afa6ede21 | ||
![]() |
f4776371ae | ||
![]() |
328a20bf9c | ||
![]() |
5622f29ae4 | ||
![]() |
b4f23afbd1 | ||
![]() |
0138968a6a | ||
![]() |
4f31d0f2b7 | ||
![]() |
bff74bdd1a | ||
![]() |
10b04ff7f4 | ||
![]() |
1f7ccb9014 | ||
![]() |
c7b3209668 | ||
![]() |
895ba7d1dd | ||
![]() |
a2a1b0baa2 | ||
![]() |
8646eb790e | ||
![]() |
f036a6328e | ||
![]() |
31bb8d3f51 | ||
![]() |
4958ae2058 | ||
![]() |
7e8d73c183 | ||
![]() |
65bc504db8 | ||
![]() |
0fc74a0d91 | ||
![]() |
8d2cc6fbb1 | ||
![]() |
a954584f63 | ||
![]() |
cb3ff6fb01 | ||
![]() |
71aa656d13 | ||
![]() |
366b1f3cfe | ||
![]() |
64ce58db38 | ||
![]() |
11b85ce62e | ||
![]() |
1220352ff7 | ||
![]() |
8f3034d871 | ||
![]() |
7fa547ab02 | ||
![]() |
3182f3e2dc | ||
![]() |
cbf915f3f6 | ||
![]() |
b490b8849a | ||
![]() |
5d2519e5bf | ||
![]() |
c3415d1bac | ||
![]() |
36f3542883 | ||
![]() |
4cb71e9b6a | ||
![]() |
4bc7009e8a | ||
![]() |
16f8e9df8a | ||
![]() |
b081cebefa | ||
![]() |
916c145217 | ||
![]() |
4192b51c7c | ||
![]() |
052421ff09 | ||
![]() |
4e99f48817 | ||
![]() |
a11165ecc6 | ||
![]() |
fbb2fc5580 | ||
![]() |
2fe3d240cc | ||
![]() |
42f4dcfe41 | ||
![]() |
892e3192fb | ||
![]() |
7272eab9d0 | ||
![]() |
ebe832dc37 | ||
![]() |
825abb8175 | ||
![]() |
8944ec0109 | ||
![]() |
c084c93402 | ||
![]() |
d799b47b82 | ||
![]() |
b7f8116406 | ||
![]() |
6db274e057 | ||
![]() |
0c92b57398 | ||
![]() |
becafcbf0f | ||
![]() |
92a86f4c1a | ||
![]() |
dfe029a62c | ||
![]() |
b0472057a3 | ||
![]() |
c081b35c27 | ||
![]() |
9f43890bcd | ||
![]() |
94a20aa5f8 | ||
![]() |
94e8df3a7e | ||
![]() |
37e64addc8 | ||
![]() |
d82ba23ba5 | ||
![]() |
0fd7fd71b4 | ||
![]() |
eae12e3fe3 | ||
![]() |
798a2cad4f | ||
![]() |
41c0849429 | ||
![]() |
a4e5af1184 | ||
![]() |
b090af5922 | ||
![]() |
388841f819 | ||
![]() |
1a2ecbfbc4 | ||
![]() |
38e292b112 | ||
![]() |
c4f731262d | ||
![]() |
07cc63f386 | ||
![]() |
e42a692f00 | ||
![]() |
6ec7538bb4 | ||
![]() |
2871d489a9 |
71
README.md
71
README.md
@@ -38,12 +38,6 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--list-extractors List all supported extractors and the URLs
|
||||
they would handle
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
@@ -51,35 +45,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--socket-timeout None Time to wait before giving up, in seconds
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess. The default value "error"
|
||||
just throws an error.
|
||||
youtube-dl guess ("auto_warning" to emit a
|
||||
warning when guessing). "error" just throws
|
||||
an error. The default value "fixup_error"
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -125,9 +106,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
of SIZE.
|
||||
|
||||
## Filesystem Options:
|
||||
-t, --title use title in file name (default)
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id use only video ID in file name
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
@@ -160,18 +141,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
-t, --title [deprecated] use title in file name
|
||||
(default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
downloads if possible.
|
||||
--no-continue do not resume partially downloaded files
|
||||
(restart from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--no-part do not use .part files
|
||||
--no-mtime do not use the Last-modified header to set
|
||||
the file modification time
|
||||
@@ -181,6 +159,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-annotations write video annotations to a .annotation
|
||||
file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
@@ -210,6 +201,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18".
|
||||
|
@@ -137,8 +137,8 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
|
||||
|
||||
def assertRegexpMatches(self, text, regexp, msg=None):
|
||||
if hasattr(self, 'assertRegexpMatches'):
|
||||
return self.assertRegexpMatches(text, regexp, msg)
|
||||
if hasattr(self, 'assertRegexp'):
|
||||
return self.assertRegexp(text, regexp, msg)
|
||||
else:
|
||||
m = re.match(regexp, text)
|
||||
if not m:
|
||||
|
@@ -15,7 +15,6 @@ from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
PBSIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
|
@@ -10,7 +10,6 @@ from test.helper import (
|
||||
get_params,
|
||||
gettestcases,
|
||||
expect_info_dict,
|
||||
md5,
|
||||
try_rm,
|
||||
report_warning,
|
||||
)
|
||||
@@ -24,7 +23,6 @@ import socket
|
||||
import youtube_dl.YoutubeDL
|
||||
from youtube_dl.utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_HTTPError,
|
||||
DownloadError,
|
||||
|
@@ -154,7 +154,7 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], '4110309')
|
||||
self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
|
||||
assertRegexpMatches(
|
||||
self, result['description'], r'TILT Brass - Bowery Poetry Club')
|
||||
self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
|
||||
self.assertEqual(len(result['entries']), 6)
|
||||
|
||||
def test_livestream_event(self):
|
||||
@@ -193,10 +193,10 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_bandcamp_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = BandcampAlbumIE(dl)
|
||||
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||
result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'Nightmare Night EP')
|
||||
assertGreaterEqual(self, len(result['entries']), 4)
|
||||
self.assertEqual(result['title'], 'Hierophany of the Open Grave')
|
||||
assertGreaterEqual(self, len(result['entries']), 9)
|
||||
|
||||
def test_smotri_community(self):
|
||||
dl = FakeYDL()
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -16,47 +18,65 @@ from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
'js',
|
||||
86,
|
||||
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
'js',
|
||||
85,
|
||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
'js',
|
||||
90,
|
||||
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
'js',
|
||||
84,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
u'js',
|
||||
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
'js',
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
u'swf',
|
||||
'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
'swf',
|
||||
86,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
u'swf',
|
||||
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
'swf',
|
||||
'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
'js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
'js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
@@ -69,7 +89,7 @@ class TestSignature(unittest.TestCase):
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group(1)
|
||||
|
||||
|
@@ -1,12 +0,0 @@
|
||||
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
|
||||
from .downloader import FileDownloader as RealFileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
|
||||
|
||||
# This class reproduces the old behaviour of FileDownloader
|
||||
class FileDownloader(RealFileDownloader):
|
||||
def _do_download(self, filename, info_dict):
|
||||
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
real_fd.add_progress_hook(ph)
|
||||
return real_fd.download(filename, info_dict)
|
@@ -275,7 +275,7 @@ class YoutubeDL(object):
|
||||
return message
|
||||
|
||||
assert hasattr(self, '_output_process')
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
line_count = message.count('\n') + 1
|
||||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
@@ -303,7 +303,7 @@ class YoutubeDL(object):
|
||||
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
@@ -849,7 +849,7 @@ class YoutubeDL(object):
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if not 'format' in info_dict:
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
@@ -999,7 +999,7 @@ class YoutubeDL(object):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self)
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
if not merger._get_executable():
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
@@ -1234,14 +1234,18 @@ class YoutubeDL(object):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
||||
write_string(
|
||||
if type('') is not compat_str:
|
||||
# Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
|
||||
self.report_warning(
|
||||
'Your Python is broken! Update to a newer and supported version')
|
||||
|
||||
encoding_str = (
|
||||
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
sys.stdout.encoding,
|
||||
self.get_encoding()),
|
||||
encoding=None
|
||||
)
|
||||
self.get_encoding()))
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||
try:
|
||||
|
@@ -76,6 +76,7 @@ import optparse
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
|
||||
@@ -97,7 +98,7 @@ from .utils import (
|
||||
write_string,
|
||||
)
|
||||
from .update import update_self
|
||||
from .FileDownloader import (
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
@@ -222,6 +223,7 @@ def parseOpts(overrideArguments=None):
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
|
||||
general.add_option('-h', '--help',
|
||||
@@ -238,14 +240,6 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent',
|
||||
help='display the current browser identification', default=False)
|
||||
general.add_option('--user-agent',
|
||||
dest='user_agent', help='specify a custom user agent', metavar='UA')
|
||||
general.add_option('--referer',
|
||||
dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
metavar='REF', default=None)
|
||||
general.add_option('--add-header',
|
||||
dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
|
||||
metavar='FIELD:VALUE')
|
||||
general.add_option('--list-extractors',
|
||||
action='store_true', dest='list_extractors',
|
||||
help='List all supported extractors and the URLs they would handle', default=False)
|
||||
@@ -255,33 +249,17 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
general.add_option(
|
||||
'--socket-timeout', dest='socket_timeout',
|
||||
type=float, default=None, help=u'Time to wait before giving up, in seconds')
|
||||
general.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
|
||||
general.add_option(
|
||||
'--ignore-config',
|
||||
action='store_true',
|
||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||
general.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
|
||||
selection.add_option(
|
||||
'--playlist-start',
|
||||
@@ -382,6 +360,33 @@ def parseOpts(overrideArguments=None):
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
|
||||
|
||||
workarounds.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
workarounds.add_option(
|
||||
'--no-check-certificate', action='store_true',
|
||||
dest='no_check_certificate', default=False,
|
||||
help='Suppress HTTPS certificate validation.')
|
||||
workarounds.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
workarounds.add_option(
|
||||
'--user-agent', metavar='UA',
|
||||
dest='user_agent', help='specify a custom user agent')
|
||||
workarounds.add_option(
|
||||
'--referer', metavar='REF',
|
||||
dest='referer', default=None,
|
||||
help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--add-header', metavar='FIELD:VALUE',
|
||||
dest='headers', action='append',
|
||||
help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option(
|
||||
@@ -439,12 +444,10 @@ def parseOpts(overrideArguments=None):
|
||||
help='Display sent and read HTTP traffic')
|
||||
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--id',
|
||||
action='store_true', dest='useid', help='use only video ID in file name', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-A', '--auto-number',
|
||||
action='store_true', dest='autonumber',
|
||||
help='number downloaded files starting from 00000', default=False)
|
||||
@@ -470,11 +473,10 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames',
|
||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-w', '--no-overwrites',
|
||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||
filesystem.add_option('-c', '--continue',
|
||||
@@ -482,8 +484,6 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--no-continue',
|
||||
action='store_false', dest='continue_dl',
|
||||
help='do not resume partially downloaded files (restart from beginning)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option('--no-part',
|
||||
action='store_true', dest='nopart', help='do not use .part files', default=False)
|
||||
filesystem.add_option('--no-mtime',
|
||||
@@ -501,6 +501,20 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail',
|
||||
help='write thumbnail image to disk', default=False)
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
filesystem.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
filesystem.add_option(
|
||||
'--rm-cache-dir', action='store_true', dest='rm_cachedir',
|
||||
help='Delete all filesystem cache files')
|
||||
|
||||
|
||||
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
||||
@@ -534,6 +548,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(downloader)
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(workarounds)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(subtitles)
|
||||
parser.add_option_group(authentication)
|
||||
@@ -694,7 +709,7 @@ def _real_main(argv=None):
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
date = DateRange(opts.dateafter, opts.datebefore)
|
||||
if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
|
||||
if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
|
||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||
|
||||
# Do not download videos when there are audio-only formats
|
||||
@@ -833,9 +848,26 @@ def _real_main(argv=None):
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
if opts.cachedir is None:
|
||||
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
else:
|
||||
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
|
||||
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
|
||||
retcode = 141
|
||||
else:
|
||||
ydl.to_screen(
|
||||
u'Removing cache dir %s .' % opts.cachedir,
|
||||
skip_eol=True)
|
||||
if os.path.exists(opts.cachedir):
|
||||
ydl.to_screen(u'.', skip_eol=True)
|
||||
shutil.rmtree(opts.cachedir)
|
||||
ydl.to_screen(u'.')
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
if not opts.update_self:
|
||||
if not (opts.update_self or opts.rm_cachedir):
|
||||
parser.error(u'you must provide at least one URL')
|
||||
else:
|
||||
sys.exit()
|
||||
|
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
if requested_bitrate is None:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
|
||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adultswim import AdultSwimIE
|
||||
@@ -111,9 +112,11 @@ from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import GameOneIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .godtube import GodTubeIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
@@ -140,6 +143,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .izlesene import IzleseneIE
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
@@ -151,6 +155,7 @@ from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
@@ -258,6 +263,7 @@ from .savefrom import SaveFromIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -320,6 +326,8 @@ from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from.ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
@@ -341,6 +349,7 @@ from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
|
48
youtube_dl/extractor/abc.py
Normal file
48
youtube_dl/extractor/abc.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
|
||||
'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
|
||||
'info_dict': {
|
||||
'id': '5624716',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
|
||||
'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
urls_info_json = self._search_regex(
|
||||
r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
|
||||
flags=re.DOTALL)
|
||||
urls_info = json.loads(urls_info_json.replace('\'', '"'))
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'width': int(url_info['width']),
|
||||
'height': int(url_info['height']),
|
||||
'tbr': int(url_info['bitrate']),
|
||||
'filesize': int(url_info['filesize']),
|
||||
} for url_info in urls_info]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
@@ -8,6 +8,8 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
|
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = m['d']
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
|
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
|
||||
return {
|
||||
'id': name.split('-')[-1],
|
||||
'title': title,
|
||||
'url': f4m_url,
|
||||
'ext': 'flv',
|
||||
'formats': self._extract_f4m_formats(f4m_url, name),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Am 1. und 2. August in Oberammergau',
|
||||
'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Über den Pass',
|
||||
'description': 'Die Eroberung der Alpen: Über den Pass',
|
||||
'duration': 2588,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'aac',
|
||||
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||
'duration': 64,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Umweltbewusster Häuslebauer',
|
||||
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
|
||||
'duration': 116,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 - Metaphysik',
|
||||
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20140117',
|
||||
}
|
||||
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
|
||||
media = {
|
||||
'id': xml_media.get('externalId'),
|
||||
'title': xml_media.find('title').text,
|
||||
'duration': parse_duration(xml_media.find('duration').text),
|
||||
'formats': self._extract_formats(xml_media.find('assets')),
|
||||
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
|
||||
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
|
||||
|
@@ -1,24 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
u'file': u'4JUVEwq3wUT7.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Connect Chat feat. Garth Brooks',
|
||||
u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
u'duration': 1495,
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '4JUVEwq3wUT7',
|
||||
'ext': 'flv',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
'duration': 1495,
|
||||
},
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
||||
'info_dict': {
|
||||
'id': 'P9gjWjelt6iP',
|
||||
'ext': 'flv',
|
||||
'title': 'Live on Letterman - St. Vincent',
|
||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
||||
'duration': 3221,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -26,5 +44,5 @@ class CBSIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
webpage, u'real video ID')
|
||||
webpage, 'real video ID')
|
||||
return self.url_result(u'theplatform:%s' % real_id)
|
||||
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
@@ -301,8 +302,12 @@ class InfoExtractor(object):
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note=u'Downloading JSON metadata',
|
||||
errnote=u'Unable to download JSON metadata',
|
||||
transform_source=None):
|
||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
transform_source=None,
|
||||
fatal=True):
|
||||
json_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
if (not fatal) and json_string is False:
|
||||
return None
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
@@ -369,7 +374,8 @@ class InfoExtractor(object):
|
||||
else:
|
||||
for p in pattern:
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj: break
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if os.name != 'nt' and sys.stderr.isatty():
|
||||
_name = u'\033[0;34m%s\033[0m' % name
|
||||
@@ -585,6 +591,24 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest')
|
||||
|
||||
formats = []
|
||||
for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
|
||||
formats.append({
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'tbr': int_or_none(media_el.attrib.get('bitrate')),
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||
video_url = video_url.replace('/z/', '/i/')
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||
'url': video_url,
|
||||
'title': info.find('titre').text,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
74
youtube_dl/extractor/gamestar.py
Normal file
74
youtube_dl/extractor/gamestar.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
'info_dict': {
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
|
||||
'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
title = og_title.replace(' - Video bei GameStar.de', '').strip()
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ',
|
||||
webpage, 'upload_date', fatal=False))
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r' Zuschauer: ([0-9\.]+) ', webpage,
|
||||
'view_count', fatal=False))
|
||||
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
}
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_TESTS = [
|
||||
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
|
||||
'md5': 'a5eb77996ef82118afbbe8e48731b98e',
|
||||
'info_dict': {
|
||||
'id': '1015301',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _parse_mp4(self, xml_description):
|
||||
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
|
||||
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||
start_page = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
|
||||
direct_url = self._search_regex(
|
||||
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||
start_page, 'url', default=None)
|
||||
if direct_url:
|
||||
video_url = 'http://www.gdcvault.com/' + direct_url
|
||||
title = self._html_search_regex(
|
||||
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
|
||||
start_page, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
}
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root', default=None)
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
start_page = self._login(webpage_url, video_id)
|
||||
if start_page is None:
|
||||
login_res = self._login(webpage_url, video_id)
|
||||
if login_res is None:
|
||||
self.report_warning('Could not login.')
|
||||
else:
|
||||
start_page = login_res
|
||||
# Grab the url from the authenticated page
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root')
|
||||
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename', default=None)
|
||||
if xml_name is None:
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
|
||||
if not parsed_url.scheme:
|
||||
default_search = self._downloader.params.get('default_search')
|
||||
if default_search is None:
|
||||
default_search = 'error'
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning'):
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
else:
|
||||
elif default_search != 'fixup_error':
|
||||
if default_search == 'auto_warning':
|
||||
if re.match(r'^(?:url|URL)$', url):
|
||||
raise ExtractorError(
|
||||
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
elif default_search == 'error':
|
||||
|
||||
if default_search in ('error', 'fixup_error'):
|
||||
raise ExtractorError(
|
||||
('%r is not a valid URL. '
|
||||
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
|
||||
|
58
youtube_dl/extractor/godtube.py
Normal file
58
youtube_dl/extractor/godtube.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class GodTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
|
||||
'md5': '77108c1e4ab58f48031101a1a2119789',
|
||||
'info_dict': {
|
||||
'id': '0C0CNNNU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Woman at the well.',
|
||||
'duration': 159,
|
||||
'timestamp': 1205712000,
|
||||
'uploader': 'beverlybmusic',
|
||||
'upload_date': '20080317',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
config = self._download_xml(
|
||||
'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
|
||||
video_id, 'Downloading player config XML')
|
||||
|
||||
video_url = config.find('.//file').text
|
||||
uploader = config.find('.//author').text
|
||||
timestamp = parse_iso8601(config.find('.//date').text)
|
||||
duration = parse_duration(config.find('.//duration').text)
|
||||
thumbnail = config.find('.//image').text
|
||||
|
||||
media = self._download_xml(
|
||||
'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
|
||||
|
||||
title = media.find('.//title').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
}
|
97
youtube_dl/extractor/izlesene.py
Normal file
97
youtube_dl/extractor/izlesene.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
|
||||
_STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.izlesene.com/video/%s' % video_id
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
|
||||
if duration:
|
||||
duration /= 1000.0
|
||||
|
||||
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
||||
comment_count = self._html_search_regex(
|
||||
r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)
|
||||
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, 'age limit', fatal=False)
|
||||
|
||||
content_url = self._html_search_meta(
|
||||
'contentURL', webpage, 'content URL', fatal=False)
|
||||
ext = determine_ext(content_url, 'mp4')
|
||||
|
||||
# Might be empty for some videos.
|
||||
qualities = self._html_search_regex(
|
||||
r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
|
||||
|
||||
formats = []
|
||||
for quality in qualities.split('|'):
|
||||
json = self._download_json(
|
||||
self._STREAM_URL.format(id=video_id, format=quality), video_id,
|
||||
note='Getting video URL for "%s" quality' % quality,
|
||||
errnote='Failed to get video URL for "%s" quality' % quality
|
||||
)
|
||||
formats.append({
|
||||
'url': json.get('streamurl'),
|
||||
'ext': ext,
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader_id': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'age_limit': 18 if family_friendly == 'False' else 0,
|
||||
'formats': formats,
|
||||
}
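An aside, not part of the commits themselves: a minimal sketch of driving the new Izlesene extractor through youtube-dl's public Python API. The URL is the one from the _TEST block above; the YoutubeDL options are ordinary parameters picked for the example.

    # Minimal sketch: run the new Izlesene extractor via the public API.
    # Assumes a youtube-dl build that already includes IzleseneIE.
    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'quiet': True})
    info = ydl.extract_info(
        'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
        download=False)
    print(info['id'], info['title'])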
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Return count of items, list of *valid* items
|
||||
def _parse_page(self, url, video_id):
|
||||
info_json = self._download_webpage(url, video_id,
|
||||
'Downloading video info JSON',
|
||||
'unable to download video info JSON')
|
||||
def _parse_page(self, url, video_id, counter):
|
||||
info_json = self._download_webpage(
|
||||
url, video_id,
|
||||
'Downloading video info JSON on page %d' % counter,
|
||||
'Unable to download video info JSON %d' % counter)
|
||||
|
||||
response = json.loads(info_json)
|
||||
if type(response) != list:
|
||||
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._JUSTIN_PAGE_LIMIT
|
||||
while True:
|
||||
if paged:
|
||||
self.report_download_page(video_id, offset)
|
||||
for counter in itertools.count(1):
|
||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
||||
page_count, page_info = self._parse_page(page_url, video_id)
|
||||
page_count, page_info = self._parse_page(
|
||||
page_url, video_id, counter)
|
||||
entries.extend(page_info)
|
||||
if not paged or page_count != limit:
|
||||
break
|
||||
|
59
youtube_dl/extractor/krasview.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class KrasViewIE(InfoExtractor):
|
||||
IE_DESC = 'Красвью'
|
||||
_VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://krasview.ru/video/512228',
|
||||
'md5': '3b91003cf85fc5db277870c8ebd98eae',
|
||||
'info_dict': {
|
||||
'id': '512228',
|
||||
'ext': 'mp4',
|
||||
'title': 'Снег, лёд, заносы',
|
||||
'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
|
||||
'duration': 27,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
flashvars = json.loads(self._search_regex(
|
||||
r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
|
||||
|
||||
video_url = flashvars['url']
|
||||
title = unescapeHTML(flashvars['title'])
|
||||
description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
|
||||
thumbnail = flashvars['image']
|
||||
duration = int(flashvars['duration'])
|
||||
filesize = int(flashvars['size'])
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
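A side note rather than part of the diff: the KrasView extraction above reduces to one regex capture plus json.loads. A small sketch, run against a made-up page snippet:

    # Sketch of the flashvars parsing used by KrasViewIE; the snippet is made up.
    import json
    import re

    webpage = 'new Player({ flashvars: {"url": "http://example.com/v.mp4", "title": "Snow", "image": "t.jpg", "duration": "27", "size": "12345"} });'
    flashvars = json.loads(re.search(
        r'flashvars\s*:\s*({.+?})\s*}\);', webpage).group(1))
    print(flashvars['url'], flashvars['duration'])  # http://example.com/v.mp4 27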
|
@@ -5,11 +5,14 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
xpath_with_ns,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live from Webster Hall NYC',
|
||||
'upload_date': '20121012',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^http://.*\.jpg$'
|
||||
}
|
||||
}
|
||||
|
||||
def _parse_smil(self, video_id, smil_url):
|
||||
formats = []
|
||||
_SWITCH_XPATH = (
|
||||
'.//{http://www.w3.org/2001/SMIL20/Language}body/'
|
||||
'{http://www.w3.org/2001/SMIL20/Language}switch')
|
||||
smil_doc = self._download_xml(
|
||||
smil_url, video_id,
|
||||
note='Downloading SMIL information',
|
||||
errnote='Unable to download SMIL information',
|
||||
fatal=False)
|
||||
if smil_doc is False: # Download failed
|
||||
return formats
|
||||
title_node = find_xpath_attr(
|
||||
smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
|
||||
'name', 'title')
|
||||
if title_node is None:
|
||||
self.report_warning('Cannot find SMIL id')
|
||||
switch_node = smil_doc.find(_SWITCH_XPATH)
|
||||
else:
|
||||
title_id = title_node.attrib['content']
|
||||
switch_node = find_xpath_attr(
|
||||
smil_doc, _SWITCH_XPATH, 'id', title_id)
|
||||
if switch_node is None:
|
||||
raise ExtractorError('Cannot find switch node')
|
||||
video_nodes = switch_node.findall(
|
||||
'{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
|
||||
for vn in video_nodes:
|
||||
tbr = int_or_none(vn.attrib.get('system-bitrate'))
|
||||
furl = (
|
||||
'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
|
||||
(vn.attrib['src']))
|
||||
if 'clipBegin' in vn.attrib:
|
||||
furl += '&ssek=' + vn.attrib['clipBegin']
|
||||
formats.append({
|
||||
'url': furl,
|
||||
'format_id': 'smil_%d' % tbr,
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'preference': -1000,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_video_info(self, video_data):
|
||||
video_url = (
|
||||
video_data.get('progressive_url_hd') or
|
||||
video_data.get('progressive_url')
|
||||
video_id = compat_str(video_data['id'])
|
||||
|
||||
FORMAT_KEYS = (
|
||||
('sd', 'progressive_url'),
|
||||
('hd', 'progressive_url_hd'),
|
||||
)
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': video_data[key],
|
||||
'quality': i + 1,
|
||||
} for i, (format_id, key) in enumerate(FORMAT_KEYS)
|
||||
if video_data.get(key)]
|
||||
|
||||
smil_url = video_data.get('smil_url')
|
||||
if smil_url:
|
||||
formats.extend(self._parse_smil(video_id, smil_url))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(video_data['id']),
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_data['caption'],
|
||||
'thumbnail': video_data['thumbnail_url'],
|
||||
'thumbnail': video_data.get('thumbnail_url'),
|
||||
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
||||
'like_count': video_data.get('likes', {}).get('total'),
|
||||
'view_count': video_data.get('views'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -11,8 +11,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
|
||||
_VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
'md5': 'ff56a598c2cf411a9a38a69709e97079',
|
||||
'info_dict': {
|
||||
'id': '34698933',
|
||||
'ext': 'mp4',
|
||||
'title': "Ackley's spectacular catch",
|
||||
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
||||
'duration': 66,
|
||||
'timestamp': 1405980600,
|
||||
'upload_date': '20140721',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
|
||||
'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
|
||||
|
@@ -4,7 +4,11 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class NBCIE(InfoExtractor):
|
||||
|
@@ -32,13 +32,21 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
def _extract_ids(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
# frontline video embed
|
||||
media_id = self._search_regex(
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
|
||||
webpage, 'frontline video ID', fatal=False, default=None)
|
||||
if media_id:
|
||||
return media_id, presumptive_id
|
||||
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
webpage, 'player URL')
|
||||
@@ -57,6 +65,11 @@ class PBSIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._extract_ids(url)
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info = self._download_json(info_url, display_id)
|
||||
|
||||
|
@@ -92,16 +92,7 @@ class RTLnowIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
|
||||
'info_dict': {
|
||||
'id': '153819',
|
||||
'ext': 'flv',
|
||||
'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
|
||||
'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
|
||||
'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
|
||||
'upload_date': '20140221',
|
||||
'duration': 2429,
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
|
@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
|
||||
'upload_date': '20120816',
|
||||
'uploader': 'Howcast',
|
||||
'uploader_id': 'Howcast',
|
||||
'description': 'md5:727900f130df3dc9a25e2721497c7910',
|
||||
'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
|
57
youtube_dl/extractor/shared.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SharedIE(InfoExtractor):
|
||||
_VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://shared.sx/0060718775',
|
||||
'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72',
|
||||
'info_dict': {
|
||||
'id': '0060718775',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny Trailer',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'>File does not exist<', page) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
|
||||
|
||||
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video_page = self._download_webpage(request, video_id, 'Downloading video page')
|
||||
|
||||
video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
|
||||
title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'filesize': filesize,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
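Not from the commit itself: shared.sx stores the title base64-encoded in the full:title meta tag, which is why the code above pipes it through base64.b64decode. A tiny round-trip illustration (the encoded value is generated on the spot, not taken from the site):

    import base64

    encoded = base64.b64encode('Big Buck Bunny Trailer'.encode('utf-8'))
    print(base64.b64decode(encoded).decode('utf-8'))  # Big Buck Bunny Trailer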
|
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
determine_ext,
|
||||
)
|
||||
import re
|
||||
|
||||
@@ -68,6 +69,7 @@ class SockshareIE(InfoExtractor):
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
'ext': determine_ext(title),
|
||||
}]
|
||||
|
||||
return {
|
||||
|
@@ -82,10 +82,10 @@ class SoundcloudIE(InfoExtractor):
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
||||
'md5': 'fee7b8747b09bb755cefd4b853e7249a',
|
||||
'md5': '7624f2351f8a3b2e7cd51522496e7631',
|
||||
'info_dict': {
|
||||
'id': '128590877',
|
||||
'ext': 'wav',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bus Brakes',
|
||||
'description': 'md5:0170be75dd395c96025d210d261c784e',
|
||||
'uploader': 'oddsamples',
|
||||
|
@@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
@@ -10,18 +12,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
IE_NAME = u'streamcloud.eu'
|
||||
IE_NAME = 'streamcloud.eu'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
u'file': u'skp9j99s4bpz.mp4',
|
||||
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
|
||||
u'info_dict': {
|
||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||
u'duration': 9,
|
||||
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
'md5': '6bea4c7fa5daaacc2a946b7146286686',
|
||||
'info_dict': {
|
||||
'id': 'skp9j99s4bpz',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'/\\ ä ↭',
|
||||
},
|
||||
u'skip': u'Only available from the EU'
|
||||
'skip': 'Only available from the EU'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note=u'Downloading video page ...')
|
||||
req, video_id, note='Downloading video page ...')
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, u'title')
|
||||
r'<h1[^>]*>([^<]+)<', webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, u'video URL')
|
||||
duration_str = self._search_regex(
|
||||
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
|
||||
duration = None if duration_str is None else int(duration_str)
|
||||
r'file:\s*"([^"]+)"', webpage, 'video URL')
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import parse_duration
|
||||
|
||||
|
||||
class SWRMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
|
||||
'uploader': 'SWR 2',
|
||||
'uploader_id': '284670',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'md5': '881531487d0633080a8cc88d31ef896f',
|
||||
'info_dict': {
|
||||
'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Familienspaß am Bodensee',
|
||||
'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 1784,
|
||||
'upload_date': '20140727',
|
||||
'uploader': 'SWR Fernsehen BW',
|
||||
'uploader_id': '281130',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
85
youtube_dl/extractor/tvplay.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '418113',
|
||||
'ext': 'flv',
|
||||
'title': 'Kādi ir īri? - Viņas melo labāk',
|
||||
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
||||
'duration': 25,
|
||||
'timestamp': 1406097056,
|
||||
'upload_date': '20140723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
if video['is_geo_blocked']:
|
||||
raise ExtractorError(
|
||||
'This content is not available in your country due to copyright reasons', expected=True)
|
||||
|
||||
streams = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
|
||||
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams['streams'].items():
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'preference': quality(format_id),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
if not m:
|
||||
continue
|
||||
fmt.update({
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'timestamp': parse_iso8601(video['created_at']),
|
||||
'view_count': video['views']['total'],
|
||||
'age_limit': video.get('age_limit', 0),
|
||||
'formats': formats,
|
||||
}
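For illustration only, not part of the diff: the RTMP branch above splits a stream URL into connection URL, app and play path with one regex. A quick check on a made-up URL:

    import re

    video_url = 'rtmp://example-fs.mtgx.tv/vod/mp4:videos/418113_high.mp4'  # made-up URL
    m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
    print(m.group('url'))       # rtmp://example-fs.mtgx.tv/vod
    print(m.group('app'))       # vod
    print(m.group('playpath'))  # mp4:videos/418113_high.mp4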
|
56
youtube_dl/extractor/ubu.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class UbuIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://ubu.com/film/her_noise.html',
|
||||
'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
|
||||
'info_dict': {
|
||||
'id': 'her_noise',
|
||||
'ext': 'mp4',
|
||||
'title': 'Her Noise - The Making Of (2007)',
|
||||
'duration': 3600,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
|
||||
if duration:
|
||||
duration *= 60
|
||||
|
||||
formats = []
|
||||
|
||||
FORMAT_REGEXES = [
|
||||
['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
|
||||
['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
|
||||
]
|
||||
|
||||
for format_id, format_regex in FORMAT_REGEXES:
|
||||
m = re.search(format_regex, webpage)
|
||||
if m:
|
||||
formats.append({
|
||||
'url': m.group(1),
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||
|
||||
|
68
youtube_dl/extractor/vidme.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class VidmeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/QNB',
|
||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||
'info_dict': {
|
||||
'id': 'QNB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fishing for piranha - the easy way',
|
||||
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
||||
'duration': 119.92,
|
||||
'timestamp': 1406313244,
|
||||
'upload_date': '20140725',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default='')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
}
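A short aside, not in the diff: the view, like and comment counts above are captured with thousands separators still in place and then normalised by str_to_int from youtube_dl.utils:

    from youtube_dl.utils import str_to_int

    print(str_to_int('12,345'))  # 12345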
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -20,12 +21,14 @@ class VubeIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
|
||||
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$',
|
||||
'uploader': 'Chiara.Grispo',
|
||||
'uploader_id': '1u3hX0znhP',
|
||||
'timestamp': 1388743358,
|
||||
'upload_date': '20140103',
|
||||
'duration': 170.56
|
||||
'duration': 170.56,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -36,12 +39,30 @@ class VubeIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
|
||||
'description': 'md5:40bcacb97796339f1690642c21d56f4a',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102265d5a9f-0f17-4f6b-5753-adf08484ee1e.jpg',
|
||||
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$',
|
||||
'uploader': 'Seraina',
|
||||
'uploader_id': 'XU9VE2BQ2q',
|
||||
'timestamp': 1396492438,
|
||||
'upload_date': '20140403',
|
||||
'duration': 240.107
|
||||
'duration': 240.107,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||
'md5': '0584fc13b50f887127d9d1007589d27f',
|
||||
'info_dict': {
|
||||
'id': '0nmsMY5vEq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frozen - Let It Go Cover by Siren Gene',
|
||||
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
|
||||
'uploader': 'Siren Gene',
|
||||
'uploader_id': 'Siren',
|
||||
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
|
||||
'duration': 221.788,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -50,8 +71,16 @@ class VubeIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://vube.com/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_regex(
|
||||
r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
|
||||
webpage, 'video data'
|
||||
)
|
||||
data = json.loads(data_json)
|
||||
video = (
|
||||
data.get('video') or
|
||||
data)
|
||||
assert isinstance(video, dict)
|
||||
|
||||
public_id = video['public_id']
|
||||
|
||||
@@ -69,21 +98,31 @@ class VubeIE(InfoExtractor):
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = video['thumbnail_src']
|
||||
if thumbnail.startswith('//'):
|
||||
thumbnail = 'http:' + thumbnail
|
||||
uploader = video['user_alias']
|
||||
uploader_id = video['user_url_id']
|
||||
timestamp = int(video['upload_time'])
|
||||
thumbnail = self._proto_relative_url(
|
||||
video.get('thumbnail') or video.get('thumbnail_src'),
|
||||
scheme='http:')
|
||||
uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
|
||||
uploader_id = data.get('user', {}).get('name')
|
||||
timestamp = int_or_none(video.get('upload_time'))
|
||||
duration = video['duration']
|
||||
view_count = video.get('raw_view_count')
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count= video.get('total_hates')
|
||||
like_count = video.get('rlikes')
|
||||
if like_count is None:
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count = video.get('rhates')
|
||||
if dislike_count is None:
|
||||
dislike_count = video.get('total_hates')
|
||||
|
||||
comment = self._download_json(
|
||||
'http://vube.com/api/video/%s/comment' % video_id, video_id, 'Downloading video comment JSON')
|
||||
|
||||
comment_count = int_or_none(comment.get('total'))
|
||||
comments = video.get('comments')
|
||||
comment_count = None
|
||||
if comments is None:
|
||||
comment_data = self._download_json(
|
||||
'http://vube.com/api/video/%s/comment' % video_id,
|
||||
video_id, 'Downloading video comment JSON', fatal=False)
|
||||
if comment_data is not None:
|
||||
comment_count = int_or_none(comment_data.get('total'))
|
||||
else:
|
||||
comment_count = len(comments)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -81,7 +81,7 @@ class WDRIE(InfoExtractor):
|
||||
]
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
flashvars = compat_urlparse.parse_qs(
|
||||
flashvars = compat_parse_qs(
|
||||
self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
|
||||
|
||||
page_id = flashvars['trackerClipId'][0]
|
||||
|
@@ -344,15 +344,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Indicate the download will use the RTMP protocol."""
|
||||
self.to_screen(u'RTMP download detected')
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, slen):
|
||||
def _signature_cache_id(self, example_sig):
|
||||
""" Return a string representation of a signature """
|
||||
return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
|
||||
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
player_type = id_m.group('ext')
|
||||
player_id = id_m.group('id')
|
||||
|
||||
# Read from filesystem cache
|
||||
func_id = '%s_%s_%d' % (player_type, player_id, slen)
|
||||
func_id = '%s_%s_%s' % (
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
cache_dir = get_cachedir(self._downloader.params)
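An illustration, not part of the commit: the cache key now encodes the length of each signature part instead of the total length, so signatures with the same overall length but a different part layout no longer share a cache entry. With a made-up signature:

    example_sig = 'AAAAAAAAAA.BBBBBBBBBBBBBBB.CCCCC'  # made-up signature
    cache_id = '.'.join(str(len(part)) for part in example_sig.split('.'))
    print(cache_id)  # 10.15.5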
|
||||
|
||||
@@ -386,7 +393,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
if cache_enabled:
|
||||
try:
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
try:
|
||||
@@ -402,7 +409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, slen):
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = u'' if start == 0 else str(start)
|
||||
@@ -431,11 +438,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
yield _genslice(start, i, step)
|
||||
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = func(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
expr_code = u' + '.join(gen_sig_code(cache_spec))
|
||||
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
|
||||
signature_id_tuple = '(%s)' % (
|
||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
u' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen(u'Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
@@ -463,20 +473,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if player_url.startswith(u'//'):
|
||||
player_url = u'https:' + player_url
|
||||
try:
|
||||
player_id = (player_url, len(s))
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
if player_id not in self._player_cache:
|
||||
func = self._extract_signature_function(
|
||||
video_id, player_url, len(s)
|
||||
video_id, player_url, s
|
||||
)
|
||||
self._player_cache[player_id] = func
|
||||
func = self._player_cache[player_id]
|
||||
if self._downloader.params.get('youtube_print_sig_code'):
|
||||
self._print_sig_code(func, len(s))
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
raise ExtractorError(
|
||||
u'Automatic signature extraction failed: ' + tb, cause=e)
|
||||
u'Signature extraction failed: ' + tb, cause=e)
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
@@ -609,7 +619,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
'sts':'16268',
|
||||
'sts': self._search_regex(
|
||||
r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
|
||||
})
|
||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
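Not part of the diff: the 'sts' value is no longer hardcoded but scraped from the watch page with the regex shown above. A sketch on a made-up snippet:

    import re

    video_webpage = '..."sts": 16268,...'  # made-up snippet
    print(re.search(r'"sts"\s*:\s*(\d+)', video_webpage).group(1))  # 16268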
|
||||
@@ -803,50 +814,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url_map = {}
|
||||
for url_data_str in encoded_url_map.split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' in url_data and 'url' in url_data:
|
||||
url = url_data['url'][0]
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
if 'itag' not in url_data or 'url' not in url_data:
|
||||
continue
|
||||
format_id = url_data['itag'][0]
|
||||
url = url_data['url'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
r'html5player-(.+?)\.js', video_webpage,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
player_version = self._search_regex(
|
||||
r'html5player-([^/]+?)(?:/html5player)?\.js',
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
|
||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen(u'{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[format_id] = url
|
||||
formats = _map_to_format_list(url_map)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .utils import (
|
||||
@@ -40,8 +41,9 @@ class JSInterpreter(object):
|
||||
assign = lambda v: v
|
||||
expr = stmt[len('return '):]
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Cannot determine left side of statement in %r' % stmt)
|
||||
# Try interpreting it as an expression
|
||||
expr = stmt
|
||||
assign = lambda v: v
|
||||
|
||||
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
||||
return assign(v)
|
||||
@@ -53,35 +55,63 @@ class JSInterpreter(object):
|
||||
if expr.isalpha():
|
||||
return local_vars[expr]
|
||||
|
||||
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
||||
if m:
|
||||
member = m.group('member')
|
||||
variable = m.group('in')
|
||||
try:
|
||||
return json.loads(expr)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if variable not in local_vars:
|
||||
m = re.match(
|
||||
r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||
expr)
|
||||
if m:
|
||||
variable = m.group('var')
|
||||
member = m.group('member')
|
||||
arg_str = m.group('args')
|
||||
|
||||
if variable in local_vars:
|
||||
obj = local_vars[variable]
|
||||
else:
|
||||
if variable not in self._objects:
|
||||
self._objects[variable] = self.extract_object(variable)
|
||||
obj = self._objects[variable]
|
||||
key, args = member.split('(', 1)
|
||||
args = args.strip(')')
|
||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||
for v in args.split(',')]
|
||||
return obj[key](argvals)
|
||||
|
||||
val = local_vars[variable]
|
||||
if member == 'split("")':
|
||||
return list(val)
|
||||
if member == 'join("")':
|
||||
return ''.join(val)
|
||||
if member == 'length':
|
||||
return len(val)
|
||||
if member == 'reverse()':
|
||||
return val[::-1]
|
||||
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
||||
if slice_m:
|
||||
idx = self.interpret_expression(
|
||||
slice_m.group('idx'), local_vars, allow_recursion - 1)
|
||||
return val[idx:]
|
||||
if arg_str is None:
|
||||
# Member access
|
||||
if member == 'length':
|
||||
return len(obj)
|
||||
return obj[member]
|
||||
|
||||
assert expr.endswith(')')
|
||||
# Function call
|
||||
if arg_str == '':
|
||||
argvals = tuple()
|
||||
else:
|
||||
argvals = tuple([
|
||||
self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in arg_str.split(',')])
|
||||
|
||||
if member == 'split':
|
||||
assert argvals == ('',)
|
||||
return list(obj)
|
||||
if member == 'join':
|
||||
assert len(argvals) == 1
|
||||
return argvals[0].join(obj)
|
||||
if member == 'reverse':
|
||||
assert len(argvals) == 0
|
||||
obj.reverse()
|
||||
return obj
|
||||
if member == 'slice':
|
||||
assert len(argvals) == 1
|
||||
return obj[argvals[0]:]
|
||||
if member == 'splice':
|
||||
assert isinstance(obj, list)
|
||||
index, howMany = argvals
|
||||
res = []
|
||||
for i in range(index, min(index + howMany, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
return res
|
||||
|
||||
return obj[member](argvals)
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
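An aside, not from the commit: the splice branch above mirrors JavaScript's Array.prototype.splice(index, howMany), removing the elements in place and returning them. The same logic in plain Python:

    obj = [0, 1, 2, 3, 4, 5]
    index, howMany = 1, 3
    res = []
    for i in range(index, min(index + howMany, len(obj))):
        res.append(obj.pop(index))
    print(res)  # [1, 2, 3]
    print(obj)  # [0, 4, 5]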
|
||||
@@ -103,10 +133,11 @@ class JSInterpreter(object):
|
||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
if m:
|
||||
fname = m.group('func')
|
||||
argvals = tuple([
|
||||
int(v) if v.isdigit() else local_vars[v]
|
||||
for v in m.group('args').split(',')])
|
||||
if fname not in self._functions:
|
||||
self._functions[fname] = self.extract_function(fname)
|
||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||
for v in m.group('args').split(',')]
|
||||
return self._functions[fname](argvals)
|
||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||
|
||||
@@ -114,13 +145,13 @@ class JSInterpreter(object):
|
||||
obj = {}
|
||||
obj_m = re.search(
|
||||
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
|
||||
r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
|
||||
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
|
||||
r'\}\s*;',
|
||||
self.code)
|
||||
fields = obj_m.group('fields')
|
||||
# Currently, it only supports function definitions
|
||||
fields_m = re.finditer(
|
||||
r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
|
||||
r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
|
||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
||||
fields)
|
||||
for f in fields_m:
|
||||
|
@@ -18,14 +18,15 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
|
||||
class FFmpegPostProcessorError(PostProcessingError):
|
||||
pass
|
||||
|
||||
|
||||
class FFmpegPostProcessor(PostProcessor):
|
||||
def __init__(self,downloader=None):
|
||||
def __init__(self, downloader=None, deletetempfiles=False):
|
||||
PostProcessor.__init__(self, downloader)
|
||||
self._exes = self.detect_executables()
|
||||
self._deletetempfiles = deletetempfiles
|
||||
|
||||
@staticmethod
|
||||
def detect_executables():
|
||||
@@ -60,6 +61,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
if self._deletetempfiles:
|
||||
for ipath in input_paths:
|
||||
os.remove(ipath)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
||||
|
@@ -242,8 +242,8 @@ else:
|
||||
if sys.version_info >= (2,7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
|
||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||
return node.find(expr)
|
||||
else:
|
||||
@@ -852,6 +852,8 @@ def unified_strdate(date_str):
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url, default_ext=u'unknown_video'):
|
||||
if url is None:
|
||||
return default_ext
|
||||
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||
return guess
|
||||
|
@@ -1,2 +1,2 @@
__version__ = '2014.07.22'
__version__ = '2014.08.02.1'