Compare commits

...

23 Commits

Author SHA1 Message Date
Philipp Hagemeister
7272eab9d0 release 2014.07.24 2014-07-24 11:24:43 +02:00
Jaime Marquínez Ferrándiz
ebe832dc37 [jsinterp] 'reverse' modifies the array in place (fixes #3334) 2014-07-24 11:08:31 +02:00
Philipp Hagemeister
825abb8175 [jsinterp] Implement splice and general improvement
I still get 403s on YouTube though.
2014-07-24 10:41:14 +02:00
Sergey M․
8944ec0109 [krasview] Add extractor (Closes #3313) 2014-07-23 19:29:15 +07:00
Jaime Marquínez Ferrándiz
c084c93402 [youtube] Extract the 'sts' parameter from the webpage (fixes #3327) 2014-07-23 12:16:26 +02:00
Philipp Hagemeister
d799b47b82 [ffmpeg] PEP8 and a more obvious variable name 2014-07-23 02:55:06 +02:00
rupertbaxter2
b7f8116406 Deletes temp files after postprocess merge unless -k option is specified 2014-07-23 02:53:44 +02:00
Philipp Hagemeister
6db274e057 Remove legacy FileDownloader (Closes #2964) 2014-07-23 02:47:52 +02:00
Philipp Hagemeister
0c92b57398 Remove unused imports 2014-07-23 02:46:21 +02:00
Philipp Hagemeister
becafcbf0f [wdr] fix up imports 2014-07-23 02:44:30 +02:00
Philipp Hagemeister
92a86f4c1a Do not import from legacy FileDownloader class 2014-07-23 02:43:59 +02:00
Philipp Hagemeister
dfe029a62c release 2014.07.23.2 2014-07-23 02:25:27 +02:00
Philipp Hagemeister
b0472057a3 [YoutubeDL] Make sure we really, really get out the encoding string
Fixes #3326
Apparently, on some platforms, even outputting this fails already.
2014-07-23 02:24:52 +02:00
Philipp Hagemeister
c081b35c27 [youtube] Support new player URLs (Fixes #3326) 2014-07-23 02:19:33 +02:00
Philipp Hagemeister
9f43890bcd [jsinterp] Allow digits in function names 2014-07-23 02:13:48 +02:00
Philipp Hagemeister
94a20aa5f8 [rtlnow] Simplify outdated test 2014-07-23 01:49:25 +02:00
Philipp Hagemeister
94e8df3a7e [wdr] Fix umlaut parsing on Python 2.x 2014-07-23 01:47:36 +02:00
Philipp Hagemeister
37e64addc8 [nbc] Add missing import 2014-07-23 01:47:18 +02:00
Philipp Hagemeister
d82ba23ba5 [soundcloud:playlist] Fix test description 2014-07-23 01:44:08 +02:00
Philipp Hagemeister
0fd7fd71b4 [test/helper] Do not use deprecated method 2014-07-23 01:43:46 +02:00
Philipp Hagemeister
eae12e3fe3 [soundcloud] Adapt test 2014-07-23 01:41:45 +02:00
Philipp Hagemeister
798a2cad4f [sockshare] Fix ext 2014-07-23 01:40:01 +02:00
Philipp Hagemeister
41c0849429 [savefrom] Make test description more flexible 2014-07-23 01:38:07 +02:00
20 changed files with 174 additions and 75 deletions

View File

@@ -137,8 +137,8 @@ def expect_info_dict(self, expected_dict, got_dict):
def assertRegexpMatches(self, text, regexp, msg=None):
if hasattr(self, 'assertRegexpMatches'):
return self.assertRegexpMatches(text, regexp, msg)
if hasattr(self, 'assertRegexp'):
return self.assertRegexp(text, regexp, msg)
else:
m = re.match(regexp, text)
if not m:

View File

@@ -15,7 +15,6 @@ from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
JustinTVIE,
PBSIE,
YoutubeIE,
)

View File

@@ -10,7 +10,6 @@ from test.helper import (
get_params,
gettestcases,
expect_info_dict,
md5,
try_rm,
report_warning,
)
@@ -24,7 +23,6 @@ import socket
import youtube_dl.YoutubeDL
from youtube_dl.utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_HTTPError,
DownloadError,

View File

@@ -154,7 +154,7 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '4110309')
self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
assertRegexpMatches(
self, result['description'], r'TILT Brass - Bowery Poetry Club')
self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
self.assertEqual(len(result['entries']), 6)
def test_livestream_event(self):

View File

@@ -57,6 +57,18 @@ _TESTS = [
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
u'js',
84,
u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
u'js',
83,
u'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
),
]

View File

@@ -1,12 +0,0 @@
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
from .downloader import FileDownloader as RealFileDownloader
from .downloader import get_suitable_downloader
# This class reproduces the old behaviour of FileDownloader
class FileDownloader(RealFileDownloader):
    """Legacy shim that keeps the old ``FileDownloader`` entry point working.

    The download itself is delegated to whichever downloader class
    ``get_suitable_downloader`` selects for the given ``info_dict``.
    """

    def _do_download(self, filename, info_dict):
        """Download ``filename`` through the downloader suited to ``info_dict``.

        A delegate downloader is created with this object's ``ydl`` and
        ``params``, every progress hook registered on this object is
        forwarded to it, and the result of the delegate's ``download`` call
        is returned unchanged.
        """
        downloader_cls = get_suitable_downloader(info_dict)
        delegate = downloader_cls(self.ydl, self.params)
        # Forward the hooks so callers registered on the legacy object
        # still receive progress notifications from the delegate.
        for hook in self._progress_hooks:
            delegate.add_progress_hook(hook)
        return delegate.download(filename, info_dict)

View File

@@ -999,7 +999,7 @@ class YoutubeDL(object):
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
merger = FFmpegMergerPP(self)
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
if not merger._get_executable():
postprocessors = []
self.report_warning('You have requested multiple '
@@ -1234,14 +1234,21 @@ class YoutubeDL(object):
if not self.params.get('verbose'):
return
write_string(
encoding_str = (
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
sys.stdout.encoding,
self.get_encoding()),
encoding=None
)
self.get_encoding()))
try:
write_string(encoding_str, encoding=None)
except:
errmsg = 'Failed to write encoding string %r' % encoding_str
try:
sys.stdout.write(errmsg)
except:
pass
raise IOError(errmsg)
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
try:

View File

@@ -97,7 +97,7 @@ from .utils import (
write_string,
)
from .update import update_self
from .FileDownloader import (
from .downloader import (
FileDownloader,
)
from .extractor import gen_extractors

View File

@@ -151,6 +151,7 @@ from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .lifenews import LifeNewsIE

View File

@@ -0,0 +1,59 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unescapeHTML,
)
class KrasViewIE(InfoExtractor):
    """Information extractor for videos hosted on krasview.ru."""

    IE_DESC = 'Красвью'
    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://krasview.ru/video/512228',
        'md5': '3b91003cf85fc5db277870c8ebd98eae',
        'info_dict': {
            'id': '512228',
            'ext': 'mp4',
            'title': 'Снег, лёд, заносы',
            'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
            'duration': 27,
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Extract the video described by ``url``.

        The page embeds a JSON ``flashvars`` object; ``url`` and ``title``
        are read from it and are mandatory (a missing key raises
        ``KeyError``).  All other fields are optional metadata and come out
        as ``None`` when the page does not provide them, instead of
        aborting the extraction.

        Returns the info dictionary with ``id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``filesize``,
        ``width`` and ``height``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        flashvars = json.loads(self._search_regex(
            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))

        # Mandatory fields: without them there is nothing to download.
        video_url = flashvars['url']
        title = unescapeHTML(flashvars['title'])

        # Prefer the player's subtitle; fall back to the Open Graph
        # description.  Only unescape when there actually is a description.
        description = (
            flashvars.get('subtitle') or
            self._og_search_description(webpage, default=None))
        if description is not None:
            description = unescapeHTML(description)

        # Optional metadata: tolerate absent keys / absent og: tags.
        thumbnail = flashvars.get('image')
        duration = int_or_none(flashvars.get('duration'))
        filesize = int_or_none(flashvars.get('size'))
        width = int_or_none(self._og_search_property(
            'video:width', webpage, 'video width', default=None))
        height = int_or_none(self._og_search_property(
            'video:height', webpage, 'video height', default=None))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'filesize': filesize,
            'width': width,
            'height': height,
        }

View File

@@ -4,7 +4,11 @@ import re
import json
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
from ..utils import (
compat_str,
ExtractorError,
find_xpath_attr,
)
class NBCIE(InfoExtractor):

View File

@@ -92,16 +92,7 @@ class RTLnowIE(InfoExtractor):
},
{
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
'info_dict': {
'id': '153819',
'ext': 'flv',
'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
'upload_date': '20140221',
'duration': 2429,
},
'skip': 'Only works from Germany',
'only_matching': True,
},
]

View File

@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
'upload_date': '20120816',
'uploader': 'Howcast',
'uploader_id': 'Howcast',
'description': 'md5:727900f130df3dc9a25e2721497c7910',
'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
},
'params': {
'skip_download': True

View File

@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
determine_ext,
)
import re
@@ -68,6 +69,7 @@ class SockshareIE(InfoExtractor):
formats = [{
'format_id': 'sd',
'url': video_url,
'ext': determine_ext(title),
}]
return {

View File

@@ -82,10 +82,10 @@ class SoundcloudIE(InfoExtractor):
# downloadable song
{
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
'md5': 'fee7b8747b09bb755cefd4b853e7249a',
'md5': '7624f2351f8a3b2e7cd51522496e7631',
'info_dict': {
'id': '128590877',
'ext': 'wav',
'ext': 'mp3',
'title': 'Bus Brakes',
'description': 'md5:0170be75dd395c96025d210d261c784e',
'uploader': 'oddsamples',

View File

@@ -81,7 +81,7 @@ class WDRIE(InfoExtractor):
]
return self.playlist_result(entries, page_id)
flashvars = compat_urlparse.parse_qs(
flashvars = compat_parse_qs(
self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
page_id = flashvars['trackerClipId'][0]

View File

@@ -346,8 +346,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _extract_signature_function(self, video_id, player_url, slen):
id_m = re.match(
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
@@ -609,7 +611,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
data = compat_urllib_parse.urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'sts':'16268',
'sts': self._search_regex(
r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import json
import re
from .utils import (
@@ -40,8 +41,9 @@ class JSInterpreter(object):
assign = lambda v: v
expr = stmt[len('return '):]
else:
raise ExtractorError(
'Cannot determine left side of statement in %r' % stmt)
# Try interpreting it as an expression
expr = stmt
assign = lambda v: v
v = self.interpret_expression(expr, local_vars, allow_recursion)
return assign(v)
@@ -53,35 +55,63 @@ class JSInterpreter(object):
if expr.isalpha():
return local_vars[expr]
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m:
member = m.group('member')
variable = m.group('in')
try:
return json.loads(expr)
except ValueError:
pass
if variable not in local_vars:
m = re.match(
r'^(?P<var>[a-z]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
expr)
if m:
variable = m.group('var')
member = m.group('member')
arg_str = m.group('args')
if variable in local_vars:
obj = local_vars[variable]
else:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
key, args = member.split('(', 1)
args = args.strip(')')
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in args.split(',')]
return obj[key](argvals)
val = local_vars[variable]
if member == 'split("")':
return list(val)
if member == 'join("")':
return ''.join(val)
if member == 'length':
return len(val)
if member == 'reverse()':
return val[::-1]
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
if slice_m:
idx = self.interpret_expression(
slice_m.group('idx'), local_vars, allow_recursion - 1)
return val[idx:]
if arg_str is None:
# Member access
if member == 'length':
return len(obj)
return obj[member]
assert expr.endswith(')')
# Function call
if arg_str == '':
argvals = tuple()
else:
argvals = tuple([
self.interpret_expression(v, local_vars, allow_recursion)
for v in arg_str.split(',')])
if member == 'split':
assert argvals == ('',)
return list(obj)
if member == 'join':
assert len(argvals) == 1
return argvals[0].join(obj)
if member == 'reverse':
assert len(argvals) == 0
obj.reverse()
return obj
if member == 'slice':
assert len(argvals) == 1
return obj[argvals[0]:]
if member == 'splice':
assert isinstance(obj, list)
index, howMany = argvals
res = []
for i in range(index, min(index + howMany, len(obj))):
res.append(obj.pop(i))
return res
return obj[member](argvals)
m = re.match(
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
@@ -100,13 +130,14 @@ class JSInterpreter(object):
return a % b
m = re.match(
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
r'^(?P<func>[.a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
if m:
fname = m.group('func')
argvals = tuple([
int(v) if v.isdigit() else local_vars[v]
for v in m.group('args').split(',')])
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in m.group('args').split(',')]
return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr)
@@ -114,13 +145,13 @@ class JSInterpreter(object):
obj = {}
obj_m = re.search(
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
r'\}\s*;',
self.code)
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
fields)
for f in fields_m:

View File

@@ -18,14 +18,15 @@ from ..utils import (
)
class FFmpegPostProcessorError(PostProcessingError):
pass
class FFmpegPostProcessor(PostProcessor):
def __init__(self,downloader=None):
def __init__(self, downloader=None, deletetempfiles=False):
PostProcessor.__init__(self, downloader)
self._exes = self.detect_executables()
self._deletetempfiles = deletetempfiles
@staticmethod
def detect_executables():
@@ -60,6 +61,9 @@ class FFmpegPostProcessor(PostProcessor):
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
if self._deletetempfiles:
for ipath in input_paths:
os.remove(ipath)
def run_ffmpeg(self, path, out_path, opts):
self.run_ffmpeg_multiple_files([path], out_path, opts)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.07.23.1'
__version__ = '2014.07.24'