release 2014.07.24

[jsinterp] 'reverse' modifies the array in place (fixes #3334)
[jsinterp] Implement splice and general improvement
2014-07-24 11:24:43 +02:00 · 2014-07-24 11:08:31 +02:00 · 2014-07-24 10:41:14 +02:00 · 2014-07-23 19:29:15 +07:00 · 2014-07-23 12:16:26 +02:00 · 2014-07-23 02:55:06 +02:00
20 changed files with 174 additions and 75 deletions
--- a/test/helper.py
+++ b/test/helper.py
@@ -137,8 +137,8 @@ def expect_info_dict(self, expected_dict, got_dict):


 def assertRegexpMatches(self, text, regexp, msg=None):
-    if hasattr(self, 'assertRegexpMatches'):
-        return self.assertRegexpMatches(text, regexp, msg)
+    if hasattr(self, 'assertRegexp'):
+        return self.assertRegexp(text, regexp, msg)
    else:
        m = re.match(regexp, text)
        if not m:
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -15,7 +15,6 @@ from youtube_dl.extractor import (
    FacebookIE,
    gen_extractors,
    JustinTVIE,
-    PBSIE,
    YoutubeIE,
 )

--- a/test/test_download.py
+++ b/test/test_download.py
@@ -10,7 +10,6 @@ from test.helper import (
    get_params,
    gettestcases,
    expect_info_dict,
-    md5,
    try_rm,
    report_warning,
 )
@@ -24,7 +23,6 @@ import socket
 import youtube_dl.YoutubeDL
 from youtube_dl.utils import (
    compat_http_client,
-    compat_str,
    compat_urllib_error,
    compat_HTTPError,
    DownloadError,
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -154,7 +154,7 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '4110309')
        self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
        assertRegexpMatches(
-            self, result['description'], r'TILT Brass - Bowery Poetry Club')
+            self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
        self.assertEqual(len(result['entries']), 6)

    def test_livestream_event(self):
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -57,6 +57,18 @@ _TESTS = [
        u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
        u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
    ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
+        u'js',
+        84,
+        u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+    ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
+        u'js',
+        83,
+        u'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+    ),
 ]


--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -1,12 +0,0 @@
-# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
-from .downloader import FileDownloader as RealFileDownloader
-from .downloader import get_suitable_downloader
-
-
-# This class reproduces the old behaviour of FileDownloader
-class FileDownloader(RealFileDownloader):
-    def _do_download(self, filename, info_dict):
-        real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
-        for ph in self._progress_hooks:
-            real_fd.add_progress_hook(ph)
-        return real_fd.download(filename, info_dict)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -999,7 +999,7 @@ class YoutubeDL(object):
                    if info_dict.get('requested_formats') is not None:
                        downloaded = []
                        success = True
-                        merger = FFmpegMergerPP(self)
+                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                        if not merger._get_executable():
                            postprocessors = []
                            self.report_warning('You have requested multiple '
@@ -1234,14 +1234,21 @@ class YoutubeDL(object):
        if not self.params.get('verbose'):
            return

-        write_string(
+        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
-                self.get_encoding()),
-            encoding=None
-        )
+                self.get_encoding()))
+        try:
+            write_string(encoding_str, encoding=None)
+        except:
+            errmsg = 'Failed to write encoding string %r' % encoding_str
+            try:
+                sys.stdout.write(errmsg)
+            except:
+                pass
+            raise IOError(errmsg)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -97,7 +97,7 @@ from .utils import (
    write_string,
 )
 from .update import update_self
-from .FileDownloader import (
+from .downloader import (
    FileDownloader,
 )
 from .extractor import gen_extractors
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -151,6 +151,7 @@ from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .kontrtube import KontrTubeIE
+from .krasview import KrasViewIE
 from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -0,0 +1,59 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class KrasViewIE(InfoExtractor):
+    IE_DESC = 'Красвью'
+    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://krasview.ru/video/512228',
+        'md5': '3b91003cf85fc5db277870c8ebd98eae',
+        'info_dict': {
+            'id': '512228',
+            'ext': 'mp4',
+            'title': 'Снег, лёд, заносы',
+            'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
+            'duration': 27,
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        flashvars = json.loads(self._search_regex(
+            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+
+        video_url = flashvars['url']
+        title = unescapeHTML(flashvars['title'])
+        description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
+        thumbnail = flashvars['image']
+        duration = int(flashvars['duration'])
+        filesize = int(flashvars['size'])
+        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
+        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'filesize': filesize,
+            'width': width,
+            'height': height,
+        }
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -4,7 +4,11 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import find_xpath_attr, compat_str
+from ..utils import (
+    compat_str,
+    ExtractorError,
+    find_xpath_attr,
+)


 class NBCIE(InfoExtractor):
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -92,16 +92,7 @@ class RTLnowIE(InfoExtractor):
        },
        {
            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
-            'info_dict': {
-                'id': '153819',
-                'ext': 'flv',
-                'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
-                'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
-                'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
-                'upload_date': '20140221',
-                'duration': 2429,
-            },
-            'skip': 'Only works from Germany',
+            'only_matching': True,
        },
    ]

--- a/youtube_dl/extractor/savefrom.py
+++ b/youtube_dl/extractor/savefrom.py
@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
-            'description': 'md5:727900f130df3dc9a25e2721497c7910',
+            'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
        },
        'params': {
            'skip_download': True
--- a/youtube_dl/extractor/sockshare.py
+++ b/youtube_dl/extractor/sockshare.py
@@ -5,6 +5,7 @@ from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    compat_urllib_request,
+    determine_ext,
 )
 import re

@@ -68,6 +69,7 @@ class SockshareIE(InfoExtractor):
        formats = [{
            'format_id': 'sd',
            'url': video_url,
+            'ext': determine_ext(title),
        }]

        return {
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -82,10 +82,10 @@ class SoundcloudIE(InfoExtractor):
        # downloadable song
        {
            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
-            'md5': 'fee7b8747b09bb755cefd4b853e7249a',
+            'md5': '7624f2351f8a3b2e7cd51522496e7631',
            'info_dict': {
                'id': '128590877',
-                'ext': 'wav',
+                'ext': 'mp3',
                'title': 'Bus Brakes',
                'description': 'md5:0170be75dd395c96025d210d261c784e',
                'uploader': 'oddsamples',
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -81,7 +81,7 @@ class WDRIE(InfoExtractor):
            ]
            return self.playlist_result(entries, page_id)

-        flashvars = compat_urlparse.parse_qs(
+        flashvars = compat_parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        page_id = flashvars['trackerClipId'][0]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -346,8 +346,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

    def _extract_signature_function(self, video_id, player_url, slen):
        id_m = re.match(
-            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
+            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
+        if not id_m:
+            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

@@ -609,7 +611,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                'sts':'16268',
+                'sts': self._search_regex(
+                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
            })
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals

+import json
 import re

 from .utils import (
@@ -40,8 +41,9 @@ class JSInterpreter(object):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
-            raise ExtractorError(
-                'Cannot determine left side of statement in %r' % stmt)
+            # Try interpreting it as an expression
+            expr = stmt
+            assign = lambda v: v

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)
@@ -53,35 +55,63 @@ class JSInterpreter(object):
        if expr.isalpha():
            return local_vars[expr]

-        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
-        if m:
-            member = m.group('member')
-            variable = m.group('in')
+        try:
+            return json.loads(expr)
+        except ValueError:
+            pass

-            if variable not in local_vars:
+        m = re.match(
+            r'^(?P<var>[a-z]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            expr)
+        if m:
+            variable = m.group('var')
+            member = m.group('member')
+            arg_str = m.group('args')
+
+            if variable in local_vars:
+                obj = local_vars[variable]
+            else:
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]
-                key, args = member.split('(', 1)
-                args = args.strip(')')
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in args.split(',')]
-                return obj[key](argvals)

-            val = local_vars[variable]
-            if member == 'split("")':
-                return list(val)
-            if member == 'join("")':
-                return ''.join(val)
-            if member == 'length':
-                return len(val)
-            if member == 'reverse()':
-                return val[::-1]
-            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-            if slice_m:
-                idx = self.interpret_expression(
-                    slice_m.group('idx'), local_vars, allow_recursion - 1)
-                return val[idx:]
+            if arg_str is None:
+                # Member access
+                if member == 'length':
+                    return len(obj)
+                return obj[member]
+
+            assert expr.endswith(')')
+            # Function call
+            if arg_str == '':
+                argvals = tuple()
+            else:
+                argvals = tuple([
+                    self.interpret_expression(v, local_vars, allow_recursion)
+                    for v in arg_str.split(',')])
+
+            if member == 'split':
+                assert argvals == ('',)
+                return list(obj)
+            if member == 'join':
+                assert len(argvals) == 1
+                return argvals[0].join(obj)
+            if member == 'reverse':
+                assert len(argvals) == 0
+                obj.reverse()
+                return obj
+            if member == 'slice':
+                assert len(argvals) == 1
+                return obj[argvals[0]:]
+            if member == 'splice':
+                assert isinstance(obj, list)
+                index, howMany = argvals
+                res = []
+                for i in range(index, min(index + howMany, len(obj))):
+                    res.append(obj.pop(i))
+                return res
+
+            return obj[member](argvals)

        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
@@ -100,13 +130,14 @@ class JSInterpreter(object):
            return a % b

        m = re.match(
-            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>[.a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
+            argvals = tuple([
+                int(v) if v.isdigit() else local_vars[v]
+                for v in m.group('args').split(',')])
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
-            argvals = [int(v) if v.isdigit() else local_vars[v]
-                       for v in m.group('args').split(',')]
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

@@ -114,13 +145,13 @@ class JSInterpreter(object):
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
-            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
            r'\}\s*;',
            self.code)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
-            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -18,14 +18,15 @@ from ..utils import (
 )


-
 class FFmpegPostProcessorError(PostProcessingError):
    pass

+
 class FFmpegPostProcessor(PostProcessor):
-    def __init__(self,downloader=None):
+    def __init__(self, downloader=None, deletetempfiles=False):
        PostProcessor.__init__(self, downloader)
        self._exes = self.detect_executables()
+        self._deletetempfiles = deletetempfiles

    @staticmethod
    def detect_executables():
@@ -60,6 +61,9 @@ class FFmpegPostProcessor(PostProcessor):
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
            raise FFmpegPostProcessorError(msg)
+        if self._deletetempfiles:
+            for ipath in input_paths:
+                os.remove(ipath)

    def run_ffmpeg(self, path, out_path, opts):
        self.run_ffmpeg_multiple_files([path], out_path, opts)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.07.23.1'
+__version__ = '2014.07.24'
Author	SHA1	Message	Date
Philipp Hagemeister	7272eab9d0	release 2014.07.24	2014-07-24 11:24:43 +02:00
Jaime Marquínez Ferrándiz	ebe832dc37	[jsinterp] 'reverse' modifies the array in place (fixes #3334)	2014-07-24 11:08:31 +02:00
Philipp Hagemeister	825abb8175	[jsinterp] Implement splice and general improvement. I still get 403s on YouTube though.	2014-07-24 10:41:14 +02:00
Sergey M․	8944ec0109	[krasview] Add extractor (Closes #3313)	2014-07-23 19:29:15 +07:00
Jaime Marquínez Ferrándiz	c084c93402	[youtube] Extract the 'sts' parameter from the webpage (fixes #3327)	2014-07-23 12:16:26 +02:00
Philipp Hagemeister	d799b47b82	[ffmpeg] PEP8 and a more obvious variable name	2014-07-23 02:55:06 +02:00
rupertbaxter2	b7f8116406	Deletes temp files after postprocess merge unless -k option is specified	2014-07-23 02:53:44 +02:00
Philipp Hagemeister	6db274e057	Remove legacy FileDownloader (Closes #2964)	2014-07-23 02:47:52 +02:00
Philipp Hagemeister	0c92b57398	Remove unused imports	2014-07-23 02:46:21 +02:00
Philipp Hagemeister	becafcbf0f	[wdr] fix up imports	2014-07-23 02:44:30 +02:00
Philipp Hagemeister	92a86f4c1a	Do not import from legacy FileDownloader class	2014-07-23 02:43:59 +02:00
Philipp Hagemeister	dfe029a62c	release 2014.07.23.2	2014-07-23 02:25:27 +02:00
Philipp Hagemeister	b0472057a3	[YoutubeDL] Make sure we really, really get out the encoding string. Fixes #3326. Apparently, on some platforms, even outputting this fails already.	2014-07-23 02:24:52 +02:00
Philipp Hagemeister	c081b35c27	[youtube] Support new player URLs (Fixes #3326)	2014-07-23 02:19:33 +02:00
Philipp Hagemeister	9f43890bcd	[jsinterp] Allow digits in function names	2014-07-23 02:13:48 +02:00
Philipp Hagemeister	94a20aa5f8	[rtlnow] Simplify outdated test	2014-07-23 01:49:25 +02:00
Philipp Hagemeister	94e8df3a7e	[wdr] Fix umlaut parsing on Python 2.x	2014-07-23 01:47:36 +02:00
Philipp Hagemeister	37e64addc8	[nbc] Add missing import	2014-07-23 01:47:18 +02:00
Philipp Hagemeister	d82ba23ba5	[soundcloud:playlist] Fix test description	2014-07-23 01:44:08 +02:00
Philipp Hagemeister	0fd7fd71b4	[test/helper] Do not use deprecated method	2014-07-23 01:43:46 +02:00
Philipp Hagemeister	eae12e3fe3	[soundcloud] Adapt test	2014-07-23 01:41:45 +02:00
Philipp Hagemeister	798a2cad4f	[sockshare] Fix ext	2014-07-23 01:40:01 +02:00
Philipp Hagemeister	41c0849429	[savefrom] Make test description more flexible	2014-07-23 01:38:07 +02:00