Merge branch 'automatic-signatures'
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -24,3 +24,4 @@ updates_key.pem | |||||||
| *.flv | *.flv | ||||||
| *.mp4 | *.mp4 | ||||||
| *.part | *.part | ||||||
|  | test/testdata | ||||||
|   | |||||||
							
								
								
									
										80
									
								
								test/test_youtube_signature.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								test/test_youtube_signature.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  |  | ||||||
|  | import io | ||||||
|  | import re | ||||||
|  | import string | ||||||
|  | import sys | ||||||
|  | import unittest | ||||||
|  |  | ||||||
|  | # Allow direct execution | ||||||
|  | import os | ||||||
|  | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
|  | from youtube_dl.extractor import YoutubeIE | ||||||
|  | from youtube_dl.utils import compat_str, compat_urlretrieve | ||||||
|  |  | ||||||
# Signature test specifications. Each tuple is unpacked positionally into
# make_tfunc(url, stype, sig_length, expected_sig):
#   url          - location of the player file that is downloaded and parsed
#   stype        - u'js' or u'swf'; selects which parser is exercised
#   sig_length   - number of leading string.printable characters used as the
#                  input signature
#   expected_sig - output the extracted signature function must produce
_TESTS = [
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        u'js',
        86,
        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        u'js',
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
        u'swf',
        82,
        u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
    ),
]
|  |  | ||||||
|  |  | ||||||
class TestSignature(unittest.TestCase):
    """Test case whose test methods are attached dynamically after definition."""

    def setUp(self):
        """Make sure the ``testdata`` directory next to this file exists.

        The resulting path is stored in ``self.TESTDATA_DIR``.

        Raises:
            OSError: if the directory is missing and cannot be created.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__))
        self.TESTDATA_DIR = os.path.join(test_dir, 'testdata')
        # Create first and tolerate "already exists" afterwards. Checking for
        # existence before creating fails when another test process creates
        # the directory between the check and the mkdir call.
        try:
            os.mkdir(self.TESTDATA_DIR)
        except OSError:
            if not os.path.isdir(self.TESTDATA_DIR):
                raise
|  |  | ||||||
|  |  | ||||||
def make_tfunc(url, stype, sig_length, expected_sig):
    """Create one signature test and attach it to TestSignature.

    The generated method downloads the player file at ``url`` into the test
    data directory (unless a copy is already there), extracts the signature
    function with the parser selected by ``stype`` ('js' or 'swf'), applies
    it to the first ``sig_length`` characters of ``string.printable`` and
    compares the result with ``expected_sig``.

    The method is named ``test_signature_<stype>_<id>``, where ``<id>`` is
    the part of the file name between the last ``-`` and the extension.
    """
    player_file = url.rpartition('/')[2]
    id_match = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', player_file)
    assert id_match, '%r should follow URL format' % player_file
    method_name = str('test_signature_' + stype + '_' + id_match.group(1))

    def test_func(self):
        local_path = os.path.join(self.TESTDATA_DIR, player_file)

        # Download only once; later runs reuse the stored copy.
        if not os.path.exists(local_path):
            compat_urlretrieve(url, local_path)

        extractor = YoutubeIE()
        if stype == 'js':
            with io.open(local_path, encoding='utf-8') as player:
                player_source = player.read()
            decipher = extractor._parse_sig_js(player_source)
        else:
            assert stype == 'swf'
            with open(local_path, 'rb') as player:
                player_bytes = player.read()
            decipher = extractor._parse_sig_swf(player_bytes)

        sample_sig = compat_str(string.printable[:sig_length])
        self.assertEqual(decipher(sample_sig), expected_sig)

    test_func.__name__ = method_name
    setattr(TestSignature, method_name, test_func)
|  |  | ||||||
# Generate one test method on TestSignature for every entry in _TESTS.
for test_spec in _TESTS:
    make_tfunc(*test_spec)


# Allow running this file directly as a script.
if __name__ == '__main__':
    unittest.main()
| @@ -81,6 +81,8 @@ class YoutubeDL(object): | |||||||
|     keepvideo:         Keep the video file after post-processing |     keepvideo:         Keep the video file after post-processing | ||||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. |     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||||
|     skip_download:     Skip the actual download of the video file |     skip_download:     Skip the actual download of the video file | ||||||
|  |     cachedir:          Location of the cache files in the filesystem. | ||||||
|  |                        None to disable filesystem cache. | ||||||
|      |      | ||||||
|     The following parameters are not used by YoutubeDL itself, they are used by |     The following parameters are not used by YoutubeDL itself, they are used by | ||||||
|     the FileDownloader: |     the FileDownloader: | ||||||
|   | |||||||
| @@ -167,6 +167,12 @@ def parseOpts(overrideArguments=None): | |||||||
|             help='Output descriptions of all supported extractors', default=False) |             help='Output descriptions of all supported extractors', default=False) | ||||||
|     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') |     general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') | ||||||
|     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') |     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') | ||||||
|  |     general.add_option( | ||||||
|  |         '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', | ||||||
|  |         help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default') | ||||||
|  |     general.add_option( | ||||||
|  |         '--no-cache-dir', action='store_const', const=None, dest='cachedir', | ||||||
|  |         help='Disable filesystem caching') | ||||||
|  |  | ||||||
|  |  | ||||||
|     selection.add_option('--playlist-start', |     selection.add_option('--playlist-start', | ||||||
| @@ -272,6 +278,10 @@ def parseOpts(overrideArguments=None): | |||||||
|     verbosity.add_option('--dump-intermediate-pages', |     verbosity.add_option('--dump-intermediate-pages', | ||||||
|             action='store_true', dest='dump_intermediate_pages', default=False, |             action='store_true', dest='dump_intermediate_pages', default=False, | ||||||
|             help='print downloaded pages to debug problems(very verbose)') |             help='print downloaded pages to debug problems(very verbose)') | ||||||
|  |     verbosity.add_option('--youtube-print-sig-code', | ||||||
|  |             action='store_true', dest='youtube_print_sig_code', default=False, | ||||||
|  |             help=optparse.SUPPRESS_HELP) | ||||||
|  |  | ||||||
|  |  | ||||||
|     filesystem.add_option('-t', '--title', |     filesystem.add_option('-t', '--title', | ||||||
|             action='store_true', dest='usetitle', help='use title in file name (default)', default=False) |             action='store_true', dest='usetitle', help='use title in file name (default)', default=False) | ||||||
| @@ -555,7 +565,7 @@ def _real_main(argv=None): | |||||||
|         parser.error(u'Cannot download a video and extract audio into the same' |         parser.error(u'Cannot download a video and extract audio into the same' | ||||||
|                      u' file! Use "%%(ext)s" instead of %r' % |                      u' file! Use "%%(ext)s" instead of %r' % | ||||||
|                      determine_ext(outtmpl, u'')) |                      determine_ext(outtmpl, u'')) | ||||||
|  |     raise ValueError(repr(opts.cachedir)) | ||||||
|     # YoutubeDL |     # YoutubeDL | ||||||
|     ydl = YoutubeDL({ |     ydl = YoutubeDL({ | ||||||
|         'usenetrc': opts.usenetrc, |         'usenetrc': opts.usenetrc, | ||||||
| @@ -613,6 +623,8 @@ def _real_main(argv=None): | |||||||
|         'min_filesize': opts.min_filesize, |         'min_filesize': opts.min_filesize, | ||||||
|         'max_filesize': opts.max_filesize, |         'max_filesize': opts.max_filesize, | ||||||
|         'daterange': date, |         'daterange': date, | ||||||
|  |         'cachedir': opts.cachedir, | ||||||
|  |         'youtube_print_sig_code': opts.youtube_print_sig_code, | ||||||
|         }) |         }) | ||||||
|  |  | ||||||
|     if opts.verbose: |     if opts.verbose: | ||||||
|   | |||||||
| @@ -1,15 +1,23 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
|  |  | ||||||
|  | import collections | ||||||
|  | import errno | ||||||
|  | import io | ||||||
|  | import itertools | ||||||
| import json | import json | ||||||
| import netrc | import os.path | ||||||
| import re | import re | ||||||
| import socket | import socket | ||||||
| import itertools | import string | ||||||
|  | import struct | ||||||
|  | import traceback | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
|  | import zlib | ||||||
|  |  | ||||||
| from .common import InfoExtractor, SearchInfoExtractor | from .common import InfoExtractor, SearchInfoExtractor | ||||||
| from .subtitles import SubtitlesInfoExtractor | from .subtitles import SubtitlesInfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     compat_chr, | ||||||
|     compat_http_client, |     compat_http_client, | ||||||
|     compat_parse_qs, |     compat_parse_qs, | ||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
| @@ -23,6 +31,7 @@ from ..utils import ( | |||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     orderedSet, |     orderedSet, | ||||||
|  |     write_json_file, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| class YoutubeBaseInfoExtractor(InfoExtractor): | class YoutubeBaseInfoExtractor(InfoExtractor): | ||||||
| @@ -393,6 +402,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         if YoutubePlaylistIE.suitable(url): return False |         if YoutubePlaylistIE.suitable(url): return False | ||||||
|         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None |         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None | ||||||
|  |  | ||||||
def __init__(self, *args, **kwargs):
    """Initialize the extractor, forwarding all arguments to the parent class."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Starts out empty. NOTE(review): presumably caches per-player signature
    # functions, but no reader or writer is visible here - confirm.
    self._player_cache = {}
|  |  | ||||||
|     def report_video_webpage_download(self, video_id): |     def report_video_webpage_download(self, video_id): | ||||||
|         """Report attempt to download video webpage.""" |         """Report attempt to download video webpage.""" | ||||||
|         self.to_screen(u'%s: Downloading video webpage' % video_id) |         self.to_screen(u'%s: Downloading video webpage' % video_id) | ||||||
| @@ -413,9 +426,657 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         """Indicate the download will use the RTMP protocol.""" |         """Indicate the download will use the RTMP protocol.""" | ||||||
|         self.to_screen(u'RTMP download detected') |         self.to_screen(u'RTMP download detected') | ||||||
|  |  | ||||||
def _extract_signature_function(self, video_id, player_url, slen):
    """Return a callable that transforms a signature of length ``slen``.

    The player id and the player type ('js' or 'swf') are taken from the
    file name at the end of ``player_url``. Unless the ``cachedir`` parameter
    of the downloader is None, a cached description of the function is looked
    up first in ``<cachedir>/youtube-sigfuncs/<type>_<id>_<slen>.json``. The
    cache holds, for every output position, the index of the input character
    that ends up there.

    On a cache miss the player is downloaded and parsed, and the result is
    written back to the cache. A failure to write the cache is reported as a
    warning only.

    Args:
        video_id: id of the video, passed on to the download helpers.
        player_url: URL of the player file.
        slen: length of the signatures the function will be applied to.

    Raises:
        ExtractorError: if ``player_url`` does not have the expected form or
            names a player type other than 'js' or 'swf'.
    """
    id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                    player_url)
    if id_m is None:
        # Without this check an unexpected URL fails with an opaque
        # AttributeError on the next line.
        raise ExtractorError(
            u'Cannot identify player from URL %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # func_id becomes a file name, so it must not contain path separators.
    assert os.path.basename(func_id) == func_id
    cache_dir = self._downloader.params.get('cachedir',
                                            u'~/.youtube-dl/cache')

    cache_enabled = cache_dir is not None
    if cache_enabled:
        cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                u'youtube-sigfuncs',
                                func_id + '.json')
        try:
            with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                cache_spec = json.load(cachef)
        except (IOError, ValueError):
            # IOError: no cache available.
            # ValueError: the file is not valid JSON (or not valid UTF-8);
            # treat it as a miss so it gets regenerated below.
            pass
        else:
            return lambda s: u''.join(s[i] for i in cache_spec)

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # A real exception instead of ``assert False``: asserts are removed
        # under -O, which would leave ``res`` unbound further down.
        raise ExtractorError(u'Invalid player type %r' % player_type)

    if cache_enabled:
        try:
            # Feed the function a string whose k-th character has code point
            # k; the code points of the output then spell out the permutation.
            test_string = u''.join(map(compat_chr, range(slen)))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]
            try:
                os.makedirs(os.path.dirname(cache_fn))
            except OSError as ose:
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(cache_spec, cache_fn)
        except Exception:
            # Caching is best effort: warn, but still return the function.
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Writing cache to %r failed: %s' % (cache_fn, tb))

    return res
|  |  | ||||||
def _print_sig_code(self, func, slen):
    """Print Python code equivalent to ``func`` for signatures of length ``slen``.

    ``func`` is applied to a probe string whose k-th character has code
    point k, so the code points of the result tell which input index ends up
    at each output position. Runs of indices that go up or down by one are
    written as slices, everything else as single subscripts, and the pieces
    are joined with ``+``.

    Args:
        func: the signature function to describe.
        slen: signature length to probe with.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = u'' if start == 0 else str(start)
            # A downward run that reaches index 0 must leave the stop open:
            # a stop of -1 would be read as "one before the end".
            ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
            steps = u'' if step == 1 else (u':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing selected: emit an empty string literal so the generated
            # ``return`` statement stays valid.
            yield u"u''"
            return

        step = None
        start = '(Never used)'  # Squelch pyflakes warnings - start will be
                                # set as soon as step is set
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield u's[%d]' % prev
        # Use the last index explicitly instead of the loop variable, which
        # is unbound when idxs has a single element.
        last = idxs[-1]
        if step is None:
            yield u's[%d]' % last
        else:
            yield _genslice(start, last, step)

    test_string = u''.join(map(compat_chr, range(slen)))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = u' + '.join(gen_sig_code(cache_spec))
    code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
|  |  | ||||||
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in ``jscode``.

    The name of the entry function is located with the pattern
    ``signature=<name>``. Its definition, and the definition of every
    function it calls, is pulled out of ``jscode`` with a regular expression
    and executed by a small interpreter that understands only:

    * statements of the form ``[var ]name=expr``, ``name[index]=expr`` and
      ``return expr``, separated by ``;``
    * integer literals and variable names
    * ``x.split("")``, ``x.join("")``, ``x.length``, ``x.reverse()`` and
      ``x.slice(n)``
    * ``x[index]`` and ``a%b``
    * calls ``f(args)`` whose arguments are variable names or integers

    Args:
        jscode: source code of the JavaScript player.

    Returns:
        A function taking the signature string and returning the result of
        the interpreted JS function.

    Raises:
        ExtractorError: if a called function cannot be found in ``jscode``;
            unsupported statements or expressions raise it when the returned
            function is called.
    """
    funcname = self._search_regex(
        r'signature=([a-zA-Z]+)', jscode,
        u'Initial JS player signature function name')

    # Cache of already extracted functions, keyed by JS function name.
    functions = {}

    def interpret_statement(stmt, local_vars, allow_recursion=20):
        """Execute one JS statement and return the value it produced."""
        if allow_recursion < 0:
            raise ExtractorError(u'Recursion limit reached')

        if stmt.startswith(u'var '):
            stmt = stmt[len(u'var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            if ass_m.groupdict().get('index'):
                # Assignment to an element: name[index]=expr
                def assign(val):
                    lvar = local_vars[ass_m.group('out')]
                    idx = interpret_expression(ass_m.group('index'),
                                               local_vars, allow_recursion)
                    assert isinstance(idx, int)
                    lvar[idx] = val
                    return val
                expr = ass_m.group('expr')
            else:
                # Plain assignment: name=expr
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
                expr = ass_m.group('expr')
        elif stmt.startswith(u'return '):
            assign = lambda v: v
            expr = stmt[len(u'return '):]
        else:
            raise ExtractorError(
                u'Cannot determine left side of statement in %r' % stmt)

        v = interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(expr, local_vars, allow_recursion):
        """Evaluate one JS expression against ``local_vars``."""
        if expr.isdigit():
            return int(expr)

        if expr.isalpha():
            return local_vars[expr]

        # Member access / method call on a variable: name.member
        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
        if m:
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
                return list(val)
            if member == 'join("")':
                return u''.join(val)
            if member == 'length':
                return len(val)
            if member == 'reverse()':
                return val[::-1]
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
            if slice_m:
                idx = interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion-1)
                return val[idx:]

        # Subscript: name[expr]
        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = interpret_expression(m.group('idx'), local_vars,
                                       allow_recursion-1)
            return val[idx]

        # Binary modulo: a%b
        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = interpret_expression(m.group('a'),
                                     local_vars, allow_recursion)
            b = interpret_expression(m.group('b'),
                                     local_vars, allow_recursion)
            return a % b

        # Call of another JS function: f(arg,...)
        m = re.match(
            r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            if fname not in functions:
                functions[fname] = extract_function(fname)
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return functions[fname](argvals)
        raise ExtractorError(u'Unsupported JS expression %r' % expr)

    def extract_function(funcname):
        """Return a callable that runs the JS function ``funcname``.

        The callable takes the list of argument values.
        """
        func_m = re.search(
            r'function ' + re.escape(funcname) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            jscode)
        if func_m is None:
            # Fail with a meaningful message instead of an AttributeError
            # on the group() call below.
            raise ExtractorError(
                u'Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')

        def resf(args):
            local_vars = dict(zip(argnames, args))
            # The value of the last statement is the function's result.
            for stmt in func_m.group('code').split(';'):
                res = interpret_statement(stmt, local_vars)
            return res
        return resf

    initial_function = extract_function(funcname)
    return lambda s: initial_function([s])
|  |  | ||||||
|  |     def _parse_sig_swf(self, file_contents): | ||||||
|  |         if file_contents[1:3] != b'WS': | ||||||
|  |             raise ExtractorError( | ||||||
|  |                 u'Not an SWF file; header is %r' % file_contents[:3]) | ||||||
|  |         if file_contents[:1] == b'C': | ||||||
|  |             content = zlib.decompress(file_contents[8:]) | ||||||
|  |         else: | ||||||
|  |             raise NotImplementedError(u'Unsupported compression format %r' % | ||||||
|  |                                       file_contents[:1]) | ||||||
|  |  | ||||||
|  |         def extract_tags(content): | ||||||
|  |             pos = 0 | ||||||
|  |             while pos < len(content): | ||||||
|  |                 header16 = struct.unpack('<H', content[pos:pos+2])[0] | ||||||
|  |                 pos += 2 | ||||||
|  |                 tag_code = header16 >> 6 | ||||||
|  |                 tag_len = header16 & 0x3f | ||||||
|  |                 if tag_len == 0x3f: | ||||||
|  |                     tag_len = struct.unpack('<I', content[pos:pos+4])[0] | ||||||
|  |                     pos += 4 | ||||||
|  |                 assert pos+tag_len <= len(content) | ||||||
|  |                 yield (tag_code, content[pos:pos+tag_len]) | ||||||
|  |                 pos += tag_len | ||||||
|  |  | ||||||
|  |         code_tag = next(tag | ||||||
|  |                         for tag_code, tag in extract_tags(content) | ||||||
|  |                         if tag_code == 82) | ||||||
|  |         p = code_tag.index(b'\0', 4) + 1 | ||||||
|  |         code_reader = io.BytesIO(code_tag[p:]) | ||||||
|  |  | ||||||
|  |         # Parse ABC (AVM2 ByteCode) | ||||||
|  |         def read_int(reader=None): | ||||||
|  |             if reader is None: | ||||||
|  |                 reader = code_reader | ||||||
|  |             res = 0 | ||||||
|  |             shift = 0 | ||||||
|  |             for _ in range(5): | ||||||
|  |                 buf = reader.read(1) | ||||||
|  |                 assert len(buf) == 1 | ||||||
|  |                 b = struct.unpack('<B', buf)[0] | ||||||
|  |                 res = res | ((b & 0x7f) << shift) | ||||||
|  |                 if b & 0x80 == 0: | ||||||
|  |                     break | ||||||
|  |                 shift += 7 | ||||||
|  |             return res | ||||||
|  |  | ||||||
|  |         def u30(reader=None): | ||||||
|  |             res = read_int(reader) | ||||||
|  |             assert res & 0xf0000000 == 0 | ||||||
|  |             return res | ||||||
|  |         u32 = read_int | ||||||
|  |  | ||||||
|  |         def s32(reader=None): | ||||||
|  |             v = read_int(reader) | ||||||
|  |             if v & 0x80000000 != 0: | ||||||
|  |                 v = - ((v ^ 0xffffffff) + 1) | ||||||
|  |             return v | ||||||
|  |  | ||||||
|  |         def read_string(reader=None): | ||||||
|  |             if reader is None: | ||||||
|  |                 reader = code_reader | ||||||
|  |             slen = u30(reader) | ||||||
|  |             resb = reader.read(slen) | ||||||
|  |             assert len(resb) == slen | ||||||
|  |             return resb.decode('utf-8') | ||||||
|  |  | ||||||
|  |         def read_bytes(count, reader=None): | ||||||
|  |             if reader is None: | ||||||
|  |                 reader = code_reader | ||||||
|  |             resb = reader.read(count) | ||||||
|  |             assert len(resb) == count | ||||||
|  |             return resb | ||||||
|  |  | ||||||
|  |         def read_byte(reader=None): | ||||||
|  |             resb = read_bytes(1, reader=reader) | ||||||
|  |             res = struct.unpack('<B', resb)[0] | ||||||
|  |             return res | ||||||
|  |  | ||||||
|  |         # minor_version + major_version | ||||||
|  |         read_bytes(2 + 2) | ||||||
|  |  | ||||||
|  |         # Constant pool | ||||||
|  |         int_count = u30() | ||||||
|  |         for _c in range(1, int_count): | ||||||
|  |             s32() | ||||||
|  |         uint_count = u30() | ||||||
|  |         for _c in range(1, uint_count): | ||||||
|  |             u32() | ||||||
|  |         double_count = u30() | ||||||
|  |         read_bytes((double_count-1) * 8) | ||||||
|  |         string_count = u30() | ||||||
|  |         constant_strings = [u''] | ||||||
|  |         for _c in range(1, string_count): | ||||||
|  |             s = read_string() | ||||||
|  |             constant_strings.append(s) | ||||||
|  |         namespace_count = u30() | ||||||
|  |         for _c in range(1, namespace_count): | ||||||
|  |             read_bytes(1)  # kind | ||||||
|  |             u30()  # name | ||||||
|  |         ns_set_count = u30() | ||||||
|  |         for _c in range(1, ns_set_count): | ||||||
|  |             count = u30() | ||||||
|  |             for _c2 in range(count): | ||||||
|  |                 u30() | ||||||
|  |         multiname_count = u30() | ||||||
|  |         MULTINAME_SIZES = { | ||||||
|  |             0x07: 2,  # QName | ||||||
|  |             0x0d: 2,  # QNameA | ||||||
|  |             0x0f: 1,  # RTQName | ||||||
|  |             0x10: 1,  # RTQNameA | ||||||
|  |             0x11: 0,  # RTQNameL | ||||||
|  |             0x12: 0,  # RTQNameLA | ||||||
|  |             0x09: 2,  # Multiname | ||||||
|  |             0x0e: 2,  # MultinameA | ||||||
|  |             0x1b: 1,  # MultinameL | ||||||
|  |             0x1c: 1,  # MultinameLA | ||||||
|  |         } | ||||||
|  |         multinames = [u''] | ||||||
|  |         for _c in range(1, multiname_count): | ||||||
|  |             kind = u30() | ||||||
|  |             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind | ||||||
|  |             if kind == 0x07: | ||||||
|  |                 u30()  # namespace_idx | ||||||
|  |                 name_idx = u30() | ||||||
|  |                 multinames.append(constant_strings[name_idx]) | ||||||
|  |             else: | ||||||
|  |                 multinames.append('[MULTINAME kind: %d]' % kind) | ||||||
|  |                 for _c2 in range(MULTINAME_SIZES[kind]): | ||||||
|  |                     u30() | ||||||
|  |  | ||||||
|  |         # Methods | ||||||
|  |         method_count = u30() | ||||||
|  |         MethodInfo = collections.namedtuple( | ||||||
|  |             'MethodInfo', | ||||||
|  |             ['NEED_ARGUMENTS', 'NEED_REST']) | ||||||
|  |         method_infos = [] | ||||||
|  |         for method_id in range(method_count): | ||||||
|  |             param_count = u30() | ||||||
|  |             u30()  # return type | ||||||
|  |             for _ in range(param_count): | ||||||
|  |                 u30()  # param type | ||||||
|  |             u30()  # name index (always 0 for youtube) | ||||||
|  |             flags = read_byte() | ||||||
|  |             if flags & 0x08 != 0: | ||||||
|  |                 # Options present | ||||||
|  |                 option_count = u30() | ||||||
|  |                 for c in range(option_count): | ||||||
|  |                     u30()  # val | ||||||
|  |                     read_bytes(1)  # kind | ||||||
|  |             if flags & 0x80 != 0: | ||||||
|  |                 # Param names present | ||||||
|  |                 for _ in range(param_count): | ||||||
|  |                     u30()  # param name | ||||||
|  |             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) | ||||||
|  |             method_infos.append(mi) | ||||||
|  |  | ||||||
|  |         # Metadata | ||||||
|  |         metadata_count = u30() | ||||||
|  |         for _c in range(metadata_count): | ||||||
|  |             u30()  # name | ||||||
|  |             item_count = u30() | ||||||
|  |             for _c2 in range(item_count): | ||||||
|  |                 u30()  # key | ||||||
|  |                 u30()  # value | ||||||
|  |  | ||||||
|  |         def parse_traits_info(): | ||||||
|  |             trait_name_idx = u30() | ||||||
|  |             kind_full = read_byte() | ||||||
|  |             kind = kind_full & 0x0f | ||||||
|  |             attrs = kind_full >> 4 | ||||||
|  |             methods = {} | ||||||
|  |             if kind in [0x00, 0x06]:  # Slot or Const | ||||||
|  |                 u30()  # Slot id | ||||||
|  |                 u30()  # type_name_idx | ||||||
|  |                 vindex = u30() | ||||||
|  |                 if vindex != 0: | ||||||
|  |                     read_byte()  # vkind | ||||||
|  |             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter | ||||||
|  |                 u30()  # disp_id | ||||||
|  |                 method_idx = u30() | ||||||
|  |                 methods[multinames[trait_name_idx]] = method_idx | ||||||
|  |             elif kind == 0x04:  # Class | ||||||
|  |                 u30()  # slot_id | ||||||
|  |                 u30()  # classi | ||||||
|  |             elif kind == 0x05:  # Function | ||||||
|  |                 u30()  # slot_id | ||||||
|  |                 function_idx = u30() | ||||||
|  |                 methods[function_idx] = multinames[trait_name_idx] | ||||||
|  |             else: | ||||||
|  |                 raise ExtractorError(u'Unsupported trait kind %d' % kind) | ||||||
|  |  | ||||||
|  |             if attrs & 0x4 != 0:  # Metadata present | ||||||
|  |                 metadata_count = u30() | ||||||
|  |                 for _c3 in range(metadata_count): | ||||||
|  |                     u30()  # metadata index | ||||||
|  |  | ||||||
|  |             return methods | ||||||
|  |  | ||||||
|  |         # Classes | ||||||
|  |         TARGET_CLASSNAME = u'SignatureDecipher' | ||||||
|  |         searched_idx = multinames.index(TARGET_CLASSNAME) | ||||||
|  |         searched_class_id = None | ||||||
|  |         class_count = u30() | ||||||
|  |         for class_id in range(class_count): | ||||||
|  |             name_idx = u30() | ||||||
|  |             if name_idx == searched_idx: | ||||||
|  |                 # We found the class we're looking for! | ||||||
|  |                 searched_class_id = class_id | ||||||
|  |             u30()  # super_name idx | ||||||
|  |             flags = read_byte() | ||||||
|  |             if flags & 0x08 != 0:  # Protected namespace is present | ||||||
|  |                 u30()  # protected_ns_idx | ||||||
|  |             intrf_count = u30() | ||||||
|  |             for _c2 in range(intrf_count): | ||||||
|  |                 u30() | ||||||
|  |             u30()  # iinit | ||||||
|  |             trait_count = u30() | ||||||
|  |             for _c2 in range(trait_count): | ||||||
|  |                 parse_traits_info() | ||||||
|  |  | ||||||
|  |         if searched_class_id is None: | ||||||
|  |             raise ExtractorError(u'Target class %r not found' % | ||||||
|  |                                  TARGET_CLASSNAME) | ||||||
|  |  | ||||||
|  |         method_names = {} | ||||||
|  |         method_idxs = {} | ||||||
|  |         for class_id in range(class_count): | ||||||
|  |             u30()  # cinit | ||||||
|  |             trait_count = u30() | ||||||
|  |             for _c2 in range(trait_count): | ||||||
|  |                 trait_methods = parse_traits_info() | ||||||
|  |                 if class_id == searched_class_id: | ||||||
|  |                     method_names.update(trait_methods.items()) | ||||||
|  |                     method_idxs.update(dict( | ||||||
|  |                         (idx, name) | ||||||
|  |                         for name, idx in trait_methods.items())) | ||||||
|  |  | ||||||
|  |         # Scripts | ||||||
|  |         script_count = u30() | ||||||
|  |         for _c in range(script_count): | ||||||
|  |             u30()  # init | ||||||
|  |             trait_count = u30() | ||||||
|  |             for _c2 in range(trait_count): | ||||||
|  |                 parse_traits_info() | ||||||
|  |  | ||||||
|  |         # Method bodies | ||||||
|  |         method_body_count = u30() | ||||||
|  |         Method = collections.namedtuple('Method', ['code', 'local_count']) | ||||||
|  |         methods = {} | ||||||
|  |         for _c in range(method_body_count): | ||||||
|  |             method_idx = u30() | ||||||
|  |             u30()  # max_stack | ||||||
|  |             local_count = u30() | ||||||
|  |             u30()  # init_scope_depth | ||||||
|  |             u30()  # max_scope_depth | ||||||
|  |             code_length = u30() | ||||||
|  |             code = read_bytes(code_length) | ||||||
|  |             if method_idx in method_idxs: | ||||||
|  |                 m = Method(code, local_count) | ||||||
|  |                 methods[method_idxs[method_idx]] = m | ||||||
|  |             exception_count = u30() | ||||||
|  |             for _c2 in range(exception_count): | ||||||
|  |                 u30()  # from | ||||||
|  |                 u30()  # to | ||||||
|  |                 u30()  # target | ||||||
|  |                 u30()  # exc_type | ||||||
|  |                 u30()  # var_name | ||||||
|  |             trait_count = u30() | ||||||
|  |             for _c2 in range(trait_count): | ||||||
|  |                 parse_traits_info() | ||||||
|  |  | ||||||
|  |         assert p + code_reader.tell() == len(code_tag) | ||||||
|  |         assert len(methods) == len(method_idxs) | ||||||
|  |  | ||||||
|  |         method_pyfunctions = {} | ||||||
|  |  | ||||||
|  |         def extract_function(func_name): | ||||||
|  |             if func_name in method_pyfunctions: | ||||||
|  |                 return method_pyfunctions[func_name] | ||||||
|  |             if func_name not in methods: | ||||||
|  |                 raise ExtractorError(u'Cannot find function %r' % func_name) | ||||||
|  |             m = methods[func_name] | ||||||
|  |  | ||||||
|  |             def resfunc(args): | ||||||
|  |                 registers = ['(this)'] + list(args) + [None] * m.local_count | ||||||
|  |                 stack = [] | ||||||
|  |                 coder = io.BytesIO(m.code) | ||||||
|  |                 while True: | ||||||
|  |                     opcode = struct.unpack('!B', coder.read(1))[0] | ||||||
|  |                     if opcode == 36:  # pushbyte | ||||||
|  |                         v = struct.unpack('!B', coder.read(1))[0] | ||||||
|  |                         stack.append(v) | ||||||
|  |                     elif opcode == 44:  # pushstring | ||||||
|  |                         idx = u30(coder) | ||||||
|  |                         stack.append(constant_strings[idx]) | ||||||
|  |                     elif opcode == 48:  # pushscope | ||||||
|  |                         # We don't implement the scope register, so we'll just | ||||||
|  |                         # ignore the popped value | ||||||
|  |                         stack.pop() | ||||||
|  |                     elif opcode == 70:  # callproperty | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         mname = multinames[index] | ||||||
|  |                         arg_count = u30(coder) | ||||||
|  |                         args = list(reversed( | ||||||
|  |                             [stack.pop() for _ in range(arg_count)])) | ||||||
|  |                         obj = stack.pop() | ||||||
|  |                         if mname == u'split': | ||||||
|  |                             assert len(args) == 1 | ||||||
|  |                             assert isinstance(args[0], compat_str) | ||||||
|  |                             assert isinstance(obj, compat_str) | ||||||
|  |                             if args[0] == u'': | ||||||
|  |                                 res = list(obj) | ||||||
|  |                             else: | ||||||
|  |                                 res = obj.split(args[0]) | ||||||
|  |                             stack.append(res) | ||||||
|  |                         elif mname == u'slice': | ||||||
|  |                             assert len(args) == 1 | ||||||
|  |                             assert isinstance(args[0], int) | ||||||
|  |                             assert isinstance(obj, list) | ||||||
|  |                             res = obj[args[0]:] | ||||||
|  |                             stack.append(res) | ||||||
|  |                         elif mname == u'join': | ||||||
|  |                             assert len(args) == 1 | ||||||
|  |                             assert isinstance(args[0], compat_str) | ||||||
|  |                             assert isinstance(obj, list) | ||||||
|  |                             res = args[0].join(obj) | ||||||
|  |                             stack.append(res) | ||||||
|  |                         elif mname in method_pyfunctions: | ||||||
|  |                             stack.append(method_pyfunctions[mname](args)) | ||||||
|  |                         else: | ||||||
|  |                             raise NotImplementedError( | ||||||
|  |                                 u'Unsupported property %r on %r' | ||||||
|  |                                 % (mname, obj)) | ||||||
|  |                     elif opcode == 72:  # returnvalue | ||||||
|  |                         res = stack.pop() | ||||||
|  |                         return res | ||||||
|  |                     elif opcode == 79:  # callpropvoid | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         mname = multinames[index] | ||||||
|  |                         arg_count = u30(coder) | ||||||
|  |                         args = list(reversed( | ||||||
|  |                             [stack.pop() for _ in range(arg_count)])) | ||||||
|  |                         obj = stack.pop() | ||||||
|  |                         if mname == u'reverse': | ||||||
|  |                             assert isinstance(obj, list) | ||||||
|  |                             obj.reverse() | ||||||
|  |                         else: | ||||||
|  |                             raise NotImplementedError( | ||||||
|  |                                 u'Unsupported (void) property %r on %r' | ||||||
|  |                                 % (mname, obj)) | ||||||
|  |                     elif opcode == 93:  # findpropstrict | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         mname = multinames[index] | ||||||
|  |                         res = extract_function(mname) | ||||||
|  |                         stack.append(res) | ||||||
|  |                     elif opcode == 97:  # setproperty | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         value = stack.pop() | ||||||
|  |                         idx = stack.pop() | ||||||
|  |                         obj = stack.pop() | ||||||
|  |                         assert isinstance(obj, list) | ||||||
|  |                         assert isinstance(idx, int) | ||||||
|  |                         obj[idx] = value | ||||||
|  |                     elif opcode == 98:  # getlocal | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         stack.append(registers[index]) | ||||||
|  |                     elif opcode == 99:  # setlocal | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         value = stack.pop() | ||||||
|  |                         registers[index] = value | ||||||
|  |                     elif opcode == 102:  # getproperty | ||||||
|  |                         index = u30(coder) | ||||||
|  |                         pname = multinames[index] | ||||||
|  |                         if pname == u'length': | ||||||
|  |                             obj = stack.pop() | ||||||
|  |                             assert isinstance(obj, list) | ||||||
|  |                             stack.append(len(obj)) | ||||||
|  |                         else:  # Assume attribute access | ||||||
|  |                             idx = stack.pop() | ||||||
|  |                             assert isinstance(idx, int) | ||||||
|  |                             obj = stack.pop() | ||||||
|  |                             assert isinstance(obj, list) | ||||||
|  |                             stack.append(obj[idx]) | ||||||
|  |                     elif opcode == 128:  # coerce | ||||||
|  |                         u30(coder) | ||||||
|  |                     elif opcode == 133:  # coerce_s | ||||||
|  |                         assert isinstance(stack[-1], (type(None), compat_str)) | ||||||
|  |                     elif opcode == 164:  # modulo | ||||||
|  |                         value2 = stack.pop() | ||||||
|  |                         value1 = stack.pop() | ||||||
|  |                         res = value1 % value2 | ||||||
|  |                         stack.append(res) | ||||||
|  |                     elif opcode == 208:  # getlocal_0 | ||||||
|  |                         stack.append(registers[0]) | ||||||
|  |                     elif opcode == 209:  # getlocal_1 | ||||||
|  |                         stack.append(registers[1]) | ||||||
|  |                     elif opcode == 210:  # getlocal_2 | ||||||
|  |                         stack.append(registers[2]) | ||||||
|  |                     elif opcode == 211:  # getlocal_3 | ||||||
|  |                         stack.append(registers[3]) | ||||||
|  |                     elif opcode == 214:  # setlocal_2 | ||||||
|  |                         registers[2] = stack.pop() | ||||||
|  |                     elif opcode == 215:  # setlocal_3 | ||||||
|  |                         registers[3] = stack.pop() | ||||||
|  |                     else: | ||||||
|  |                         raise NotImplementedError( | ||||||
|  |                             u'Unsupported opcode %d' % opcode) | ||||||
|  |  | ||||||
|  |             method_pyfunctions[func_name] = resfunc | ||||||
|  |             return resfunc | ||||||
|  |  | ||||||
|  |         initial_function = extract_function(u'decipher') | ||||||
|  |         return lambda s: initial_function([s]) | ||||||
|  |  | ||||||
|  |     def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | ||||||
|         """Turn the encrypted s field into a working signature""" |         """Turn the encrypted s field into a working signature""" | ||||||
|  |  | ||||||
|  |         if player_url is not None: | ||||||
|  |             try: | ||||||
|  |                 if player_url not in self._player_cache: | ||||||
|  |                     func = self._extract_signature_function( | ||||||
|  |                         video_id, player_url, len(s) | ||||||
|  |                     ) | ||||||
|  |                     self._player_cache[player_url] = func | ||||||
|  |                 func = self._player_cache[player_url] | ||||||
|  |                 if self._downloader.params.get('youtube_print_sig_code'): | ||||||
|  |                     self._print_sig_code(func, len(s)) | ||||||
|  |                 return func(s) | ||||||
|  |             except Exception: | ||||||
|  |                 tb = traceback.format_exc() | ||||||
|  |                 self._downloader.report_warning( | ||||||
|  |                     u'Automatic signature extraction failed: ' + tb) | ||||||
|  |  | ||||||
|  |             self._downloader.report_warning( | ||||||
|  |                 u'Warning: Falling back to static signature algorithm') | ||||||
|  |         return self._static_decrypt_signature( | ||||||
|  |             s, video_id, player_url, age_gate) | ||||||
|  |  | ||||||
|  |     def _static_decrypt_signature(self, s, video_id, player_url, age_gate): | ||||||
|  |         if age_gate: | ||||||
|  |             # The videos with age protection use another player, so the | ||||||
|  |             # algorithms can be different. | ||||||
|  |             if len(s) == 86: | ||||||
|  |                 return s[2:63] + s[82] + s[64:82] + s[63] | ||||||
|  |  | ||||||
|         if len(s) == 93: |         if len(s) == 93: | ||||||
|             return s[86:29:-1] + s[88] + s[28:5:-1] |             return s[86:29:-1] + s[88] + s[28:5:-1] | ||||||
|         elif len(s) == 92: |         elif len(s) == 92: | ||||||
| @@ -431,7 +1092,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         elif len(s) == 87: |         elif len(s) == 87: | ||||||
|             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] |             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] | ||||||
|         elif len(s) == 86: |         elif len(s) == 86: | ||||||
|             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[:16][::-1] |             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] | ||||||
|         elif len(s) == 85: |         elif len(s) == 85: | ||||||
|             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] |             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] | ||||||
|         elif len(s) == 84: |         elif len(s) == 84: | ||||||
| @@ -450,15 +1111,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         else: |         else: | ||||||
|             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) |             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) | ||||||
|  |  | ||||||
|     def _decrypt_signature_age_gate(self, s): |  | ||||||
|         # The videos with age protection use another player, so the algorithms |  | ||||||
|         # can be different. |  | ||||||
|         if len(s) == 86: |  | ||||||
|             return s[2:63] + s[82] + s[64:82] + s[63] |  | ||||||
|         else: |  | ||||||
|             # Fallback to the other algortihms |  | ||||||
|             return self._decrypt_signature(s) |  | ||||||
|  |  | ||||||
|     def _get_available_subtitles(self, video_id): |     def _get_available_subtitles(self, video_id): | ||||||
|         try: |         try: | ||||||
|             sub_list = self._download_webpage( |             sub_list = self._download_webpage( | ||||||
| @@ -631,7 +1283,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore') |         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore') | ||||||
|  |  | ||||||
|         # Attempt to extract SWF player URL |         # Attempt to extract SWF player URL | ||||||
|         mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) |         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) | ||||||
|         if mobj is not None: |         if mobj is not None: | ||||||
|             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) |             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) | ||||||
|         else: |         else: | ||||||
| @@ -784,21 +1436,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                     if 'sig' in url_data: |                     if 'sig' in url_data: | ||||||
|                         url += '&signature=' + url_data['sig'][0] |                         url += '&signature=' + url_data['sig'][0] | ||||||
|                     elif 's' in url_data: |                     elif 's' in url_data: | ||||||
|                         if self._downloader.params.get('verbose'): |  | ||||||
|                             s = url_data['s'][0] |  | ||||||
|                             if age_gate: |  | ||||||
|                                 player = 'flash player' |  | ||||||
|                             else: |  | ||||||
|                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, |  | ||||||
|                                     'html5 player', fatal=False) |  | ||||||
|                             parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.')) |  | ||||||
|                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % |  | ||||||
|                                 (len(s), parts_sizes, url_data['itag'][0], player)) |  | ||||||
|                         encrypted_sig = url_data['s'][0] |                         encrypted_sig = url_data['s'][0] | ||||||
|  |                         if self._downloader.params.get('verbose'): | ||||||
|                             if age_gate: |                             if age_gate: | ||||||
|                             signature = self._decrypt_signature_age_gate(encrypted_sig) |                                 if player_url is None: | ||||||
|  |                                     player_version = 'unknown' | ||||||
|                                 else: |                                 else: | ||||||
|                             signature = self._decrypt_signature(encrypted_sig) |                                     player_version = self._search_regex( | ||||||
|  |                                         r'-(.+)\.swf$', player_url, | ||||||
|  |                                         u'flash player', fatal=False) | ||||||
|  |                                 player_desc = 'flash player %s' % player_version | ||||||
|  |                             else: | ||||||
|  |                                 player_version = self._search_regex( | ||||||
|  |                                     r'html5player-(.+?)\.js', video_webpage, | ||||||
|  |                                     'html5 player', fatal=False) | ||||||
|  |                                 player_desc = u'html5 player %s' % player_version | ||||||
|  |  | ||||||
|  |                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) | ||||||
|  |                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % | ||||||
|  |                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) | ||||||
|  |  | ||||||
|  |                         if not age_gate: | ||||||
|  |                             jsplayer_url_json = self._search_regex( | ||||||
|  |                                 r'"assets":.+?"js":\s*("[^"]+")', | ||||||
|  |                                 video_webpage, u'JS player URL') | ||||||
|  |                             player_url = json.loads(jsplayer_url_json) | ||||||
|  |  | ||||||
|  |                         signature = self._decrypt_signature( | ||||||
|  |                             encrypted_sig, video_id, player_url, age_gate) | ||||||
|                         url += '&signature=' + signature |                         url += '&signature=' + signature | ||||||
|                     if 'ratebypass' not in url: |                     if 'ratebypass' not in url: | ||||||
|                         url += '&ratebypass=yes' |                         url += '&ratebypass=yes' | ||||||
|   | |||||||
| @@ -66,6 +66,12 @@ try: | |||||||
| except ImportError:  # Python 2 | except ImportError:  # Python 2 | ||||||
|     from urllib2 import HTTPError as compat_HTTPError |     from urllib2 import HTTPError as compat_HTTPError | ||||||
|  |  | ||||||
|  | try: | ||||||
|  |     from urllib.request import urlretrieve as compat_urlretrieve | ||||||
|  | except ImportError:  # Python 2 | ||||||
|  |     from urllib import urlretrieve as compat_urlretrieve | ||||||
|  |  | ||||||
|  |  | ||||||
| try: | try: | ||||||
|     from subprocess import DEVNULL |     from subprocess import DEVNULL | ||||||
|     compat_subprocess_get_DEVNULL = lambda: DEVNULL |     compat_subprocess_get_DEVNULL = lambda: DEVNULL | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister