Merge pull request #6392 from dstftw/generalized-fragmented-fd
Generalized fragmented media file downloader
This commit is contained in:
		| @@ -7,8 +7,7 @@ import os | ||||
| import time | ||||
| import xml.etree.ElementTree as etree | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from .fragment import FragmentFD | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_urllib_error, | ||||
| @@ -16,8 +15,6 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
| @@ -226,16 +223,13 @@ def _add_ns(prop): | ||||
|     return '{http://ns.adobe.com/f4m/1.0}%s' % prop | ||||
|  | ||||
|  | ||||
| class HttpQuietDownloader(HttpFD): | ||||
|     def to_screen(self, *args, **kargs): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| class F4mFD(FileDownloader): | ||||
| class F4mFD(FragmentFD): | ||||
|     """ | ||||
|     A downloader for f4m manifests or AdobeHDS. | ||||
|     """ | ||||
|  | ||||
|     FD_NAME = 'f4m' | ||||
|  | ||||
|     def _get_unencrypted_media(self, doc): | ||||
|         media = doc.findall(_add_ns('media')) | ||||
|         if not media: | ||||
| @@ -288,7 +282,7 @@ class F4mFD(FileDownloader): | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
|         requested_bitrate = info_dict.get('tbr') | ||||
|         self.to_screen('[download] Downloading f4m manifest') | ||||
|         self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) | ||||
|         manifest = self.ydl.urlopen(man_url).read() | ||||
|  | ||||
|         doc = etree.fromstring(manifest) | ||||
| @@ -320,67 +314,20 @@ class F4mFD(FileDownloader): | ||||
|         # For some akamai manifests we'll need to add a query to the fragment url | ||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'total_frags': total_frags, | ||||
|         } | ||||
|  | ||||
|         self._prepare_frag_download(ctx) | ||||
|  | ||||
|         dest_stream = ctx['dest_stream'] | ||||
|  | ||||
|         write_flv_header(dest_stream) | ||||
|         if not live: | ||||
|             write_metadata_tag(dest_stream, metadata) | ||||
|  | ||||
|         # This dict stores the download progress, it's updated by the progress | ||||
|         # hook | ||||
|         state = { | ||||
|             'status': 'downloading', | ||||
|             'downloaded_bytes': 0, | ||||
|             'frag_index': 0, | ||||
|             'frag_count': total_frags, | ||||
|             'filename': filename, | ||||
|             'tmpfilename': tmpfilename, | ||||
|         } | ||||
|         start = time.time() | ||||
|  | ||||
|         def frag_progress_hook(s): | ||||
|             if s['status'] not in ('downloading', 'finished'): | ||||
|                 return | ||||
|  | ||||
|             frag_total_bytes = s.get('total_bytes', 0) | ||||
|             if s['status'] == 'finished': | ||||
|                 state['downloaded_bytes'] += frag_total_bytes | ||||
|                 state['frag_index'] += 1 | ||||
|  | ||||
|             estimated_size = ( | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||
|                 (state['frag_index'] + 1) * total_frags) | ||||
|             time_now = time.time() | ||||
|             state['total_bytes_estimate'] = estimated_size | ||||
|             state['elapsed'] = time_now - start | ||||
|  | ||||
|             if s['status'] == 'finished': | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|             else: | ||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] | ||||
|                 frag_progress = self.calc_percent(frag_downloaded_bytes, | ||||
|                                                   frag_total_bytes) | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|                 progress += frag_progress / float(total_frags) | ||||
|  | ||||
|                 state['eta'] = self.calc_eta( | ||||
|                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) | ||||
|                 state['speed'] = s.get('speed') | ||||
|             self._hook_progress(state) | ||||
|  | ||||
|         http_dl.add_progress_hook(frag_progress_hook) | ||||
|         self._start_frag_download(ctx) | ||||
|  | ||||
|         frags_filenames = [] | ||||
|         while fragments_list: | ||||
| @@ -391,9 +338,9 @@ class F4mFD(FileDownloader): | ||||
|                 url += '?' + akamai_pv.strip(';') | ||||
|             if info_dict.get('extra_param_to_segment_url'): | ||||
|                 url += info_dict.get('extra_param_to_segment_url') | ||||
|             frag_filename = '%s-%s' % (tmpfilename, name) | ||||
|             frag_filename = '%s-%s' % (ctx['tmpfilename'], name) | ||||
|             try: | ||||
|                 success = http_dl.download(frag_filename, {'url': url}) | ||||
|                 success = ctx['dl'].download(frag_filename, {'url': url}) | ||||
|                 if not success: | ||||
|                     return False | ||||
|                 with open(frag_filename, 'rb') as down: | ||||
| @@ -425,20 +372,9 @@ class F4mFD(FileDownloader): | ||||
|                     msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) | ||||
|                     self.report_warning(msg) | ||||
|  | ||||
|         dest_stream.close() | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|         elapsed = time.time() - start | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
|  | ||||
|         fsize = os.path.getsize(encodeFilename(filename)) | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': fsize, | ||||
|             'total_bytes': fsize, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': elapsed, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
							
								
								
									
										110
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
| ) | ||||
|  | ||||
|  | ||||
class HttpQuietDownloader(HttpFD):
    """An HttpFD subclass whose screen output is suppressed."""

    def to_screen(self, *args, **kwargs):
        """Accept any arguments and print nothing."""
        return None
|  | ||||
|  | ||||
class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Subclasses must define FD_NAME (used as the log prefix) and drive the
    download through a mutable ``ctx`` dict:

    * the caller supplies ``filename`` and ``total_frags``;
    * _prepare_frag_download() adds ``dl`` (a quiet HTTP downloader used
      for the individual fragments), ``dest_stream`` (the opened temporary
      output file) and ``tmpfilename``;
    * _start_frag_download() adds ``started`` (the start timestamp);
    * _finish_frag_download() closes the stream, renames the temporary
      file to ``filename`` and reports the 'finished' status.
    """

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download() followed by _start_frag_download()."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    def _prepare_frag_download(self, ctx):
        """
        Announce the download, create the fragment downloader and open the
        temporary destination file.

        Reads ctx['filename'] and ctx['total_frags']; stores 'dl',
        'dest_stream' and 'tmpfilename' in ctx.
        """
        self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
        self.report_destination(ctx['filename'])
        # Fragments are fetched by a silent downloader; overall progress is
        # reported by this class through the hook installed in
        # _start_frag_download().
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit', None),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        # Keep the name returned by sanitize_open, not the one passed in.
        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
        })

    def _start_frag_download(self, ctx):
        """
        Install the progress hook on ctx['dl'] and record the start time.

        Requires ctx to have been filled by _prepare_frag_download().
        Stores the start timestamp in ctx['started'] and also returns it.
        """
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': 0,
            'frag_index': 0,
            'frag_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }
        start = time.time()
        ctx['started'] = start

        def frag_progress_hook(s):
            """Fold one fragment's status dict into the overall state."""
            if s['status'] not in ('downloading', 'finished'):
                return

            # 'total_bytes' may be absent or explicitly None (presumably
            # when the fragment size is unknown - confirm against HttpFD);
            # treat both as 0 so the arithmetic below cannot fail.
            frag_total_bytes = s.get('total_bytes') or 0
            if s['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_index'] += 1

            # Extrapolate the total size from the average fragment size seen
            # so far (relies on the module-level true division import).
            estimated_size = (
                (state['downloaded_bytes'] + frag_total_bytes) /
                (state['frag_index'] + 1) * total_frags)
            time_now = time.time()
            state['total_bytes_estimate'] = estimated_size
            state['elapsed'] = time_now - start

            # ETA and speed are only refreshed while a fragment is in flight.
            if s['status'] != 'finished':
                frag_downloaded_bytes = s['downloaded_bytes']
                state['eta'] = self.calc_eta(
                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
                state['speed'] = s.get('speed')
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """
        Close the destination stream, move the temporary file to its final
        name and report the 'finished' status with the final file size.
        """
        ctx['dest_stream'].close()
        elapsed = time.time() - ctx['started']
        self.try_rename(ctx['tmpfilename'], ctx['filename'])
        fsize = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })
| @@ -4,12 +4,11 @@ import os | ||||
| import re | ||||
| import subprocess | ||||
|  | ||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor | ||||
| from .common import FileDownloader | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from .fragment import FragmentFD | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor | ||||
| from ..utils import ( | ||||
|     encodeArgument, | ||||
|     encodeFilename, | ||||
| @@ -51,54 +50,50 @@ class HlsFD(FileDownloader): | ||||
|             return False | ||||
|  | ||||
|  | ||||
| class NativeHlsFD(FileDownloader): | ||||
| class NativeHlsFD(FragmentFD): | ||||
|     """ A more limited implementation that does not require ffmpeg """ | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|         self.report_destination(filename) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|     FD_NAME = 'hlsnative' | ||||
|  | ||||
|         self.to_screen( | ||||
|             '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) | ||||
|         data = self.ydl.urlopen(url).read() | ||||
|         s = data.decode('utf-8', 'ignore') | ||||
|         segment_urls = [] | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
|         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) | ||||
|         manifest = self.ydl.urlopen(man_url).read() | ||||
|  | ||||
|         s = manifest.decode('utf-8', 'ignore') | ||||
|         fragment_urls = [] | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(url, line)) | ||||
|                 segment_urls.append(segment_url) | ||||
|  | ||||
|         is_test = self.params.get('test', False) | ||||
|         remaining_bytes = self._TEST_FILE_SIZE if is_test else None | ||||
|         byte_counter = 0 | ||||
|         with open(tmpfilename, 'wb') as outf: | ||||
|             for i, segurl in enumerate(segment_urls): | ||||
|                 self.to_screen( | ||||
|                     '[hlsnative] %s: Downloading segment %d / %d' % | ||||
|                     (info_dict['id'], i + 1, len(segment_urls))) | ||||
|                 seg_req = compat_urllib_request.Request(segurl) | ||||
|                 if remaining_bytes is not None: | ||||
|                     seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) | ||||
|  | ||||
|                 segment = self.ydl.urlopen(seg_req).read() | ||||
|                 if remaining_bytes is not None: | ||||
|                     segment = segment[:remaining_bytes] | ||||
|                     remaining_bytes -= len(segment) | ||||
|                 outf.write(segment) | ||||
|                 byte_counter += len(segment) | ||||
|                 if remaining_bytes is not None and remaining_bytes <= 0: | ||||
|                     else compat_urlparse.urljoin(man_url, line)) | ||||
|                 fragment_urls.append(segment_url) | ||||
|                 # We only download the first fragment during the test | ||||
|                 if self.params.get('test', False): | ||||
|                     break | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': byte_counter, | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|         }) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|             'total_frags': len(fragment_urls), | ||||
|         } | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         frags_filenames = [] | ||||
|         for i, frag_url in enumerate(fragment_urls): | ||||
|             frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|             if not success: | ||||
|                 return False | ||||
|             with open(frag_filename, 'rb') as down: | ||||
|                 ctx['dest_stream'].write(down.read()) | ||||
|             frags_filenames.append(frag_filename) | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
|  | ||||
|         return True | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M.
					Sergey M.