Merge pull request #6392 from dstftw/generalized-fragmented-fd
Generalized fragmented media file downloader
This commit is contained in:
		| @@ -7,8 +7,7 @@ import os | |||||||
| import time | import time | ||||||
| import xml.etree.ElementTree as etree | import xml.etree.ElementTree as etree | ||||||
|  |  | ||||||
| from .common import FileDownloader | from .fragment import FragmentFD | ||||||
| from .http import HttpFD |  | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
| @@ -16,8 +15,6 @@ from ..compat import ( | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     struct_pack, |     struct_pack, | ||||||
|     struct_unpack, |     struct_unpack, | ||||||
|     encodeFilename, |  | ||||||
|     sanitize_open, |  | ||||||
|     xpath_text, |     xpath_text, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -226,16 +223,13 @@ def _add_ns(prop): | |||||||
|     return '{http://ns.adobe.com/f4m/1.0}%s' % prop |     return '{http://ns.adobe.com/f4m/1.0}%s' % prop | ||||||
|  |  | ||||||
|  |  | ||||||
| class HttpQuietDownloader(HttpFD): | class F4mFD(FragmentFD): | ||||||
|     def to_screen(self, *args, **kargs): |  | ||||||
|         pass |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class F4mFD(FileDownloader): |  | ||||||
|     """ |     """ | ||||||
|     A downloader for f4m manifests or AdobeHDS. |     A downloader for f4m manifests or AdobeHDS. | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|  |     FD_NAME = 'f4m' | ||||||
|  |  | ||||||
|     def _get_unencrypted_media(self, doc): |     def _get_unencrypted_media(self, doc): | ||||||
|         media = doc.findall(_add_ns('media')) |         media = doc.findall(_add_ns('media')) | ||||||
|         if not media: |         if not media: | ||||||
| @@ -288,7 +282,7 @@ class F4mFD(FileDownloader): | |||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         man_url = info_dict['url'] |         man_url = info_dict['url'] | ||||||
|         requested_bitrate = info_dict.get('tbr') |         requested_bitrate = info_dict.get('tbr') | ||||||
|         self.to_screen('[download] Downloading f4m manifest') |         self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) | ||||||
|         manifest = self.ydl.urlopen(man_url).read() |         manifest = self.ydl.urlopen(man_url).read() | ||||||
|  |  | ||||||
|         doc = etree.fromstring(manifest) |         doc = etree.fromstring(manifest) | ||||||
| @@ -320,67 +314,20 @@ class F4mFD(FileDownloader): | |||||||
|         # For some akamai manifests we'll need to add a query to the fragment url |         # For some akamai manifests we'll need to add a query to the fragment url | ||||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) |         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||||
|  |  | ||||||
|         self.report_destination(filename) |         ctx = { | ||||||
|         http_dl = HttpQuietDownloader( |             'filename': filename, | ||||||
|             self.ydl, |             'total_frags': total_frags, | ||||||
|             { |         } | ||||||
|                 'continuedl': True, |  | ||||||
|                 'quiet': True, |         self._prepare_frag_download(ctx) | ||||||
|                 'noprogress': True, |  | ||||||
|                 'ratelimit': self.params.get('ratelimit', None), |         dest_stream = ctx['dest_stream'] | ||||||
|                 'test': self.params.get('test', False), |  | ||||||
|             } |  | ||||||
|         ) |  | ||||||
|         tmpfilename = self.temp_name(filename) |  | ||||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') |  | ||||||
|  |  | ||||||
|         write_flv_header(dest_stream) |         write_flv_header(dest_stream) | ||||||
|         if not live: |         if not live: | ||||||
|             write_metadata_tag(dest_stream, metadata) |             write_metadata_tag(dest_stream, metadata) | ||||||
|  |  | ||||||
|         # This dict stores the download progress, it's updated by the progress |         self._start_frag_download(ctx) | ||||||
|         # hook |  | ||||||
|         state = { |  | ||||||
|             'status': 'downloading', |  | ||||||
|             'downloaded_bytes': 0, |  | ||||||
|             'frag_index': 0, |  | ||||||
|             'frag_count': total_frags, |  | ||||||
|             'filename': filename, |  | ||||||
|             'tmpfilename': tmpfilename, |  | ||||||
|         } |  | ||||||
|         start = time.time() |  | ||||||
|  |  | ||||||
|         def frag_progress_hook(s): |  | ||||||
|             if s['status'] not in ('downloading', 'finished'): |  | ||||||
|                 return |  | ||||||
|  |  | ||||||
|             frag_total_bytes = s.get('total_bytes', 0) |  | ||||||
|             if s['status'] == 'finished': |  | ||||||
|                 state['downloaded_bytes'] += frag_total_bytes |  | ||||||
|                 state['frag_index'] += 1 |  | ||||||
|  |  | ||||||
|             estimated_size = ( |  | ||||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / |  | ||||||
|                 (state['frag_index'] + 1) * total_frags) |  | ||||||
|             time_now = time.time() |  | ||||||
|             state['total_bytes_estimate'] = estimated_size |  | ||||||
|             state['elapsed'] = time_now - start |  | ||||||
|  |  | ||||||
|             if s['status'] == 'finished': |  | ||||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) |  | ||||||
|             else: |  | ||||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] |  | ||||||
|                 frag_progress = self.calc_percent(frag_downloaded_bytes, |  | ||||||
|                                                   frag_total_bytes) |  | ||||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) |  | ||||||
|                 progress += frag_progress / float(total_frags) |  | ||||||
|  |  | ||||||
|                 state['eta'] = self.calc_eta( |  | ||||||
|                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) |  | ||||||
|                 state['speed'] = s.get('speed') |  | ||||||
|             self._hook_progress(state) |  | ||||||
|  |  | ||||||
|         http_dl.add_progress_hook(frag_progress_hook) |  | ||||||
|  |  | ||||||
|         frags_filenames = [] |         frags_filenames = [] | ||||||
|         while fragments_list: |         while fragments_list: | ||||||
| @@ -391,9 +338,9 @@ class F4mFD(FileDownloader): | |||||||
|                 url += '?' + akamai_pv.strip(';') |                 url += '?' + akamai_pv.strip(';') | ||||||
|             if info_dict.get('extra_param_to_segment_url'): |             if info_dict.get('extra_param_to_segment_url'): | ||||||
|                 url += info_dict.get('extra_param_to_segment_url') |                 url += info_dict.get('extra_param_to_segment_url') | ||||||
|             frag_filename = '%s-%s' % (tmpfilename, name) |             frag_filename = '%s-%s' % (ctx['tmpfilename'], name) | ||||||
|             try: |             try: | ||||||
|                 success = http_dl.download(frag_filename, {'url': url}) |                 success = ctx['dl'].download(frag_filename, {'url': url}) | ||||||
|                 if not success: |                 if not success: | ||||||
|                     return False |                     return False | ||||||
|                 with open(frag_filename, 'rb') as down: |                 with open(frag_filename, 'rb') as down: | ||||||
| @@ -425,20 +372,9 @@ class F4mFD(FileDownloader): | |||||||
|                     msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) |                     msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) | ||||||
|                     self.report_warning(msg) |                     self.report_warning(msg) | ||||||
|  |  | ||||||
|         dest_stream.close() |         self._finish_frag_download(ctx) | ||||||
|  |  | ||||||
|         elapsed = time.time() - start |  | ||||||
|         self.try_rename(tmpfilename, filename) |  | ||||||
|         for frag_file in frags_filenames: |         for frag_file in frags_filenames: | ||||||
|             os.remove(frag_file) |             os.remove(frag_file) | ||||||
|  |  | ||||||
|         fsize = os.path.getsize(encodeFilename(filename)) |  | ||||||
|         self._hook_progress({ |  | ||||||
|             'downloaded_bytes': fsize, |  | ||||||
|             'total_bytes': fsize, |  | ||||||
|             'filename': filename, |  | ||||||
|             'status': 'finished', |  | ||||||
|             'elapsed': elapsed, |  | ||||||
|         }) |  | ||||||
|  |  | ||||||
|         return True |         return True | ||||||
|   | |||||||
							
								
								
									
youtube_dl/downloader/fragment.py (normal file, 110 changed lines)
									
								
							| @@ -0,0 +1,110 @@ | |||||||
|  | from __future__ import division, unicode_literals | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import time | ||||||
|  |  | ||||||
|  | from .common import FileDownloader | ||||||
|  | from .http import HttpFD | ||||||
|  | from ..utils import ( | ||||||
|  |     encodeFilename, | ||||||
|  |     sanitize_open, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class HttpQuietDownloader(HttpFD): | ||||||
|  |     def to_screen(self, *args, **kargs): | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class FragmentFD(FileDownloader): | ||||||
|  |     """ | ||||||
|  |     A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def _prepare_and_start_frag_download(self, ctx): | ||||||
|  |         self._prepare_frag_download(ctx) | ||||||
|  |         self._start_frag_download(ctx) | ||||||
|  |  | ||||||
|  |     def _prepare_frag_download(self, ctx): | ||||||
|  |         self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags'])) | ||||||
|  |         self.report_destination(ctx['filename']) | ||||||
|  |         dl = HttpQuietDownloader( | ||||||
|  |             self.ydl, | ||||||
|  |             { | ||||||
|  |                 'continuedl': True, | ||||||
|  |                 'quiet': True, | ||||||
|  |                 'noprogress': True, | ||||||
|  |                 'ratelimit': self.params.get('ratelimit', None), | ||||||
|  |                 'test': self.params.get('test', False), | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|  |         tmpfilename = self.temp_name(ctx['filename']) | ||||||
|  |         dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb') | ||||||
|  |         ctx.update({ | ||||||
|  |             'dl': dl, | ||||||
|  |             'dest_stream': dest_stream, | ||||||
|  |             'tmpfilename': tmpfilename, | ||||||
|  |         }) | ||||||
|  |  | ||||||
|  |     def _start_frag_download(self, ctx): | ||||||
|  |         total_frags = ctx['total_frags'] | ||||||
|  |         # This dict stores the download progress, it's updated by the progress | ||||||
|  |         # hook | ||||||
|  |         state = { | ||||||
|  |             'status': 'downloading', | ||||||
|  |             'downloaded_bytes': 0, | ||||||
|  |             'frag_index': 0, | ||||||
|  |             'frag_count': total_frags, | ||||||
|  |             'filename': ctx['filename'], | ||||||
|  |             'tmpfilename': ctx['tmpfilename'], | ||||||
|  |         } | ||||||
|  |         start = time.time() | ||||||
|  |         ctx['started'] = start | ||||||
|  |  | ||||||
|  |         def frag_progress_hook(s): | ||||||
|  |             if s['status'] not in ('downloading', 'finished'): | ||||||
|  |                 return | ||||||
|  |  | ||||||
|  |             frag_total_bytes = s.get('total_bytes', 0) | ||||||
|  |             if s['status'] == 'finished': | ||||||
|  |                 state['downloaded_bytes'] += frag_total_bytes | ||||||
|  |                 state['frag_index'] += 1 | ||||||
|  |  | ||||||
|  |             estimated_size = ( | ||||||
|  |                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||||
|  |                 (state['frag_index'] + 1) * total_frags) | ||||||
|  |             time_now = time.time() | ||||||
|  |             state['total_bytes_estimate'] = estimated_size | ||||||
|  |             state['elapsed'] = time_now - start | ||||||
|  |  | ||||||
|  |             if s['status'] == 'finished': | ||||||
|  |                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||||
|  |             else: | ||||||
|  |                 frag_downloaded_bytes = s['downloaded_bytes'] | ||||||
|  |                 frag_progress = self.calc_percent(frag_downloaded_bytes, | ||||||
|  |                                                   frag_total_bytes) | ||||||
|  |                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||||
|  |                 progress += frag_progress / float(total_frags) | ||||||
|  |  | ||||||
|  |                 state['eta'] = self.calc_eta( | ||||||
|  |                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) | ||||||
|  |                 state['speed'] = s.get('speed') | ||||||
|  |             self._hook_progress(state) | ||||||
|  |  | ||||||
|  |         ctx['dl'].add_progress_hook(frag_progress_hook) | ||||||
|  |  | ||||||
|  |         return start | ||||||
|  |  | ||||||
|  |     def _finish_frag_download(self, ctx): | ||||||
|  |         ctx['dest_stream'].close() | ||||||
|  |         elapsed = time.time() - ctx['started'] | ||||||
|  |         self.try_rename(ctx['tmpfilename'], ctx['filename']) | ||||||
|  |         fsize = os.path.getsize(encodeFilename(ctx['filename'])) | ||||||
|  |  | ||||||
|  |         self._hook_progress({ | ||||||
|  |             'downloaded_bytes': fsize, | ||||||
|  |             'total_bytes': fsize, | ||||||
|  |             'filename': ctx['filename'], | ||||||
|  |             'status': 'finished', | ||||||
|  |             'elapsed': elapsed, | ||||||
|  |         }) | ||||||
| @@ -4,12 +4,11 @@ import os | |||||||
| import re | import re | ||||||
| import subprocess | import subprocess | ||||||
|  |  | ||||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor |  | ||||||
| from .common import FileDownloader | from .common import FileDownloader | ||||||
| from ..compat import ( | from .fragment import FragmentFD | ||||||
|     compat_urlparse, |  | ||||||
|     compat_urllib_request, | from ..compat import compat_urlparse | ||||||
| ) | from ..postprocessor.ffmpeg import FFmpegPostProcessor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     encodeArgument, |     encodeArgument, | ||||||
|     encodeFilename, |     encodeFilename, | ||||||
| @@ -51,54 +50,50 @@ class HlsFD(FileDownloader): | |||||||
|             return False |             return False | ||||||
|  |  | ||||||
|  |  | ||||||
| class NativeHlsFD(FileDownloader): | class NativeHlsFD(FragmentFD): | ||||||
|     """ A more limited implementation that does not require ffmpeg """ |     """ A more limited implementation that does not require ffmpeg """ | ||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     FD_NAME = 'hlsnative' | ||||||
|         url = info_dict['url'] |  | ||||||
|         self.report_destination(filename) |  | ||||||
|         tmpfilename = self.temp_name(filename) |  | ||||||
|  |  | ||||||
|         self.to_screen( |     def real_download(self, filename, info_dict): | ||||||
|             '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) |         man_url = info_dict['url'] | ||||||
|         data = self.ydl.urlopen(url).read() |         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) | ||||||
|         s = data.decode('utf-8', 'ignore') |         manifest = self.ydl.urlopen(man_url).read() | ||||||
|         segment_urls = [] |  | ||||||
|  |         s = manifest.decode('utf-8', 'ignore') | ||||||
|  |         fragment_urls = [] | ||||||
|         for line in s.splitlines(): |         for line in s.splitlines(): | ||||||
|             line = line.strip() |             line = line.strip() | ||||||
|             if line and not line.startswith('#'): |             if line and not line.startswith('#'): | ||||||
|                 segment_url = ( |                 segment_url = ( | ||||||
|                     line |                     line | ||||||
|                     if re.match(r'^https?://', line) |                     if re.match(r'^https?://', line) | ||||||
|                     else compat_urlparse.urljoin(url, line)) |                     else compat_urlparse.urljoin(man_url, line)) | ||||||
|                 segment_urls.append(segment_url) |                 fragment_urls.append(segment_url) | ||||||
|  |                 # We only download the first fragment during the test | ||||||
|         is_test = self.params.get('test', False) |                 if self.params.get('test', False): | ||||||
|         remaining_bytes = self._TEST_FILE_SIZE if is_test else None |  | ||||||
|         byte_counter = 0 |  | ||||||
|         with open(tmpfilename, 'wb') as outf: |  | ||||||
|             for i, segurl in enumerate(segment_urls): |  | ||||||
|                 self.to_screen( |  | ||||||
|                     '[hlsnative] %s: Downloading segment %d / %d' % |  | ||||||
|                     (info_dict['id'], i + 1, len(segment_urls))) |  | ||||||
|                 seg_req = compat_urllib_request.Request(segurl) |  | ||||||
|                 if remaining_bytes is not None: |  | ||||||
|                     seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) |  | ||||||
|  |  | ||||||
|                 segment = self.ydl.urlopen(seg_req).read() |  | ||||||
|                 if remaining_bytes is not None: |  | ||||||
|                     segment = segment[:remaining_bytes] |  | ||||||
|                     remaining_bytes -= len(segment) |  | ||||||
|                 outf.write(segment) |  | ||||||
|                 byte_counter += len(segment) |  | ||||||
|                 if remaining_bytes is not None and remaining_bytes <= 0: |  | ||||||
|                     break |                     break | ||||||
|  |  | ||||||
|         self._hook_progress({ |         ctx = { | ||||||
|             'downloaded_bytes': byte_counter, |  | ||||||
|             'total_bytes': byte_counter, |  | ||||||
|             'filename': filename, |             'filename': filename, | ||||||
|             'status': 'finished', |             'total_frags': len(fragment_urls), | ||||||
|         }) |         } | ||||||
|         self.try_rename(tmpfilename, filename) |  | ||||||
|  |         self._prepare_and_start_frag_download(ctx) | ||||||
|  |  | ||||||
|  |         frags_filenames = [] | ||||||
|  |         for i, frag_url in enumerate(fragment_urls): | ||||||
|  |             frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||||
|  |             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||||
|  |             if not success: | ||||||
|  |                 return False | ||||||
|  |             with open(frag_filename, 'rb') as down: | ||||||
|  |                 ctx['dest_stream'].write(down.read()) | ||||||
|  |             frags_filenames.append(frag_filename) | ||||||
|  |  | ||||||
|  |         self._finish_frag_download(ctx) | ||||||
|  |  | ||||||
|  |         for frag_file in frags_filenames: | ||||||
|  |             os.remove(frag_file) | ||||||
|  |  | ||||||
|         return True |         return True | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M.
					Sergey M.