[extractor/common] Modernize
This commit is contained in:
		| @@ -1,3 +1,5 @@ | |||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import base64 | import base64 | ||||||
| import hashlib | import hashlib | ||||||
| import json | import json | ||||||
| @@ -202,17 +204,17 @@ class InfoExtractor(object): | |||||||
|             self.report_download_webpage(video_id) |             self.report_download_webpage(video_id) | ||||||
|         elif note is not False: |         elif note is not False: | ||||||
|             if video_id is None: |             if video_id is None: | ||||||
|                 self.to_screen(u'%s' % (note,)) |                 self.to_screen('%s' % (note,)) | ||||||
|             else: |             else: | ||||||
|                 self.to_screen(u'%s: %s' % (video_id, note)) |                 self.to_screen('%s: %s' % (video_id, note)) | ||||||
|         try: |         try: | ||||||
|             return self._downloader.urlopen(url_or_request) |             return self._downloader.urlopen(url_or_request) | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|             if errnote is False: |             if errnote is False: | ||||||
|                 return False |                 return False | ||||||
|             if errnote is None: |             if errnote is None: | ||||||
|                 errnote = u'Unable to download webpage' |                 errnote = 'Unable to download webpage' | ||||||
|             errmsg = u'%s: %s' % (errnote, compat_str(err)) |             errmsg = '%s: %s' % (errnote, compat_str(err)) | ||||||
|             if fatal: |             if fatal: | ||||||
|                 raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) |                 raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) | ||||||
|             else: |             else: | ||||||
| @@ -249,7 +251,7 @@ class InfoExtractor(object): | |||||||
|                 url = url_or_request.get_full_url() |                 url = url_or_request.get_full_url() | ||||||
|             except AttributeError: |             except AttributeError: | ||||||
|                 url = url_or_request |                 url = url_or_request | ||||||
|             self.to_screen(u'Dumping request to ' + url) |             self.to_screen('Dumping request to ' + url) | ||||||
|             dump = base64.b64encode(webpage_bytes).decode('ascii') |             dump = base64.b64encode(webpage_bytes).decode('ascii') | ||||||
|             self._downloader.to_screen(dump) |             self._downloader.to_screen(dump) | ||||||
|         if self._downloader.params.get('write_pages', False): |         if self._downloader.params.get('write_pages', False): | ||||||
| @@ -259,11 +261,11 @@ class InfoExtractor(object): | |||||||
|                 url = url_or_request |                 url = url_or_request | ||||||
|             basen = '%s_%s' % (video_id, url) |             basen = '%s_%s' % (video_id, url) | ||||||
|             if len(basen) > 240: |             if len(basen) > 240: | ||||||
|                 h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest() |                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() | ||||||
|                 basen = basen[:240 - len(h)] + h |                 basen = basen[:240 - len(h)] + h | ||||||
|             raw_filename = basen + '.dump' |             raw_filename = basen + '.dump' | ||||||
|             filename = sanitize_filename(raw_filename, restricted=True) |             filename = sanitize_filename(raw_filename, restricted=True) | ||||||
|             self.to_screen(u'Saving request to ' + filename) |             self.to_screen('Saving request to ' + filename) | ||||||
|             with open(filename, 'wb') as outf: |             with open(filename, 'wb') as outf: | ||||||
|                 outf.write(webpage_bytes) |                 outf.write(webpage_bytes) | ||||||
|  |  | ||||||
| @@ -272,14 +274,14 @@ class InfoExtractor(object): | |||||||
|         except LookupError: |         except LookupError: | ||||||
|             content = webpage_bytes.decode('utf-8', 'replace') |             content = webpage_bytes.decode('utf-8', 'replace') | ||||||
|  |  | ||||||
|         if (u'<title>Access to this site is blocked</title>' in content and |         if ('<title>Access to this site is blocked</title>' in content and | ||||||
|                 u'Websense' in content[:512]): |                 'Websense' in content[:512]): | ||||||
|             msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' |             msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' | ||||||
|             blocked_iframe = self._html_search_regex( |             blocked_iframe = self._html_search_regex( | ||||||
|                 r'<iframe src="([^"]+)"', content, |                 r'<iframe src="([^"]+)"', content, | ||||||
|                 u'Websense information URL', default=None) |                 'Websense information URL', default=None) | ||||||
|             if blocked_iframe: |             if blocked_iframe: | ||||||
|                 msg += u' Visit %s for more details' % blocked_iframe |                 msg += ' Visit %s for more details' % blocked_iframe | ||||||
|             raise ExtractorError(msg, expected=True) |             raise ExtractorError(msg, expected=True) | ||||||
|  |  | ||||||
|         return (content, urlh) |         return (content, urlh) | ||||||
| @@ -294,7 +296,7 @@ class InfoExtractor(object): | |||||||
|             return content |             return content | ||||||
|  |  | ||||||
|     def _download_xml(self, url_or_request, video_id, |     def _download_xml(self, url_or_request, video_id, | ||||||
|                       note=u'Downloading XML', errnote=u'Unable to download XML', |                       note='Downloading XML', errnote='Unable to download XML', | ||||||
|                       transform_source=None, fatal=True): |                       transform_source=None, fatal=True): | ||||||
|         """Return the xml as an xml.etree.ElementTree.Element""" |         """Return the xml as an xml.etree.ElementTree.Element""" | ||||||
|         xml_string = self._download_webpage( |         xml_string = self._download_webpage( | ||||||
| @@ -306,8 +308,8 @@ class InfoExtractor(object): | |||||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) |         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) | ||||||
|  |  | ||||||
|     def _download_json(self, url_or_request, video_id, |     def _download_json(self, url_or_request, video_id, | ||||||
|                        note=u'Downloading JSON metadata', |                        note='Downloading JSON metadata', | ||||||
|                        errnote=u'Unable to download JSON metadata', |                        errnote='Unable to download JSON metadata', | ||||||
|                        transform_source=None, |                        transform_source=None, | ||||||
|                        fatal=True): |                        fatal=True): | ||||||
|         json_string = self._download_webpage( |         json_string = self._download_webpage( | ||||||
| @@ -322,29 +324,29 @@ class InfoExtractor(object): | |||||||
|             raise ExtractorError('Failed to download JSON', cause=ve) |             raise ExtractorError('Failed to download JSON', cause=ve) | ||||||
|  |  | ||||||
|     def report_warning(self, msg, video_id=None): |     def report_warning(self, msg, video_id=None): | ||||||
|         idstr = u'' if video_id is None else u'%s: ' % video_id |         idstr = '' if video_id is None else '%s: ' % video_id | ||||||
|         self._downloader.report_warning( |         self._downloader.report_warning( | ||||||
|             u'[%s] %s%s' % (self.IE_NAME, idstr, msg)) |             '[%s] %s%s' % (self.IE_NAME, idstr, msg)) | ||||||
|  |  | ||||||
|     def to_screen(self, msg): |     def to_screen(self, msg): | ||||||
|         """Print msg to screen, prefixing it with '[ie_name]'""" |         """Print msg to screen, prefixing it with '[ie_name]'""" | ||||||
|         self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) |         self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg)) | ||||||
|  |  | ||||||
|     def report_extraction(self, id_or_name): |     def report_extraction(self, id_or_name): | ||||||
|         """Report information extraction.""" |         """Report information extraction.""" | ||||||
|         self.to_screen(u'%s: Extracting information' % id_or_name) |         self.to_screen('%s: Extracting information' % id_or_name) | ||||||
|  |  | ||||||
|     def report_download_webpage(self, video_id): |     def report_download_webpage(self, video_id): | ||||||
|         """Report webpage download.""" |         """Report webpage download.""" | ||||||
|         self.to_screen(u'%s: Downloading webpage' % video_id) |         self.to_screen('%s: Downloading webpage' % video_id) | ||||||
|  |  | ||||||
|     def report_age_confirmation(self): |     def report_age_confirmation(self): | ||||||
|         """Report attempt to confirm age.""" |         """Report attempt to confirm age.""" | ||||||
|         self.to_screen(u'Confirming age') |         self.to_screen('Confirming age') | ||||||
|  |  | ||||||
|     def report_login(self): |     def report_login(self): | ||||||
|         """Report attempt to log in.""" |         """Report attempt to log in.""" | ||||||
|         self.to_screen(u'Logging in') |         self.to_screen('Logging in') | ||||||
|  |  | ||||||
|     #Methods for following #608 |     #Methods for following #608 | ||||||
|     @staticmethod |     @staticmethod | ||||||
| @@ -384,7 +386,7 @@ class InfoExtractor(object): | |||||||
|                     break |                     break | ||||||
|  |  | ||||||
|         if os.name != 'nt' and sys.stderr.isatty(): |         if os.name != 'nt' and sys.stderr.isatty(): | ||||||
|             _name = u'\033[0;34m%s\033[0m' % name |             _name = '\033[0;34m%s\033[0m' % name | ||||||
|         else: |         else: | ||||||
|             _name = name |             _name = name | ||||||
|  |  | ||||||
| @@ -394,10 +396,10 @@ class InfoExtractor(object): | |||||||
|         elif default is not _NO_DEFAULT: |         elif default is not _NO_DEFAULT: | ||||||
|             return default |             return default | ||||||
|         elif fatal: |         elif fatal: | ||||||
|             raise RegexNotFoundError(u'Unable to extract %s' % _name) |             raise RegexNotFoundError('Unable to extract %s' % _name) | ||||||
|         else: |         else: | ||||||
|             self._downloader.report_warning(u'unable to extract %s; ' |             self._downloader.report_warning('unable to extract %s; ' | ||||||
|                 u'please report this issue on http://yt-dl.org/bug' % _name) |                 'please report this issue on http://yt-dl.org/bug' % _name) | ||||||
|             return None |             return None | ||||||
|  |  | ||||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): |     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): | ||||||
| @@ -436,7 +438,7 @@ class InfoExtractor(object): | |||||||
|                 else: |                 else: | ||||||
|                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) |                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) | ||||||
|             except (IOError, netrc.NetrcParseError) as err: |             except (IOError, netrc.NetrcParseError) as err: | ||||||
|                 self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) |                 self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) | ||||||
|          |          | ||||||
|         return (username, password) |         return (username, password) | ||||||
|  |  | ||||||
| @@ -476,7 +478,7 @@ class InfoExtractor(object): | |||||||
|         return unescapeHTML(escaped) |         return unescapeHTML(escaped) | ||||||
|  |  | ||||||
|     def _og_search_thumbnail(self, html, **kargs): |     def _og_search_thumbnail(self, html, **kargs): | ||||||
|         return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) |         return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs) | ||||||
|  |  | ||||||
|     def _og_search_description(self, html, **kargs): |     def _og_search_description(self, html, **kargs): | ||||||
|         return self._og_search_property('description', html, fatal=False, **kargs) |         return self._og_search_property('description', html, fatal=False, **kargs) | ||||||
| @@ -535,7 +537,7 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|     def _sort_formats(self, formats): |     def _sort_formats(self, formats): | ||||||
|         if not formats: |         if not formats: | ||||||
|             raise ExtractorError(u'No video formats found') |             raise ExtractorError('No video formats found') | ||||||
|  |  | ||||||
|         def _formats_key(f): |         def _formats_key(f): | ||||||
|             # TODO remove the following workaround |             # TODO remove the following workaround | ||||||
| @@ -555,9 +557,9 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|             if f.get('vcodec') == 'none':  # audio only |             if f.get('vcodec') == 'none':  # audio only | ||||||
|                 if self._downloader.params.get('prefer_free_formats'): |                 if self._downloader.params.get('prefer_free_formats'): | ||||||
|                     ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus'] |                     ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] | ||||||
|                 else: |                 else: | ||||||
|                     ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a'] |                     ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a'] | ||||||
|                 ext_preference = 0 |                 ext_preference = 0 | ||||||
|                 try: |                 try: | ||||||
|                     audio_ext_preference = ORDER.index(f['ext']) |                     audio_ext_preference = ORDER.index(f['ext']) | ||||||
| @@ -565,9 +567,9 @@ class InfoExtractor(object): | |||||||
|                     audio_ext_preference = -1 |                     audio_ext_preference = -1 | ||||||
|             else: |             else: | ||||||
|                 if self._downloader.params.get('prefer_free_formats'): |                 if self._downloader.params.get('prefer_free_formats'): | ||||||
|                     ORDER = [u'flv', u'mp4', u'webm'] |                     ORDER = ['flv', 'mp4', 'webm'] | ||||||
|                 else: |                 else: | ||||||
|                     ORDER = [u'webm', u'flv', u'mp4'] |                     ORDER = ['webm', 'flv', 'mp4'] | ||||||
|                 try: |                 try: | ||||||
|                     ext_preference = ORDER.index(f['ext']) |                     ext_preference = ORDER.index(f['ext']) | ||||||
|                 except ValueError: |                 except ValueError: | ||||||
| @@ -609,7 +611,7 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|     def _sleep(self, timeout, video_id, msg_template=None): |     def _sleep(self, timeout, video_id, msg_template=None): | ||||||
|         if msg_template is None: |         if msg_template is None: | ||||||
|             msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' |             msg_template = '%(video_id)s: Waiting for %(timeout)s seconds' | ||||||
|         msg = msg_template % {'video_id': video_id, 'timeout': timeout} |         msg = msg_template % {'video_id': video_id, 'timeout': timeout} | ||||||
|         self.to_screen(msg) |         self.to_screen(msg) | ||||||
|         time.sleep(timeout) |         time.sleep(timeout) | ||||||
| @@ -704,7 +706,7 @@ class SearchInfoExtractor(InfoExtractor): | |||||||
|     def _real_extract(self, query): |     def _real_extract(self, query): | ||||||
|         mobj = re.match(self._make_valid_url(), query) |         mobj = re.match(self._make_valid_url(), query) | ||||||
|         if mobj is None: |         if mobj is None: | ||||||
|             raise ExtractorError(u'Invalid search query "%s"' % query) |             raise ExtractorError('Invalid search query "%s"' % query) | ||||||
|  |  | ||||||
|         prefix = mobj.group('prefix') |         prefix = mobj.group('prefix') | ||||||
|         query = mobj.group('query') |         query = mobj.group('query') | ||||||
| @@ -715,9 +717,9 @@ class SearchInfoExtractor(InfoExtractor): | |||||||
|         else: |         else: | ||||||
|             n = int(prefix) |             n = int(prefix) | ||||||
|             if n <= 0: |             if n <= 0: | ||||||
|                 raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query)) |                 raise ExtractorError('invalid download number %s for query "%s"' % (n, query)) | ||||||
|             elif n > self._MAX_RESULTS: |             elif n > self._MAX_RESULTS: | ||||||
|                 self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) |                 self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) | ||||||
|                 n = self._MAX_RESULTS |                 n = self._MAX_RESULTS | ||||||
|             return self._get_n_results(query, n) |             return self._get_n_results(query, n) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister