Deal with implicitly UTF-16 encoded webpages
These webpages don't specify an encoding and rely on the byte order mark (BOM) instead.
This commit is contained in:
		| @@ -220,6 +220,8 @@ class InfoExtractor(object): | ||||
|                           webpage_bytes[:1024]) | ||||
|             if m: | ||||
|                 encoding = m.group(1).decode('ascii') | ||||
|             elif webpage_bytes.startswith(b'\xff\xfe'): | ||||
|                 encoding = 'utf-16' | ||||
|             else: | ||||
|                 encoding = 'utf-8' | ||||
|         if self._downloader.params.get('dump_intermediate_pages', False): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister