Deal with implicitly UTF-16 encoded webpages
These webpages don't specify an encoding and rely on the BOM (byte order mark) to indicate it.
This commit is contained in:
		| @@ -220,6 +220,8 @@ class InfoExtractor(object): | |||||||
|                           webpage_bytes[:1024]) |                           webpage_bytes[:1024]) | ||||||
|             if m: |             if m: | ||||||
|                 encoding = m.group(1).decode('ascii') |                 encoding = m.group(1).decode('ascii') | ||||||
|  |             elif webpage_bytes.startswith(b'\xff\xfe'): | ||||||
|  |                 encoding = 'utf-16' | ||||||
|             else: |             else: | ||||||
|                 encoding = 'utf-8' |                 encoding = 'utf-8' | ||||||
|         if self._downloader.params.get('dump_intermediate_pages', False): |         if self._downloader.params.get('dump_intermediate_pages', False): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister