[extractor/common] Add the encoding parameter
The QQMusic info extractor needs a forced encoding to work correctly.
This commit is contained in:
		| @@ -324,7 +324,7 @@ class InfoExtractor(object): | ||||
|                 self._downloader.report_warning(errmsg) | ||||
|                 return False | ||||
|  | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): | ||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): | ||||
|         """ Returns a tuple (page content as string, URL handle) """ | ||||
|         # Strip hashes from the URL (#1038) | ||||
|         if isinstance(url_or_request, (compat_str, str)): | ||||
| @@ -334,14 +334,11 @@ class InfoExtractor(object): | ||||
|         if urlh is False: | ||||
|             assert not fatal | ||||
|             return False | ||||
|         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) | ||||
|         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding) | ||||
|         return (content, urlh) | ||||
|  | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None): | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         if prefix is not None: | ||||
|             webpage_bytes = prefix + webpage_bytes | ||||
|     @staticmethod | ||||
|     def _guess_encoding_from_content(content_type, webpage_bytes): | ||||
|         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) | ||||
|         if m: | ||||
|             encoding = m.group(1) | ||||
| @@ -354,6 +351,16 @@ class InfoExtractor(object): | ||||
|                 encoding = 'utf-16' | ||||
|             else: | ||||
|                 encoding = 'utf-8' | ||||
|  | ||||
|         return encoding | ||||
|  | ||||
|     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): | ||||
|         content_type = urlh.headers.get('Content-Type', '') | ||||
|         webpage_bytes = urlh.read() | ||||
|         if prefix is not None: | ||||
|             webpage_bytes = prefix + webpage_bytes | ||||
|         if not encoding: | ||||
|             encoding = self._guess_encoding_from_content(content_type, webpage_bytes) | ||||
|         if self._downloader.params.get('dump_intermediate_pages', False): | ||||
|             try: | ||||
|                 url = url_or_request.get_full_url() | ||||
| @@ -410,13 +417,13 @@ class InfoExtractor(object): | ||||
|  | ||||
|         return content | ||||
|  | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5): | ||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): | ||||
|         """ Returns the data of the page as a string """ | ||||
|         success = False | ||||
|         try_count = 0 | ||||
|         while success is False: | ||||
|             try: | ||||
|                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) | ||||
|                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) | ||||
|                 success = True | ||||
|             except compat_http_client.IncompleteRead as e: | ||||
|                 try_count += 1 | ||||
| @@ -431,10 +438,10 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _download_xml(self, url_or_request, video_id, | ||||
|                       note='Downloading XML', errnote='Unable to download XML', | ||||
|                       transform_source=None, fatal=True): | ||||
|                       transform_source=None, fatal=True, encoding=None): | ||||
|         """Return the xml as an xml.etree.ElementTree.Element""" | ||||
|         xml_string = self._download_webpage( | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal) | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) | ||||
|         if xml_string is False: | ||||
|             return xml_string | ||||
|         if transform_source: | ||||
| @@ -445,9 +452,10 @@ class InfoExtractor(object): | ||||
|                        note='Downloading JSON metadata', | ||||
|                        errnote='Unable to download JSON metadata', | ||||
|                        transform_source=None, | ||||
|                        fatal=True): | ||||
|                        fatal=True, encoding=None): | ||||
|         json_string = self._download_webpage( | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal) | ||||
|             url_or_request, video_id, note, errnote, fatal=fatal, | ||||
|             encoding=encoding) | ||||
|         if (not fatal) and json_string is False: | ||||
|             return None | ||||
|         return self._parse_json( | ||||
|   | ||||
| @@ -24,7 +24,7 @@ class QQMusicIE(InfoExtractor): | ||||
|             'title': '可惜没如果', | ||||
|             'upload_date': '20141227', | ||||
|             'creator': '林俊杰', | ||||
|             'description': 'md5:242c97c2847e0495583b7b13764f7106', | ||||
|             'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
| @@ -41,7 +41,7 @@ class QQMusicIE(InfoExtractor): | ||||
|         detail_info_page = self._download_webpage( | ||||
|             'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, | ||||
|             mid, note='Download song detail info', | ||||
|             errnote='Unable to get song detail info') | ||||
|             errnote='Unable to get song detail info', encoding='gbk') | ||||
|  | ||||
|         song_name = self._html_search_regex( | ||||
|             r"songname:\s*'([^']+)'", detail_info_page, 'song name') | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan