[YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457)
This commit is contained in:
		| @@ -1860,27 +1860,6 @@ class YoutubeDL(object): | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|  | ||||
|         # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not | ||||
|         # always respected by websites: some tend to give out URLs with non-percent-encoded | ||||
|         # non-ASCII characters (see telemb.py, ard.py [#3412]) | ||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||
|         # To work around the aforementioned issue we will replace the request's original URL with | ||||
|         # a percent-encoded one | ||||
|         req_is_string = isinstance(req, compat_basestring) | ||||
|         url = req if req_is_string else req.get_full_url() | ||||
|         url_escaped = escape_url(url) | ||||
|  | ||||
|         # Substitute URL if any change after escaping | ||||
|         if url != url_escaped: | ||||
|             if req_is_string: | ||||
|                 req = url_escaped | ||||
|             else: | ||||
|                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request | ||||
|                 req = req_type( | ||||
|                     url_escaped, data=req.data, headers=req.headers, | ||||
|                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||
|  | ||||
|         return self._opener.open(req, timeout=self._socket_timeout) | ||||
|  | ||||
|     def print_debug_header(self): | ||||
|   | ||||
| @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|         return ret | ||||
|  | ||||
|     def http_request(self, req): | ||||
|         # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not | ||||
|         # always respected by websites: some tend to give out URLs with non-percent-encoded | ||||
|         # non-ASCII characters (see telemb.py, ard.py [#3412]) | ||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||
|         # To work around the aforementioned issue we will replace the request's original URL with | ||||
|         # a percent-encoded one | ||||
|         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09), | ||||
|         # the code of this workaround has been moved here from YoutubeDL.urlopen() | ||||
|         url = req.get_full_url() | ||||
|         url_escaped = escape_url(url) | ||||
|  | ||||
|         # Substitute URL if any change after escaping | ||||
|         if url != url_escaped: | ||||
|             req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request | ||||
|             new_req = req_type( | ||||
|                 url_escaped, data=req.data, headers=req.headers, | ||||
|                 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||
|             new_req.timeout = req.timeout | ||||
|             req = new_req | ||||
|  | ||||
|         for h, v in std_headers.items(): | ||||
|             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 | ||||
|             # The dict keys are capitalized because of this bug by urllib | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․