[YoutubeDL:utils] Move the percent-encoding workaround for non-ASCII URLs to http_request and simplify it (Closes #6457)
This commit is contained in:
		| @@ -1860,27 +1860,6 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def urlopen(self, req): |     def urlopen(self, req): | ||||||
|         """ Start an HTTP download """ |         """ Start an HTTP download """ | ||||||
|  |  | ||||||
|         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not |  | ||||||
|         # always respected by websites, some tend to give out URLs with non percent-encoded |  | ||||||
|         # non-ASCII characters (see telemb.py, ard.py [#3412]) |  | ||||||
|         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) |  | ||||||
|         # To work around aforementioned issue we will replace request's original URL with |  | ||||||
|         # percent-encoded one |  | ||||||
|         req_is_string = isinstance(req, compat_basestring) |  | ||||||
|         url = req if req_is_string else req.get_full_url() |  | ||||||
|         url_escaped = escape_url(url) |  | ||||||
|  |  | ||||||
|         # Substitute URL if any change after escaping |  | ||||||
|         if url != url_escaped: |  | ||||||
|             if req_is_string: |  | ||||||
|                 req = url_escaped |  | ||||||
|             else: |  | ||||||
|                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request |  | ||||||
|                 req = req_type( |  | ||||||
|                     url_escaped, data=req.data, headers=req.headers, |  | ||||||
|                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) |  | ||||||
|  |  | ||||||
|         return self._opener.open(req, timeout=self._socket_timeout) |         return self._opener.open(req, timeout=self._socket_timeout) | ||||||
|  |  | ||||||
|     def print_debug_header(self): |     def print_debug_header(self): | ||||||
|   | |||||||
| @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|         return ret |         return ret | ||||||
|  |  | ||||||
|     def http_request(self, req): |     def http_request(self, req): | ||||||
|  |         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not | ||||||
|  |         # always respected by websites, some tend to give out URLs with non percent-encoded | ||||||
|  |         # non-ASCII characters (see telemb.py, ard.py [#3412]) | ||||||
|  |         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | ||||||
|  |         # To work around aforementioned issue we will replace request's original URL with | ||||||
|  |         # percent-encoded one | ||||||
|  |         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) | ||||||
|  |         # the code of this workaround has been moved here from YoutubeDL.urlopen() | ||||||
|  |         url = req.get_full_url() | ||||||
|  |         url_escaped = escape_url(url) | ||||||
|  |  | ||||||
|  |         # Substitute URL if any change after escaping | ||||||
|  |         if url != url_escaped: | ||||||
|  |             req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request | ||||||
|  |             new_req = req_type( | ||||||
|  |                 url_escaped, data=req.data, headers=req.headers, | ||||||
|  |                 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | ||||||
|  |             new_req.timeout = req.timeout | ||||||
|  |             req = new_req | ||||||
|  |  | ||||||
|         for h, v in std_headers.items(): |         for h, v in std_headers.items(): | ||||||
|             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 |             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 | ||||||
|             # The dict keys are capitalized because of this bug by urllib |             # The dict keys are capitalized because of this bug by urllib | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․