Extract original URL from next_url parameter of verify_age page, before actual extract
This commit is contained in:
		
							
								
								
									
										12
									
								
								youtube-dl
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								youtube-dl
									
									
									
									
									
								
							| @@ -1171,7 +1171,9 @@ class InfoExtractor(object): | |||||||
| class YoutubeIE(InfoExtractor): | class YoutubeIE(InfoExtractor): | ||||||
| 	"""Information extractor for youtube.com.""" | 	"""Information extractor for youtube.com.""" | ||||||
|  |  | ||||||
| 	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' | 	_PREFIX = r'(?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)' | ||||||
|  | 	_VALID_URL = r'^('+_PREFIX+r'(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' | ||||||
|  | 	_VALID_URL_WITH_AGE = r'^('+_PREFIX+')verify_age\?next_url=([^&]+)(?:.+)?$' | ||||||
| 	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | 	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||||
| 	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' | 	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' | ||||||
| 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||||
| @@ -1335,6 +1337,14 @@ class YoutubeIE(InfoExtractor): | |||||||
| 			return | 			return | ||||||
|  |  | ||||||
| 	def _real_extract(self, url): | 	def _real_extract(self, url): | ||||||
|  | 		# Extract original video URL from URL with age verification, using next_url parameter | ||||||
|  | 		mobj = re.match(self._VALID_URL_WITH_AGE, url) | ||||||
|  | 		if mobj: | ||||||
|  | 			urldecode = lambda x: re.sub(r'%([0-9a-hA-H][0-9a-hA-H])', lambda m: chr(int(m.group(1), 16)), x) | ||||||
|  | 			# Keep original domain. We can probably change to www.youtube.com, but it should not hurt so keep it. | ||||||
|  | 			# We just make sure we do not have double //, in URL, so we strip starting slash in next_url. | ||||||
|  | 			url = mobj.group(1) + re.sub(r'^/', '', urldecode(mobj.group(2))) | ||||||
|  |  | ||||||
| 		# Extract video id from URL | 		# Extract video id from URL | ||||||
| 		mobj = re.match(self._VALID_URL, url) | 		mobj = re.match(self._VALID_URL, url) | ||||||
| 		if mobj is None: | 		if mobj is None: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Witold Baryluk
					Witold Baryluk