[youtube] Improve tags extraction and add test
This commit is contained in:
		| @@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'upload_date': '20121002', | ||||
|                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | ||||
|                 'categories': ['Science & Technology'], | ||||
|                 'tags': ['youtube-dl'], | ||||
|                 'like_count': int, | ||||
|                 'dislike_count': int, | ||||
|                 'start_time': 1, | ||||
| @@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20120506', | ||||
|                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', | ||||
|                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', | ||||
|                 'description': 'md5:782e8651347686cba06e58f71ab51773', | ||||
|                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', | ||||
|                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', | ||||
|                          'iconic ep', 'iconic', 'love', 'it'], | ||||
|                 'uploader': 'Icona Pop', | ||||
|                 'uploader_id': 'IconaPop', | ||||
|             } | ||||
| @@ -1072,8 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         else: | ||||
|             video_categories = None | ||||
|  | ||||
|         video_tags = re.findall(r'''<meta(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+property=['"]?og:video:tag['"]?(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?\s+content=['"]?([^>'"]+?)['"]?\s*>''' | ||||
|         , video_webpage, re.DOTALL | re.IGNORECASE); | ||||
|         video_tags = [ | ||||
|             unescapeHTML(m.group('content')) | ||||
|             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] | ||||
|  | ||||
|         # description | ||||
|         video_description = get_element_by_id("eow-description", video_webpage) | ||||
|         if video_description: | ||||
| @@ -1261,8 +1267,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'title': video_title, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': video_description, | ||||
|             'tags' : video_tags, | ||||
|             'categories': video_categories, | ||||
|             'tags': video_tags, | ||||
|             'subtitles': video_subtitles, | ||||
|             'automatic_captions': automatic_captions, | ||||
|             'duration': video_duration, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․