Merge branch 'purdeaandrei-save_tags_simpler_only_saves_tags_to_info_json'
This commit is contained in:
		| @@ -181,6 +181,7 @@ class InfoExtractor(object): | |||||||
|                     by YoutubeDL if it's missing) |                     by YoutubeDL if it's missing) | ||||||
|     categories:     A list of categories that the video falls in, for example |     categories:     A list of categories that the video falls in, for example | ||||||
|                     ["Sports", "Berlin"] |                     ["Sports", "Berlin"] | ||||||
|  |     tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"] | ||||||
|     is_live:        True, False, or None (=unknown). Whether this video is a |     is_live:        True, False, or None (=unknown). Whether this video is a | ||||||
|                     live stream that goes on instead of a fixed-length video. |                     live stream that goes on instead of a fixed-length video. | ||||||
|     start_time:     Time in seconds where the reproduction should start, as |     start_time:     Time in seconds where the reproduction should start, as | ||||||
| @@ -630,6 +631,12 @@ class InfoExtractor(object): | |||||||
|             template % (content_re, property_re), |             template % (content_re, property_re), | ||||||
|         ] |         ] | ||||||
|  |  | ||||||
|  |     @staticmethod | ||||||
|  |     def _meta_regex(prop): | ||||||
|  |         return r'''(?isx)<meta | ||||||
|  |                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) | ||||||
|  |                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) | ||||||
|  |  | ||||||
|     def _og_search_property(self, prop, html, name=None, **kargs): |     def _og_search_property(self, prop, html, name=None, **kargs): | ||||||
|         if name is None: |         if name is None: | ||||||
|             name = 'OpenGraph %s' % prop |             name = 'OpenGraph %s' % prop | ||||||
| @@ -660,9 +667,7 @@ class InfoExtractor(object): | |||||||
|         if display_name is None: |         if display_name is None: | ||||||
|             display_name = name |             display_name = name | ||||||
|         return self._html_search_regex( |         return self._html_search_regex( | ||||||
|             r'''(?isx)<meta |             self._meta_regex(name), | ||||||
|                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) |  | ||||||
|                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name), |  | ||||||
|             html, display_name, fatal=fatal, group='content', **kwargs) |             html, display_name, fatal=fatal, group='content', **kwargs) | ||||||
|  |  | ||||||
|     def _dc_search_uploader(self, html): |     def _dc_search_uploader(self, html): | ||||||
|   | |||||||
| @@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|                 'upload_date': '20121002', |                 'upload_date': '20121002', | ||||||
|                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', |                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | ||||||
|                 'categories': ['Science & Technology'], |                 'categories': ['Science & Technology'], | ||||||
|  |                 'tags': ['youtube-dl'], | ||||||
|                 'like_count': int, |                 'like_count': int, | ||||||
|                 'dislike_count': int, |                 'dislike_count': int, | ||||||
|                 'start_time': 1, |                 'start_time': 1, | ||||||
| @@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|                 'ext': 'mp4', |                 'ext': 'mp4', | ||||||
|                 'upload_date': '20120506', |                 'upload_date': '20120506', | ||||||
|                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', |                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', | ||||||
|                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', |                 'description': 'md5:782e8651347686cba06e58f71ab51773', | ||||||
|  |                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', | ||||||
|  |                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', | ||||||
|  |                          'iconic ep', 'iconic', 'love', 'it'], | ||||||
|                 'uploader': 'Icona Pop', |                 'uploader': 'Icona Pop', | ||||||
|                 'uploader_id': 'IconaPop', |                 'uploader_id': 'IconaPop', | ||||||
|             } |             } | ||||||
| @@ -1072,6 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|         else: |         else: | ||||||
|             video_categories = None |             video_categories = None | ||||||
|  |  | ||||||
|  |         video_tags = [ | ||||||
|  |             unescapeHTML(m.group('content')) | ||||||
|  |             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] | ||||||
|  |  | ||||||
|         # description |         # description | ||||||
|         video_description = get_element_by_id("eow-description", video_webpage) |         video_description = get_element_by_id("eow-description", video_webpage) | ||||||
|         if video_description: |         if video_description: | ||||||
| @@ -1260,6 +1268,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|             'thumbnail': video_thumbnail, |             'thumbnail': video_thumbnail, | ||||||
|             'description': video_description, |             'description': video_description, | ||||||
|             'categories': video_categories, |             'categories': video_categories, | ||||||
|  |             'tags': video_tags, | ||||||
|             'subtitles': video_subtitles, |             'subtitles': video_subtitles, | ||||||
|             'automatic_captions': automatic_captions, |             'automatic_captions': automatic_captions, | ||||||
|             'duration': video_duration, |             'duration': video_duration, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․