[extractor/common] Allow angle brackets in attributes in _og_regexes (#7215)
This commit is contained in:
@@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
             <meta content='application/x-shockwave-flash' property='og:video:type'>
             <meta content='Foo' property=og:foobar>
+            <meta name="og:test1" content='foo > < bar'/>
+            <meta name="og:test2" content="foo >//< bar"/>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
         self.assertEqual(ie._og_search_video_url(html, default=None), None)
         self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
 
     def test_html_search_meta(self):
         ie = self.ie
@@ -645,7 +645,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
         property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
                        % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․