[utils] Handle HTMLParseError in extract_attributes (closes #13349)
This commit is contained in:
		| @@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase): | ||||
|             supports_outside_bmp = False | ||||
|         if supports_outside_bmp: | ||||
|             self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) | ||||
|         # Malformed HTML should not break attribute extraction on older Python | ||||
|         self.assertEqual(extract_attributes('<mal"formed/>'), {}) | ||||
|  | ||||
|     def test_clean_html(self): | ||||
|         self.assertEqual(clean_html('a:\nb'), 'a: b') | ||||
|   | ||||
| @@ -36,6 +36,7 @@ import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| from .compat import ( | ||||
|     compat_HTMLParseError, | ||||
|     compat_HTMLParser, | ||||
|     compat_basestring, | ||||
|     compat_chr, | ||||
| @@ -409,8 +410,12 @@ def extract_attributes(html_element): | ||||
|     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. | ||||
|     """ | ||||
|     parser = HTMLAttributeParser() | ||||
|     parser.feed(html_element) | ||||
|     parser.close() | ||||
|     try: | ||||
|         parser.feed(html_element) | ||||
|         parser.close() | ||||
|     # Older Python versions may raise HTMLParseError on malformed HTML | ||||
|     except compat_HTMLParseError: | ||||
|         pass | ||||
|     return parser.attrs | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․