[utils] Handle HTMLParseError in extract_attributes (closes #13349)
This commit is contained in:
		| @@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase): | |||||||
|             supports_outside_bmp = False |             supports_outside_bmp = False | ||||||
|         if supports_outside_bmp: |         if supports_outside_bmp: | ||||||
|             self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) |             self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) | ||||||
|  |         # Malformed HTML should not break attributes extraction on older Python | ||||||
|  |         self.assertEqual(extract_attributes('<mal"formed/>'), {}) | ||||||
|  |  | ||||||
|     def test_clean_html(self): |     def test_clean_html(self): | ||||||
|         self.assertEqual(clean_html('a:\nb'), 'a: b') |         self.assertEqual(clean_html('a:\nb'), 'a: b') | ||||||
|   | |||||||
| @@ -36,6 +36,7 @@ import xml.etree.ElementTree | |||||||
| import zlib | import zlib | ||||||
|  |  | ||||||
| from .compat import ( | from .compat import ( | ||||||
|  |     compat_HTMLParseError, | ||||||
|     compat_HTMLParser, |     compat_HTMLParser, | ||||||
|     compat_basestring, |     compat_basestring, | ||||||
|     compat_chr, |     compat_chr, | ||||||
| @@ -409,8 +410,12 @@ def extract_attributes(html_element): | |||||||
|     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. |     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. | ||||||
|     """ |     """ | ||||||
|     parser = HTMLAttributeParser() |     parser = HTMLAttributeParser() | ||||||
|     parser.feed(html_element) |     try: | ||||||
|     parser.close() |         parser.feed(html_element) | ||||||
|  |         parser.close() | ||||||
|  |     # Older Python may throw HTMLParseError in case of malformed HTML | ||||||
|  |     except compat_HTMLParseError: | ||||||
|  |         pass | ||||||
|     return parser.attrs |     return parser.attrs | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․