[utils] Support TTML without default namespace
Strictly speaking, TTML without a default namespace is invalid, but Yahoo serves subtitles in this form, so it needs to be supported.
This commit is contained in:
		| @@ -621,6 +621,21 @@ Line | |||||||
| ''' | ''' | ||||||
|         self.assertEqual(dfxp2srt(dfxp_data), srt_data) |         self.assertEqual(dfxp2srt(dfxp_data), srt_data) | ||||||
|  |  | ||||||
|  |         dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?> | ||||||
|  |             <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> | ||||||
|  |             <body> | ||||||
|  |                 <div xml:lang="en"> | ||||||
|  |                     <p begin="0" end="1">The first line</p> | ||||||
|  |                 </div> | ||||||
|  |             </body> | ||||||
|  |             </tt>''' | ||||||
|  |         srt_data = '''1 | ||||||
|  | 00:00:00,000 --> 00:00:01,000 | ||||||
|  | The first line | ||||||
|  |  | ||||||
|  | ''' | ||||||
|  |         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data): | |||||||
|         out = str_or_empty(node.text) |         out = str_or_empty(node.text) | ||||||
|  |  | ||||||
|         for child in node: |         for child in node: | ||||||
|             if child.tag == _x('ttml:br'): |             if child.tag in (_x('ttml:br'), 'br'): | ||||||
|                 out += '\n' + str_or_empty(child.tail) |                 out += '\n' + str_or_empty(child.tail) | ||||||
|             elif child.tag == _x('ttml:span'): |             elif child.tag in (_x('ttml:span'), 'span'): | ||||||
|                 out += str_or_empty(parse_node(child)) |                 out += str_or_empty(parse_node(child)) | ||||||
|             else: |             else: | ||||||
|                 out += str_or_empty(xml.etree.ElementTree.tostring(child)) |                 out += str_or_empty(xml.etree.ElementTree.tostring(child)) | ||||||
| @@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data): | |||||||
|  |  | ||||||
|     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) |     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) | ||||||
|     out = [] |     out = [] | ||||||
|     paras = dfxp.findall(_x('.//ttml:p')) |     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') | ||||||
|  |  | ||||||
|  |     if not paras: | ||||||
|  |         raise ValueError('Invalid dfxp/TTML subtitle') | ||||||
|  |  | ||||||
|     for para, index in zip(paras, itertools.count(1)): |     for para, index in zip(paras, itertools.count(1)): | ||||||
|         begin_time = parse_dfxp_time_expr(para.attrib['begin']) |         begin_time = parse_dfxp_time_expr(para.attrib['begin']) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan