1
0
mirror of https://github.com/TeamNewPipe/NewPipe synced 2025-11-16 23:27:15 +00:00

[Bug] Fix missing subtitle text in manually downloaded *.SRT files. (issue #10030)

- Previously, *.SRT files only contained timestamps and sequence numbers, without the actual text content.
- Added recursive text extraction to handle nested tags in TTML
  files (e.g. <span> tags).
This commit is contained in:
TransZAllen
2025-08-26 18:19:10 +08:00
parent 9bc8139b8c
commit 2c35db7a07

View File

@@ -54,6 +54,30 @@ public class SrtFromTtmlWriter {
out.write(text.getBytes(charset));
}
/**
 * Walks the subtree rooted at {@code node} depth-first and accumulates its
 * textual content into {@code text}.
 * <ul>
 *   <li>A {@link TextNode} contributes the value returned by its {@code text()}.</li>
 *   <li>An {@link Element} whose tag name is {@code br} (case-insensitive)
 *       contributes {@code NEW_LINE}.</li>
 *   <li>Any other node contributes nothing itself; only its descendants are
 *       visited, so text wrapped in nested tags such as {@code <span>} is
 *       still collected.</li>
 * </ul>
 *
 * @param node the node whose subtree is scanned
 * @param text the buffer that receives the extracted text, in document order
 */
private void extractText(final Node node, final StringBuilder text) {
    final boolean isLineBreak = node instanceof Element
            && ((Element) node).tagName().equalsIgnoreCase("br");

    if (node instanceof TextNode) {
        final TextNode textNode = (TextNode) node;
        text.append(textNode.text());
    } else if (isLineBreak) {
        // <br> carries no text of its own; it stands for a line break
        text.append(NEW_LINE);
    }

    // Descend into the children regardless of the node type handled above
    node.childNodes().forEach(descendant -> extractText(descendant, text));
}
public void build(final SharpStream ttml) throws IOException {
/*
* TTML parser with BASIC support
@@ -81,14 +105,8 @@ public class SrtFromTtmlWriter {
for (final Element paragraph : paragraphList) {
text.setLength(0);
for (final Node children : paragraph.childNodes()) {
if (children instanceof TextNode) {
text.append(((TextNode) children).text());
} else if (children instanceof Element
&& ((Element) children).tagName().equalsIgnoreCase("br")) {
text.append(NEW_LINE);
}
}
// Recursively extract text from all child nodes
extractText(paragraph, text);
if (ignoreEmptyFrames && text.length() < 1) {
continue;