1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-12-08 01:38:06 +00:00

Merging #7866: Add start and end properties to WikiText AST nodes

commit 5687d9f44b
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Wed Dec 6 11:33:43 2023 +0800

    Fix for html parser

commit df0a1b184e
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Wed Dec 6 02:47:47 2023 +0800

    Fix HTML AST node boundary parsing in WikiText

commit ac8dda0a1a
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Sat Dec 2 13:02:52 2023 +0800

    update test-wikitext-parser.js, change for-const-of -to .utils.each, add more range attributes

commit e2b9a4ed57
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Wed Nov 29 22:35:39 2023 +0800

    Add more start-end range attributes for AST

commit d3e62ec56a
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Wed Nov 29 20:45:00 2023 +0800

    Add rule attribute for WikiText AST nodes

commit 4200495055
Author: Gk0Wk <nmg_wk@yeah.net>
Date:   Wed Nov 29 15:48:38 2023 +0800

    Add start and end properties to AST nodes for list, codeblock, and all other elements
This commit is contained in:
Jeremy Ruston
2024-06-08 16:40:20 +01:00
parent 240496d85c
commit e4c682d04b
21 changed files with 245 additions and 99 deletions

View File

@@ -114,7 +114,7 @@ exports.parseStringLiteral = function(source,pos) {
var match = reString.exec(source);
if(match && match.index === pos) {
node.value = match[1] !== undefined ? match[1] :(
match[2] !== undefined ? match[2] : match[3]
match[2] !== undefined ? match[2] : match[3]
);
node.end = pos + match[0].length;
return node;

View File

@@ -29,13 +29,16 @@ exports.init = function(parser) {
exports.parse = function() {
var reEnd = /(\r?\n```$)/mg;
var languageStart = this.parser.pos + 3,
languageEnd = languageStart + this.match[1].length;
// Move past the match
this.parser.pos = this.matchRegExp.lastIndex;
// Look for the end of the block
reEnd.lastIndex = this.parser.pos;
var match = reEnd.exec(this.parser.source),
text;
text,
codeStart = this.parser.pos;
// Process the block
if(match) {
text = this.parser.source.substring(this.parser.pos,match.index);
@@ -48,8 +51,8 @@ exports.parse = function() {
return [{
type: "codeblock",
attributes: {
code: {type: "string", value: text},
language: {type: "string", value: this.match[1]}
code: {type: "string", value: text, start: codeStart, end: this.parser.pos},
language: {type: "string", value: this.match[1], start: languageStart, end: languageEnd}
}
}];
};

View File

@@ -33,7 +33,8 @@ exports.parse = function() {
// Look for the end marker
reEnd.lastIndex = this.parser.pos;
var match = reEnd.exec(this.parser.source),
text;
text,
start = this.parser.pos;
// Process the text
if(match) {
text = this.parser.source.substring(this.parser.pos,match.index);
@@ -47,7 +48,9 @@ exports.parse = function() {
tag: "code",
children: [{
type: "text",
text: text
text: text,
start: start,
end: this.parser.pos
}]
}];
};

View File

@@ -31,6 +31,7 @@ exports.init = function(parser) {
exports.parse = function() {
// Move past the match
var start = this.parser.pos;
this.parser.pos = this.matchRegExp.lastIndex;
// Create the link unless it is suppressed
if(this.match[0].substr(0,1) === "~") {
@@ -46,7 +47,7 @@ exports.parse = function() {
rel: {type: "string", value: "noopener noreferrer"}
},
children: [{
type: "text", text: this.match[0]
type: "text", text: this.match[0], start: start, end: this.parser.pos
}]
}];
}

View File

@@ -31,6 +31,16 @@ exports.init = function(parser) {
exports.parse = function() {
// Move past the match
var filterStart = this.parser.pos + 3;
var filterEnd = filterStart + this.match[1].length;
var toolTipStart = filterEnd + 1;
var toolTipEnd = toolTipStart + (this.match[2] ? this.match[2].length : 0);
var templateStart = toolTipEnd + 2;
var templateEnd = templateStart + (this.match[3] ? this.match[3].length : 0);
var styleStart = templateEnd + 2;
var styleEnd = styleStart + (this.match[4] ? this.match[4].length : 0);
var classesStart = styleEnd + 1;
var classesEnd = classesStart + (this.match[5] ? this.match[5].length : 0);
this.parser.pos = this.matchRegExp.lastIndex;
// Get the match details
var filter = this.match[1],
@@ -42,21 +52,21 @@ exports.parse = function() {
var node = {
type: "list",
attributes: {
filter: {type: "string", value: filter}
filter: {type: "string", value: filter, start: filterStart, end: filterEnd},
},
isBlock: true
};
if(tooltip) {
node.attributes.tooltip = {type: "string", value: tooltip};
node.attributes.tooltip = {type: "string", value: tooltip, start: toolTipStart, end: toolTipEnd};
}
if(template) {
node.attributes.template = {type: "string", value: template};
node.attributes.template = {type: "string", value: template, start: templateStart, end: templateEnd};
}
if(style) {
node.attributes.style = {type: "string", value: style};
node.attributes.style = {type: "string", value: style, start: styleStart, end: styleEnd};
}
if(classes) {
node.attributes.itemClass = {type: "string", value: classes.split(".").join(" ")};
node.attributes.itemClass = {type: "string", value: classes.split(".").join(" "), start: classesStart, end: classesEnd};
}
return [node];
};

View File

@@ -30,6 +30,16 @@ exports.init = function(parser) {
};
exports.parse = function() {
var filterStart = this.parser.pos + 3;
var filterEnd = filterStart + this.match[1].length;
var toolTipStart = filterEnd + 1;
var toolTipEnd = toolTipStart + (this.match[2] ? this.match[2].length : 0);
var templateStart = toolTipEnd + 2;
var templateEnd = templateStart + (this.match[3] ? this.match[3].length : 0);
var styleStart = templateEnd + 2;
var styleEnd = styleStart + (this.match[4] ? this.match[4].length : 0);
var classesStart = styleEnd + 1;
var classesEnd = classesStart + (this.match[5] ? this.match[5].length : 0);
// Move past the match
this.parser.pos = this.matchRegExp.lastIndex;
// Get the match details
@@ -42,20 +52,20 @@ exports.parse = function() {
var node = {
type: "list",
attributes: {
filter: {type: "string", value: filter}
filter: {type: "string", value: filter, start: filterStart, end: filterEnd},
}
};
if(tooltip) {
node.attributes.tooltip = {type: "string", value: tooltip};
node.attributes.tooltip = {type: "string", value: tooltip, start: toolTipStart, end: toolTipEnd};
}
if(template) {
node.attributes.template = {type: "string", value: template};
node.attributes.template = {type: "string", value: template, start: templateStart, end: templateEnd};
}
if(style) {
node.attributes.style = {type: "string", value: style};
node.attributes.style = {type: "string", value: style, start: styleStart, end: styleEnd};
}
if(classes) {
node.attributes.itemClass = {type: "string", value: classes.split(".").join(" ")};
node.attributes.itemClass = {type: "string", value: classes.split(".").join(" "), start: classesStart, end: classesEnd};
}
return [node];
};

View File

@@ -45,10 +45,11 @@ exports.parse = function() {
reEnd.lastIndex = this.parser.pos;
match = reEnd.exec(this.parser.source);
if(match) {
var start = this.parser.pos;
this.parser.pos = reEnd.lastIndex;
// Add a line break if the terminator was a line break
if(match[2]) {
tree.push({type: "element", tag: "br"});
tree.push({type: "element", tag: "br", start: start, end: this.parser.pos});
}
}
} while(match && !match[1]);

View File

@@ -30,15 +30,17 @@ exports.parse = function() {
// Move past the !s
this.parser.pos = this.matchRegExp.lastIndex;
// Parse any classes, whitespace and then the heading itself
var classStart = this.parser.pos;
var classes = this.parser.parseClasses();
var classEnd = this.parser.pos;
this.parser.skipWhitespace({treatNewlinesAsNonWhitespace: true});
var tree = this.parser.parseInlineRun(/(\r?\n)/mg);
// Return the heading
return [{
type: "element",
tag: "h" + headingLevel,
tag: "h" + headingLevel,
attributes: {
"class": {type: "string", value: classes.join(" ")}
"class": {type: "string", value: classes.join(" "), start: classStart, end: classEnd}
},
children: tree
}];

View File

@@ -44,6 +44,10 @@ Parse the most recent match
exports.parse = function() {
// Retrieve the most recent match so that recursive calls don't overwrite it
var tag = this.nextTag;
if (!tag.isSelfClosing) {
tag.openTagStart = tag.start;
tag.openTagEnd = tag.end;
}
this.nextTag = null;
// Advance the parser position to past the tag
this.parser.pos = tag.end;
@@ -60,6 +64,27 @@ exports.parse = function() {
var reEnd = new RegExp("(" + reEndString + ")","mg");
tag.children = this.parser.parseInlineRun(reEnd,{eatTerminator: true});
}
tag.end = this.parser.pos;
tag.closeTagEnd = tag.end;
if (tag.closeTagEnd === tag.openTagEnd || this.parser.source[tag.closeTagEnd - 1] !== '>') {
tag.closeTagStart = tag.end;
} else {
tag.closeTagStart = tag.closeTagEnd - 2;
var closeTagMinPos = tag.children.length > 0 ? tag.children[tag.children.length-1].end : tag.openTagEnd;
if (!Number.isSafeInteger(closeTagMinPos)) closeTagMinPos = tag.openTagEnd;
while (tag.closeTagStart >= closeTagMinPos) {
var char = this.parser.source[tag.closeTagStart];
if (char === '>') {
tag.closeTagStart = -1;
break;
}
if (char === '<') break;
tag.closeTagStart -= 1;
}
if (tag.closeTagStart < closeTagMinPos) {
tag.closeTagStart = tag.end;
}
}
}
// Return the tag
return [tag];

View File

@@ -122,9 +122,9 @@ exports.parseImage = function(source,pos) {
}
pos = token.end;
if(token.match[1]) {
node.attributes.tooltip = {type: "string", value: token.match[1].trim()};
node.attributes.tooltip = {type: "string", value: token.match[1].trim(),start: token.start,end:token.start + token.match[1].length - 1};
}
node.attributes.source = {type: "string", value: (token.match[2] || "").trim()};
node.attributes.source = {type: "string", value: (token.match[2] || "").trim(), start: token.start + (token.match[1] ? token.match[1].length : 0), end: token.end - 2};
// Update the end position
node.end = pos;
return node;

View File

@@ -38,13 +38,14 @@ exports.parse = function() {
// Parse the filter terminated by a line break
var reMatch = /(.*)(?:$|\r?\n)/mg;
reMatch.lastIndex = this.parser.pos;
var filterStart = this.parser.source;
var match = reMatch.exec(this.parser.source);
this.parser.pos = reMatch.lastIndex;
// Parse tree nodes to return
return [{
type: "importvariables",
attributes: {
filter: {type: "string", value: match[1]}
filter: {type: "string", value: match[1], start: filterStart, end: this.parser.pos}
},
children: []
}];

View File

@@ -74,6 +74,7 @@ exports.parse = function() {
// Match the list marker
var reMatch = /([\*#;:>]+)/mg;
reMatch.lastIndex = this.parser.pos;
var start = this.parser.pos;
var match = reMatch.exec(this.parser.source);
if(!match || match.index !== this.parser.pos) {
break;
@@ -94,9 +95,21 @@ exports.parse = function() {
}
// Construct the list element or reuse the previous one at this level
if(listStack.length <= t) {
var listElement = {type: "element", tag: listInfo.listTag, children: [
{type: "element", tag: listInfo.itemTag, children: []}
]};
var listElement = {
type: "element",
tag: listInfo.listTag,
children: [
{
type: "element",
tag: listInfo.itemTag,
children: [],
start: start,
end: this.parser.pos,
}
],
start: start,
end: this.parser.pos,
};
// Link this list element into the last child item of the parent list item
if(t) {
var prevListItem = listStack[t-1].children[listStack[t-1].children.length-1];
@@ -105,21 +118,33 @@ exports.parse = function() {
// Save this element in the stack
listStack[t] = listElement;
} else if(t === (match[0].length - 1)) {
listStack[t].children.push({type: "element", tag: listInfo.itemTag, children: []});
listStack[t].children.push({
type: "element",
tag: listInfo.itemTag,
children: [],
start: start,
end: this.parser.pos,
});
}
}
if(listStack.length > match[0].length) {
listStack.splice(match[0].length,listStack.length - match[0].length);
}
// Process the body of the list item into the last list item
var classStart = this.parser.pos;
var lastListChildren = listStack[listStack.length-1].children,
lastListItem = lastListChildren[lastListChildren.length-1],
classes = this.parser.parseClasses();
var classEnd = this.parser.pos;
this.parser.skipWhitespace({treatNewlinesAsNonWhitespace: true});
var tree = this.parser.parseInlineRun(/(\r?\n)/mg);
lastListItem.children.push.apply(lastListItem.children,tree);
lastListItem.end = this.parser.pos;
listStack[listStack.length-1].end = this.parser.pos;
if(classes.length > 0) {
$tw.utils.addClassToParseTreeNode(lastListItem,classes.join(" "));
lastListItem.attributes.class.start = classStart;
lastListItem.attributes.class.end = classEnd;
}
// Consume any whitespace following the list item
this.parser.skipWhitespace();

View File

@@ -96,15 +96,20 @@ exports.parseLink = function(source,pos) {
splitPos = null;
}
// Pull out the tooltip and URL
var tooltip, URL;
var tooltip, URL, urlStart;
textNode.start = pos;
if(splitPos) {
urlStart = splitPos + 1;
URL = source.substring(splitPos + 1,closePos).trim();
textNode.text = source.substring(pos,splitPos).trim();
textNode.end = splitPos;
} else {
urlStart = pos;
URL = source.substring(pos,closePos).trim();
textNode.text = URL;
textNode.end = closePos;
}
node.attributes.href = {type: "string", value: URL};
node.attributes.href = {type: "string", value: URL, start: urlStart, end: closePos};
node.attributes.target = {type: "string", value: "_blank"};
node.attributes.rel = {type: "string", value: "noopener noreferrer"};
// Update the end position

View File

@@ -29,32 +29,39 @@ exports.init = function(parser) {
exports.parse = function() {
// Move past the match
var start = this.parser.pos + 2;
this.parser.pos = this.matchRegExp.lastIndex;
// Process the link
var text = this.match[1],
link = this.match[2] || text;
link = this.match[2] || text,
textEndPos = this.parser.source.indexOf("|", start);
if (textEndPos < 0 || textEndPos > this.matchRegExp.lastIndex) {
textEndPos = this.matchRegExp.lastIndex - 2;
}
var linkStart = this.match[2] ? (start + this.match[1].length + 1) : start;
var linkEnd = linkStart + link.length;
if($tw.utils.isLinkExternal(link)) {
return [{
type: "element",
tag: "a",
attributes: {
href: {type: "string", value: link},
href: {type: "string", value: link, start: linkStart, end: linkEnd},
"class": {type: "string", value: "tc-tiddlylink-external"},
target: {type: "string", value: "_blank"},
rel: {type: "string", value: "noopener noreferrer"}
},
children: [{
type: "text", text: text
type: "text", text: text, start: start, end: textEndPos
}]
}];
} else {
return [{
type: "link",
attributes: {
to: {type: "string", value: link}
to: {type: "string", value: link, start: linkStart, end: linkEnd}
},
children: [{
type: "text", text: text
type: "text", text: text, start: start, end: textEndPos
}]
}];
}

View File

@@ -28,9 +28,13 @@ exports.parse = function() {
// Move past the <s
this.parser.pos = this.matchRegExp.lastIndex;
// Parse any classes, whitespace and then the optional cite itself
var classStart = this.parser.pos;
classes.push.apply(classes, this.parser.parseClasses());
var classEnd = this.parser.pos;
this.parser.skipWhitespace({treatNewlinesAsNonWhitespace: true});
var citeStart = this.parser.pos;
var cite = this.parser.parseInlineRun(/(\r?\n)/mg);
var citeEnd = this.parser.pos;
// before handling the cite, parse the body of the quote
var tree = this.parser.parseBlocks(reEndString);
// If we got a cite, put it before the text
@@ -38,18 +42,24 @@ exports.parse = function() {
tree.unshift({
type: "element",
tag: "cite",
children: cite
children: cite,
start: citeStart,
end: citeEnd
});
}
// Parse any optional cite
this.parser.skipWhitespace({treatNewlinesAsNonWhitespace: true});
citeStart = this.parser.pos;
cite = this.parser.parseInlineRun(/(\r?\n)/mg);
citeEnd = this.parser.pos;
// If we got a cite, push it
if(cite.length > 0) {
tree.push({
type: "element",
tag: "cite",
children: cite
children: cite,
start: citeStart,
end: citeEnd
});
}
// Return the blockquote element
@@ -57,7 +67,7 @@ exports.parse = function() {
type: "element",
tag: "blockquote",
attributes: {
class: { type: "string", value: classes.join(" ") },
class: { type: "string", value: classes.join(" "), start: classStart, end: classEnd },
},
children: tree
}];

View File

@@ -29,10 +29,11 @@ exports.init = function(parser) {
exports.parse = function() {
var match = this.match[0];
// Move past the match
var start = this.parser.pos;
this.parser.pos = this.matchRegExp.lastIndex;
// Create the link unless it is suppressed
if(match.substr(0,1) === "~") {
return [{type: "text", text: match.substr(1)}];
return [{type: "text", text: match.substr(1), start: start+1, end: this.parser.pos}];
} else {
return [{
type: "link",
@@ -41,10 +42,12 @@ exports.parse = function() {
},
children: [{
type: "text",
text: match
text: match,
start: start,
end: this.parser.pos
}]
}];
}
};
})();
})();

View File

@@ -150,7 +150,7 @@ exports.parse = function() {
} else {
// Otherwise, create a new row if this one is of a different type
if(rowType !== currRowType) {
rowContainer = {type: "element", tag: rowContainerTypes[rowType], children: []};
rowContainer = {type: "element", tag: rowContainerTypes[rowType], children: [], start: this.parser.pos, end: this.parser.pos};
table.children.push(rowContainer);
currRowType = rowType;
}
@@ -178,6 +178,7 @@ exports.parse = function() {
// Increment the row count
rowCount++;
}
rowContainer.end = this.parser.pos;
}
rowMatch = rowRegExp.exec(this.parser.source);
}

View File

@@ -46,6 +46,7 @@ exports.parse = function() {
renderType = this.match[2];
// Move past the match
this.parser.pos = this.matchRegExp.lastIndex;
var start = this.parser.pos;
// Look for the end of the block
reEnd.lastIndex = this.parser.pos;
var match = reEnd.exec(this.parser.source),
@@ -74,7 +75,9 @@ exports.parse = function() {
tag: "pre",
children: [{
type: "text",
text: text
text: text,
start: start,
end: this.parser.pos
}]
}];
}

View File

@@ -36,6 +36,7 @@ exports.parse = function() {
// Get the details of the match
var linkText = this.match[0];
// Move past the macro call
var start = this.parser.pos;
this.parser.pos = this.matchRegExp.lastIndex;
// If the link starts with the unwikilink character then just output it as plain text
if(linkText.substr(0,1) === $tw.config.textPrimitives.unWikiLink) {
@@ -57,7 +58,9 @@ exports.parse = function() {
},
children: [{
type: "text",
text: linkText
text: linkText,
start: start,
end: this.parser.pos
}]
}];
};

View File

@@ -91,6 +91,11 @@ var WikiParser = function(type,text,options) {
} else {
topBranch.push.apply(topBranch,this.parseBlocks());
}
// Build rules' name map
this.usingRuleMap = {};
$tw.utils.each(this.pragmaRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
$tw.utils.each(this.blockRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
$tw.utils.each(this.inlineRules, function (ruleInfo) { self.usingRuleMap[ruleInfo.rule.name] = Object.getPrototypeOf(ruleInfo.rule); });
// Return the parse tree
};
@@ -209,8 +214,13 @@ WikiParser.prototype.parsePragmas = function() {
break;
}
// Process the pragma rule
var start = this.pos;
var subTree = nextMatch.rule.parse();
if(subTree.length > 0) {
// Set the start and end positions of the pragma rule if
if (subTree[0].start === undefined) subTree[0].start = start;
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
// Quick hack; we only cope with a single parse tree node being returned, which is true at the moment
currentTreeBranch.push.apply(currentTreeBranch,subTree);
subTree[0].children = [];
@@ -235,13 +245,21 @@ WikiParser.prototype.parseBlock = function(terminatorRegExpString) {
// Look for a block rule that applies at the current position
var nextMatch = this.findNextMatch(this.blockRules,this.pos);
if(nextMatch && nextMatch.matchIndex === this.pos) {
return nextMatch.rule.parse();
var start = this.pos;
var subTree = nextMatch.rule.parse();
// Set the start and end positions of the first and last blocks if they're not already set
if (subTree.length > 0) {
if (subTree[0].start === undefined) subTree[0].start = start;
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
}
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
return subTree;
}
// Treat it as a paragraph if we didn't find a block rule
var start = this.pos;
var children = this.parseInlineRun(terminatorRegExp);
var end = this.pos;
return [{type: "element", tag: "p", children: children, start: start, end: end }];
return [{type: "element", tag: "p", children: children, start: start, end: end, rule: null }];
};
/*
@@ -332,7 +350,16 @@ WikiParser.prototype.parseInlineRunUnterminated = function(options) {
this.pos = nextMatch.matchIndex;
}
// Process the run rule
tree.push.apply(tree,nextMatch.rule.parse());
var start = this.pos;
var subTree = nextMatch.rule.parse();
// Set the start and end positions of the first and last child if they're not already set
if (subTree.length > 0) {
// Set the start and end positions of the first and last child if they're not already set
if (subTree[0].start === undefined) subTree[0].start = start;
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
}
$tw.utils.each(subTree, function (node) { node.rule = nextMatch.rule.name; });
tree.push.apply(tree,subTree);
// Look for the next run rule
nextMatch = this.findNextMatch(this.inlineRules,this.pos);
}
@@ -383,7 +410,15 @@ WikiParser.prototype.parseInlineRunTerminatedExtended = function(terminatorRegEx
this.pos = inlineRuleMatch.matchIndex;
}
// Process the inline rule
tree.push.apply(tree,inlineRuleMatch.rule.parse());
var start = this.pos;
var subTree = inlineRuleMatch.rule.parse();
// Set the start and end positions of the first and last child if they're not already set
if (subTree.length > 0) {
if (subTree[0].start === undefined) subTree[0].start = start;
if (subTree[subTree.length - 1].end === undefined) subTree[subTree.length - 1].end = this.pos;
}
$tw.utils.each(subTree, function (node) { node.rule = inlineRuleMatch.rule.name; });
tree.push.apply(tree,subTree);
// Look for the next inline rule
inlineRuleMatch = this.findNextMatch(this.inlineRules,this.pos);
// Look for the next terminator match
@@ -409,7 +444,7 @@ WikiParser.prototype.pushTextWidget = function(array,text,start,end) {
text = $tw.utils.trim(text);
}
if(text) {
array.push({type: "text", text: text, start: start, end: end});
array.push({type: "text", text: text, start: start, end: end, rule: null});
}
};
@@ -462,4 +497,3 @@ WikiParser.prototype.amendRules = function(type,names) {
exports["text/vnd.tiddlywiki"] = WikiParser;
})();