TiddlyWiki5/core/modules/parsers/parseutils.js

323 lines
8.6 KiB
JavaScript

/*\
title: $:/core/modules/utils/parseutils.js
type: application/javascript
module-type: utils
Utility functions concerned with parsing text into tokens.
Most functions have the following pattern:
* The parameters are:
** `source`: the source string being parsed
** `pos`: the current parse position within the string
** Any further parameters are used to identify the token that is being parsed
* The return value is:
** null if the token was not found at the specified position
** an object representing the token with the following standard fields:
*** `type`: string indicating the type of the token
*** `start`: start position of the token in the source string
*** `end`: end position of the token in the source string
*** Any further fields required to describe the token
The exception is `skipWhiteSpace`, which just returns the position after the whitespace.
\*/
(function(){
/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";
/*
Look for a whitespace token. Returns null if not found, otherwise returns {type: "whitespace", start:, end:,}
*/
exports.parseWhiteSpace = function(source,pos) {
var p = pos,c;
while(true) {
c = source.charAt(p);
if((c === " ") || (c === "\f") || (c === "\n") || (c === "\r") || (c === "\t") || (c === "\v") || (c === "\u00a0")) { // Ignores some obscure unicode spaces
p++;
} else {
break;
}
}
if(p === pos) {
return null;
} else {
return {
type: "whitespace",
start: pos,
end: p
}
}
};
/*
Convenience wrapper for parseWhiteSpace. Returns the position after the whitespace
*/
exports.skipWhiteSpace = function(source,pos) {
var c;
while(true) {
c = source.charAt(pos);
if((c === " ") || (c === "\f") || (c === "\n") || (c === "\r") || (c === "\t") || (c === "\v") || (c === "\u00a0")) { // Ignores some obscure unicode spaces
pos++;
} else {
return pos;
}
}
};
/*
Look for a given string token. Returns null if not found, otherwise returns {type: "token", value:, start:, end:,}
*/
exports.parseTokenString = function(source,pos,token) {
var match = source.indexOf(token,pos) === pos;
if(match) {
return {
type: "token",
value: token,
start: pos,
end: pos + token.length
};
}
return null;
};
/*
Look for a token matching a regex at a specified position. Returns null if not found, otherwise returns {type: "regexp", match:, start:, end:,}
Use the "Y" (sticky) flag to avoid searching the entire rest of the string
*/
exports.parseTokenRegExp = function(source,pos,reToken) {
var node = {
type: "regexp",
start: pos
};
reToken.lastIndex = pos;
node.match = reToken.exec(source);
if(node.match && node.match.index === pos) {
node.end = pos + node.match[0].length;
return node;
} else {
return null;
}
};
/*
Look for a string literal. Returns null if not found, otherwise returns {type: "string", value:, start:, end:,}
*/
exports.parseStringLiteral = function(source,pos) {
var node = {
type: "string",
start: pos
};
var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')/g;
reString.lastIndex = pos;
var match = reString.exec(source);
if(match && match.index === pos) {
node.value = match[1] !== undefined ? match[1] :(
match[2] !== undefined ? match[2] : match[3]
);
node.end = pos + match[0].length;
return node;
} else {
return null;
}
};
exports.parseMacroParameters = function(node,source,pos) {
// Process parameters
var parameter = $tw.utils.parseMacroParameter(source,pos);
while(parameter) {
node.params.push(parameter);
pos = parameter.end;
// Get the next parameter
parameter = $tw.utils.parseMacroParameter(source,pos);
}
node.end = pos;
return node;
}
/*
Look for a macro invocation parameter. Returns null if not found, or {type: "macro-parameter", name:, value:, start:, end:}
*/
exports.parseMacroParameter = function(source,pos) {
var node = {
type: "macro-parameter",
start: pos
};
// Define our regexp
var reMacroParameter = /(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[([^\]]*)\]\]|((?:(?:>(?!>))|[^\s>"'])+)))/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for the parameter
var token = $tw.utils.parseTokenRegExp(source,pos,reMacroParameter);
if(!token) {
return null;
}
pos = token.end;
// Get the parameter details
node.value = token.match[2] !== undefined ? token.match[2] : (
token.match[3] !== undefined ? token.match[3] : (
token.match[4] !== undefined ? token.match[4] : (
token.match[5] !== undefined ? token.match[5] : (
token.match[6] !== undefined ? token.match[6] : (
""
)
)
)
)
);
if(token.match[1]) {
node.name = token.match[1];
}
// Update the end position
node.end = pos;
return node;
};
/*
Look for a macro invocation. Returns null if not found, or {type: "macrocall", name:, parameters:, start:, end:}
*/
exports.parseMacroInvocation = function(source,pos) {
var node = {
type: "macrocall",
start: pos,
params: []
};
// Define our regexps
var reMacroName = /([^\s>"'=]+)/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double less than sign
var token = $tw.utils.parseTokenString(source,pos,"<<");
if(!token) {
return null;
}
pos = token.end;
// Get the macro name
var name = $tw.utils.parseTokenRegExp(source,pos,reMacroName);
if(!name) {
return null;
}
node.name = name.match[1];
pos = name.end;
node = $tw.utils.parseMacroParameters(node,source,pos);
pos = node.end;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a double greater than sign
token = $tw.utils.parseTokenString(source,pos,">>");
if(!token) {
return null;
}
pos = token.end;
// Update the end position
node.end = pos;
return node;
};
exports.parseFilterVariable = function(source) {
var node = {
name: "",
params: [],
},
pos = 0,
reName = /([^\s"']+)/y;
// If there is no whitespace or it is an empty string then there are no macro parameters
if(/^\S*$/.test(source)) {
node.name = source;
return node;
}
// Get the variable name
var nameMatch = $tw.utils.parseTokenRegExp(source,pos,reName);
if(nameMatch) {
node.name = nameMatch.match[1];
pos = nameMatch.end;
node = $tw.utils.parseMacroParameters(node,source,pos);
delete node.end;
}
return node;
};
/*
Look for an HTML attribute definition. Returns null if not found, otherwise returns {type: "attribute", name:, type: "filtered|string|indirect|macro", value|filter|textReference:, start:, end:,}
*/
exports.parseAttribute = function(source,pos) {
var node = {
start: pos
};
// Define our regexps
var reAttributeName = /([^\/\s>"'=]+)/y,
reUnquotedAttribute = /([^\/\s<>"'=]+)/y,
reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/y,
reIndirectValue = /\{\{([^\}]+)\}\}/y;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Get the attribute name
var name = $tw.utils.parseTokenRegExp(source,pos,reAttributeName);
if(!name) {
return null;
}
node.name = name.match[1];
pos = name.end;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for an equals sign
var token = $tw.utils.parseTokenString(source,pos,"=");
if(token) {
pos = token.end;
// Skip whitespace
pos = $tw.utils.skipWhiteSpace(source,pos);
// Look for a string literal
var stringLiteral = $tw.utils.parseStringLiteral(source,pos);
if(stringLiteral) {
pos = stringLiteral.end;
node.type = "string";
node.value = stringLiteral.value;
} else {
// Look for a filtered value
var filteredValue = $tw.utils.parseTokenRegExp(source,pos,reFilteredValue);
if(filteredValue) {
pos = filteredValue.end;
node.type = "filtered";
node.filter = filteredValue.match[1];
} else {
// Look for an indirect value
var indirectValue = $tw.utils.parseTokenRegExp(source,pos,reIndirectValue);
if(indirectValue) {
pos = indirectValue.end;
node.type = "indirect";
node.textReference = indirectValue.match[1];
} else {
// Look for a unquoted value
var unquotedValue = $tw.utils.parseTokenRegExp(source,pos,reUnquotedAttribute);
if(unquotedValue) {
pos = unquotedValue.end;
node.type = "string";
node.value = unquotedValue.match[1];
} else {
// Look for a macro invocation value
var macroInvocation = $tw.utils.parseMacroInvocation(source,pos);
if(macroInvocation) {
pos = macroInvocation.end;
node.type = "macro";
node.value = macroInvocation;
} else {
node.type = "string";
node.value = "true";
}
}
}
}
}
} else {
node.type = "string";
node.value = "true";
}
// Update the end position
node.end = pos;
return node;
};
})();