/*\ title: $:/core/modules/utils/parseutils.js type: application/javascript module-type: utils Utility functions concerned with parsing text into tokens. Most functions have the following pattern: * The parameters are: ** `source`: the source string being parsed ** `pos`: the current parse position within the string ** Any further parameters are used to identify the token that is being parsed * The return value is: ** null if the token was not found at the specified position ** an object representing the token with the following standard fields: *** `type`: string indicating the type of the token *** `start`: start position of the token in the source string *** `end`: end position of the token in the source string *** Any further fields required to describe the token The exception is `skipWhiteSpace`, which just returns the position after the whitespace. \*/ "use strict"; /* Look for a whitespace token. Returns null if not found, otherwise returns {type: "whitespace", start:, end:,} */ exports.parseWhiteSpace = function(source,pos) { var p = pos,c; while(true) { c = source.charAt(p); if((c === " ") || (c === "\f") || (c === "\n") || (c === "\r") || (c === "\t") || (c === "\v") || (c === "\u00a0")) { // Ignores some obscure unicode spaces p++; } else { break; } } if(p === pos) { return null; } else { return { type: "whitespace", start: pos, end: p }; } }; /* Convenience wrapper for parseWhiteSpace. Returns the position after the whitespace */ exports.skipWhiteSpace = function(source,pos) { var c; while(true) { c = source.charAt(pos); if((c === " ") || (c === "\f") || (c === "\n") || (c === "\r") || (c === "\t") || (c === "\v") || (c === "\u00a0")) { // Ignores some obscure unicode spaces pos++; } else { return pos; } } }; /* Look for a given string token. Returns null if not found, otherwise returns {type: "token", value:, start:, end:,} */ exports.parseTokenString = function(source,pos,token) { var match = source.indexOf(token,pos) === pos; if(match) { return { type: "token", value: token, start: pos, end: pos + token.length }; } return null; }; /* Look for a token matching a regex. Returns null if not found, otherwise returns {type: "regexp", match:, start:, end:,} Use the "Y" (sticky) flag to avoid searching the entire rest of the string */ exports.parseTokenRegExp = function(source,pos,reToken) { var node = { type: "regexp", start: pos }; reToken.lastIndex = pos; node.match = reToken.exec(source); if(node.match && node.match.index === pos) { node.end = pos + node.match[0].length; return node; } else { return null; } }; /* Look for a string literal. Returns null if not found, otherwise returns {type: "string", value:, start:, end:,} */ exports.parseStringLiteral = function(source,pos) { var node = { type: "string", start: pos }; var reString = /(?:"""([\s\S]*?)"""|"([^"]*)")|(?:'([^']*)')|\[\[((?:[^\]]|\](?!\]))*)\]\]/y; reString.lastIndex = pos; var match = reString.exec(source); if(match && match.index === pos) { node.value = match[1] !== undefined ? match[1] :( match[2] !== undefined ? match[2] : ( match[3] !== undefined ? match[3] : match[4] )); node.end = pos + match[0].length; return node; } else { return null; } }; /* Returns an array of {name:} with an optional "default" property. Options include: requireParenthesis: require the parameter definition to be wrapped in parenthesis */ exports.parseParameterDefinition = function(paramString,options) { options = options || {}; if(options.requireParenthesis) { var parenMatch = /^\s*\((.*)\)\s*$/g.exec(paramString); if(!parenMatch) { return []; } paramString = parenMatch[1]; } var params = [], reParam = /\s*([^:),\s]+)(?:\s*:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|([^,"'\s]+)))?/mg, paramMatch = reParam.exec(paramString); while(paramMatch) { // Save the parameter details var paramInfo = {name: paramMatch[1]}, defaultValue = paramMatch[2] || paramMatch[3] || paramMatch[4] || paramMatch[5]; if(defaultValue !== undefined) { // Check for an MVV reference ((varname)) var mvvDefaultMatch = /^\(\(([^)|]+)\)\)$/.exec(defaultValue); if(mvvDefaultMatch) { paramInfo.defaultType = "multivalue-variable"; paramInfo.defaultVariable = mvvDefaultMatch[1]; } else { paramInfo["default"] = defaultValue; } } params.push(paramInfo); // Look for the next parameter paramMatch = reParam.exec(paramString); } return params; }; exports.parseMacroParameters = function(node,source,pos) { // Process parameters var parameter = $tw.utils.parseMacroParameter(source,pos); while(parameter) { node.params.push(parameter); pos = parameter.end; // Get the next parameter parameter = $tw.utils.parseMacroParameter(source,pos); } node.end = pos; return node; }; /* Look for a macro invocation parameter. Returns null if not found, or {type: "macro-parameter", name:, value:, start:, end:} */ exports.parseMacroParameter = function(source,pos) { var node = { type: "macro-parameter", start: pos }; // Define our regexp const reMacroParameter = /(?:([A-Za-z0-9\-_]+)\s*:)?(?:\s*(?:"""([\s\S]*?)"""|"([^"]*)"|'([^']*)'|\[\[((?:[^\]]|\](?!\]))*)\]\]|((?:(?:>(?!>))|[^\s>"'])+)))/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for the parameter var token = $tw.utils.parseTokenRegExp(source,pos,reMacroParameter); if(!token) { return null; } pos = token.end; // Get the parameter details node.value = token.match[2] !== undefined ? token.match[2] : ( token.match[3] !== undefined ? token.match[3] : ( token.match[4] !== undefined ? token.match[4] : ( token.match[5] !== undefined ? token.match[5] : ( token.match[6] !== undefined ? token.match[6] : ( "" ) ) ) ) ); if(token.match[1]) { node.name = token.match[1]; } // Update the end position node.end = pos; return node; }; /* Look for a macro invocation. Returns null if not found, or {type: "transclude", attributes:, start:, end:} */ exports.parseMacroInvocationAsTransclusion = function(source,pos) { var node = { type: "transclude", start: pos, attributes: {}, orderedAttributes: [] }; // Define our regexps var reVarName = /([^\s>"'=:]+)/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double opening angle bracket var token = $tw.utils.parseTokenString(source,pos,"<<"); if(!token) { return null; } pos = token.end; // Get the variable name for the macro token = $tw.utils.parseTokenRegExp(source,pos,reVarName); if(!token) { return null; } $tw.utils.addAttributeToParseTreeNode(node,"$variable",token.match[1]); pos = token.end; // Check that the tag is terminated by a space or >>, and that there is a closing >> somewhere ahead if(!(source.charAt(pos) === ">" && source.charAt(pos + 1) === ">") ) { if(source.indexOf(">>",pos) === -1) { return null; } } // Process attributes pos = $tw.utils.parseMacroParametersAsAttributes(node,source,pos); // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double closing angle bracket token = $tw.utils.parseTokenString(source,pos,">>"); if(!token) { return null; } node.end = token.end; return node; }; /* Look for an MVV (multi-valued variable) reference as a transclusion, i.e. ((varname)) or ((varname params)) Returns null if not found, or a parse tree node of type "transclude" with isMVV: true */ exports.parseMVVReferenceAsTransclusion = function(source,pos) { var node = { type: "transclude", isMVV: true, start: pos, attributes: {}, orderedAttributes: [] }; // Define our regexps var reVarName = /([^\s>"'=:)]+)/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double opening parenthesis var token = $tw.utils.parseTokenString(source,pos,"(("); if(!token) { return null; } pos = token.end; // Get the variable name token = $tw.utils.parseTokenRegExp(source,pos,reVarName); if(!token) { return null; } $tw.utils.addAttributeToParseTreeNode(node,"$variable",token.match[1]); pos = token.end; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double closing parenthesis token = $tw.utils.parseTokenString(source,pos,"))"); if(!token) { return null; } node.end = token.end; return node; }; /* Parse macro parameters as attributes. Returns the position after the last attribute */ exports.parseMacroParametersAsAttributes = function(node,source,pos) { var position = 0, attribute = $tw.utils.parseMacroParameterAsAttribute(source,pos); while(attribute) { if(!attribute.name) { attribute.name = (position++) + ""; attribute.isPositional = true; } node.orderedAttributes.push(attribute); node.attributes[attribute.name] = attribute; pos = attribute.end; // Get the next attribute attribute = $tw.utils.parseMacroParameterAsAttribute(source,pos); } node.end = pos; return pos; }; /* Parse a macro parameter as an attribute. Returns null if not found, otherwise returns {name:, type: "filtered|string|indirect|macro", value|filter|textReference:, start:, end:,}, with the name being optional */ exports.parseMacroParameterAsAttribute = function(source,pos) { var node = { start: pos }; // Define our regexps var reAttributeName = /([^\/\s>"'`=:]+)/y, reStrictIdentifier = /^[A-Za-z0-9\-_]+$/, reUnquotedAttribute = /(?!<<)((?:(?:>(?!>))|[^\s>"'])+)/y, reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/y, reIndirectValue = /\{\{([^\}]+)\}\}/y, reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Get the attribute name and the separator token var nameToken = $tw.utils.parseTokenRegExp(source,pos,reAttributeName), namePos = nameToken && $tw.utils.skipWhiteSpace(source,nameToken.end), separatorToken = nameToken && $tw.utils.parseTokenRegExp(source,namePos,/=|:/y), isNewStyleSeparator = false; // If there is no separator then we don't allow new style values // Colon separator requires a strict identifier name to avoid mis-parsing values like $:/foo if(nameToken && separatorToken && separatorToken.match[0] === ":" && !reStrictIdentifier.test(nameToken.match[1])) { nameToken = null; separatorToken = null; } // If we have a name and a separator then we have a named attribute if(nameToken && separatorToken) { node.name = nameToken.match[1]; // key value separator is `=` or `:` node.assignmentOperator = separatorToken.match[0]; pos = separatorToken.end; isNewStyleSeparator = (node.assignmentOperator === "="); } // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); do { // Look for a string literal var stringLiteral = $tw.utils.parseStringLiteral(source,pos); if(stringLiteral) { pos = stringLiteral.end; node.type = "string"; node.value = stringLiteral.value; // Mark the value as having been quoted in the source node.quoted = true; break; } if(isNewStyleSeparator) { // Look for a filtered value var filteredValue = $tw.utils.parseTokenRegExp(source,pos,reFilteredValue); if(filteredValue) { pos = filteredValue.end; node.type = "filtered"; node.filter = filteredValue.match[1]; break; } // Look for an indirect value var indirectValue = $tw.utils.parseTokenRegExp(source,pos,reIndirectValue); if(indirectValue) { pos = indirectValue.end; node.type = "indirect"; node.textReference = indirectValue.match[1]; break; } // Look for a macro invocation value var macroInvocation = $tw.utils.parseMacroInvocationAsTransclusion(source,pos); if(macroInvocation) { pos = macroInvocation.end; node.type = "macro"; node.value = macroInvocation; break; } // Look for an MVV reference value var mvvReference = $tw.utils.parseMVVReferenceAsTransclusion(source,pos); if(mvvReference) { pos = mvvReference.end; node.type = "macro"; node.value = mvvReference; node.isMVV = true; break; } // Look for a substituted value var substitutedValue = $tw.utils.parseTokenRegExp(source,pos,reSubstitutedValue); if(substitutedValue) { pos = substitutedValue.end; node.type = "substituted"; node.rawValue = substitutedValue.match[1] || substitutedValue.match[2]; break; } } // Look for a unquoted value var unquotedValue = $tw.utils.parseTokenRegExp(source,pos,reUnquotedAttribute); if(unquotedValue) { pos = unquotedValue.end; node.type = "string"; node.value = unquotedValue.match[1]; break; // redundant, but leaving for consistency } } while(false); // Bail if we don't have a value if(!node.type) { return null; } // Update the end position node.end = pos; return node; }; /* Look for a macro invocation. Returns null if not found, or {type: "macrocall", name:, params:, start:, end:} */ exports.parseMacroInvocation = function(source,pos) { var node = { type: "macrocall", start: pos, params: [] }; // Define our regexps const reMacroName = /([^\s>"'=]+)/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double less than sign var token = $tw.utils.parseTokenString(source,pos,"<<"); if(!token) { return null; } pos = token.end; // Get the macro name var name = $tw.utils.parseTokenRegExp(source,pos,reMacroName); if(!name) { return null; } node.name = name.match[1]; pos = name.end; node = $tw.utils.parseMacroParameters(node,source,pos); pos = node.end; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a double greater than sign token = $tw.utils.parseTokenString(source,pos,">>"); if(!token) { return null; } pos = token.end; // Update the end position node.end = pos; return node; }; exports.parseFilterVariable = function(source) { var node = { name: "", params: [], }, pos = 0, reName = /([^\s"']+)/y; // If there is no whitespace or it is an empty string then there are no macro parameters if(/^\S*$/.test(source)) { node.name = source; return node; } // Get the variable name var nameMatch = $tw.utils.parseTokenRegExp(source,pos,reName); if(nameMatch) { node.name = nameMatch.match[1]; pos = nameMatch.end; node = $tw.utils.parseMacroParameters(node,source,pos); delete node.end; } return node; }; /* Look for an HTML attribute definition. Returns null if not found, otherwise returns {name:, type: "filtered|string|indirect|macro", value|filter|textReference:, start:, end:,} */ exports.parseAttribute = function(source,pos) { var node = { start: pos }; // Define our regexps const reAttributeName = /([^\/\s>"'`=]+)/y, reUnquotedAttribute = /([^\/\s<>"'`=]+)/y, reFilteredValue = /\{\{\{([\S\s]+?)\}\}\}/y, reIndirectValue = /\{\{([^\}]+)\}\}/y, reSubstitutedValue = /(?:```([\s\S]*?)```|`([^`]|[\S\s]*?)`)/y; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Get the attribute name var name = $tw.utils.parseTokenRegExp(source,pos,reAttributeName); if(!name) { return null; } node.name = name.match[1]; pos = name.end; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for an equals sign var token = $tw.utils.parseTokenString(source,pos,"="); if(token) { pos = token.end; // Skip whitespace pos = $tw.utils.skipWhiteSpace(source,pos); // Look for a string literal var stringLiteral = $tw.utils.parseStringLiteral(source,pos); if(stringLiteral) { pos = stringLiteral.end; node.type = "string"; node.value = stringLiteral.value; } else { // Look for a filtered value var filteredValue = $tw.utils.parseTokenRegExp(source,pos,reFilteredValue); if(filteredValue) { pos = filteredValue.end; node.type = "filtered"; node.filter = filteredValue.match[1]; } else { // Look for an indirect value var indirectValue = $tw.utils.parseTokenRegExp(source,pos,reIndirectValue); if(indirectValue) { pos = indirectValue.end; node.type = "indirect"; node.textReference = indirectValue.match[1]; } else { // Look for a macro invocation value var macroInvocation = $tw.utils.parseMacroInvocationAsTransclusion(source,pos); if(macroInvocation) { pos = macroInvocation.end; node.type = "macro"; node.value = macroInvocation; } else { // Look for an MVV reference value var mvvReference = $tw.utils.parseMVVReferenceAsTransclusion(source,pos); if(mvvReference) { pos = mvvReference.end; node.type = "macro"; node.value = mvvReference; node.isMVV = true; } else { var substitutedValue = $tw.utils.parseTokenRegExp(source,pos,reSubstitutedValue); if(substitutedValue) { pos = substitutedValue.end; node.type = "substituted"; node.rawValue = substitutedValue.match[1] || substitutedValue.match[2]; } else { // Look for a unquoted value var unquotedValue = $tw.utils.parseTokenRegExp(source,pos,reUnquotedAttribute); if(unquotedValue) { pos = unquotedValue.end; node.type = "string"; node.value = unquotedValue.match[1]; } else if(source.charAt(pos) === "<" && source.charAt(pos + 1) === "<" && source.indexOf(">>",pos) !== -1) { // Value looks like a macro invocation (starts with << with a closing >> ahead) but does not parse as one. Return null so the enclosing tag fails to parse rather than silently binding the attribute to "true" and treating the remainder as further attributes (restores v5.3.8 behaviour) return null; } else { node.type = "string"; node.value = "true"; } } } } } } } } else { // If there is no equals sign or colon, then this is an attribute with no value, defaulting to "true" node.type = "string"; node.value = "true"; } // Update the end position node.end = pos; return node; };