1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2025-04-08 19:56:39 +00:00

simplify sanitation logic and fix inline docs

This commit is contained in:
pmario 2024-06-09 14:06:49 +02:00
parent 04a916d4a3
commit 8a04a09a68
3 changed files with 31 additions and 47 deletions

View File

@ -38,15 +38,16 @@ exports.htmlBlockElements = "address,article,aside,audio,blockquote,canvas,dd,de
exports.htmlUnsafeElements = "script".split(",");
// See: https://html.spec.whatwg.org/#valid-custom-element-name
// Custom Web Components: https://html.spec.whatwg.org/#valid-custom-element-name
exports.htmlForbiddenTags = "annotation-xml,color-profile,font-face,font-face-src,font-face-uri,font-face-format,font-face-name,missing-glyph".split(",");
// (EBNF notation) - PotentialCustomElementName ::= [a-z] (PCENChar)* '-' (PCENChar)*
// Unicode table with ranges see: https://symbl.cc/en/unicode-table
// The "prefix" is obligatory!
exports.htmlCustomPrimitives = {
prefix: "[a-z]",
validPCENChar: ".|[0-9]|_|[a-z]|\xB7|[\xC0-\xD6]|[\xD8-\xF6]|[\u00F8-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u203F-\u2040]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]",
nonValidPCENChar: "[\x00-\x2C]|\x2F|[\x3A-\x40]|[\x5B-\x60]|[\x7B-\xB6]|[\xB8-\xBF]|\xD7|\xF7|\x37E|[\u2000\u200B]|[\u200E-\u203E]|[\u2041-\u206F]|[\u2190-\u2BFF]|[\u2FF0-\u3000]|[\uD800-\uF8FF]|[\uFDD0-\uFDEF]|[\uFFFE-\uFFFF]"
};
sanitizePCENChar: "[\x00-\x2C]|\x2F|[\x3A-\x40]|[\x5B-\x60]|[\x7B-\xB6]|[\xB8-\xBF]|\xD7|\xF7|\x37E|[\u2000\u200B]|[\u200E-\u203E]|[\u2041-\u206F]|[\u2190-\u2BFF]|[\u2FF0-\u3000]|[\uD800-\uF8FF]|[\uFDD0-\uFDEF]|[\uFFFE-\uFFFF]"
};
exports.htmlCustomPrimitives.nonValidPCENChar = "[A-Z]|" + exports.htmlCustomPrimitives.sanitizePCENChar;
})();

View File

@ -387,56 +387,44 @@ exports.querySelectorAllSafe = function(selector,baseElement) {
};
/*
Make sure HTML tag names are valid
Sanitize HTML tag- and custom web component names
1. Check the string, if it is a valid html tag - using this spec: https://html.spec.whatwg.org/#syntax-tag-name
1.1 Tag names are "case-insensitive"
1.1 Tag names are "case insensitive"
2. Extend 1. and allow hyphens: "-"
2.1 Browsers allow "AA-AA", so do we
2.2 AA-AA cannot be used for styling
2.3 aa-aa can be used for styling
2.1 Browsers allow "AA-AA", so do we. Be aware there may be styling problems
3. Implement a forbidden list according to https://html.spec.whatwg.org/#valid-custom-element-name
3.1 exports.htmlForbiddenTags = "annotation-xml,color-profile,font-face,font-face-src,font-face-uri,font-face-format,font-face-name,missing-glyph".split(",");
4. If 1., 2. and 3. pass return the HTML tag as a valid tag -- That should be fast and cover 90% of html tags users will use.
5. If 4. does not match, we check for invalid character ranges up to \uFFFF. So we can detect problems in a range JS RegExp can handle.
6. We assume that everything out of js RegExp-range is valid, which would be OK for \u10000-\uEFFFF according to the spec.
3. Sanitize input parameters - spec: https://html.spec.whatwg.org/#valid-custom-element-name
4. Implement a forbidden list: exports.htmlForbiddenTags - see: $:/core/modules/config.js
5. Check function parameters for invalid character ranges up to \uFFFF. This detects problems in a range JS RegExp can handle
6. We assume that everything out of js RegExp-range is valid, which is OK for \u10000-\uEFFFF according to the spec
Unicode overview: https://symbl.cc/en/unicode-table/
*/
exports.makeTagNameSafe = function(tag,defaultTag) {
defaultTag = defaultTag || "SPAN";
var result = defaultTag;
// Custom web-components need to be "lowercase()"
var regxSanitizeChars = new RegExp($tw.config.htmlCustomPrimitives.sanitizePCENChar,"mg");
// Sanitize inputs to make the logic simple
defaultTag = (defaultTag) ? defaultTag.replace(regxSanitizeChars,"") : "SPAN";
tag = (tag) ? tag.replace(regxSanitizeChars,"") : defaultTag;
// RegExp for valid standard HTML element, extended with hyphen "-"
var regexValidTag = /(?:[a-z]|[A-Z]|[0-9]|-)+/g;
// RegExp for valid standard HTML element, extended including hyphen "-"
var regexStandardChars = /(?:[a-z]|[A-Z]|[0-9]|-)+/g,
result = "";
// Custom web-components must to be "lowercase()"
var regxInvalidlTag = new RegExp($tw.config.htmlCustomPrimitives.nonValidPCENChar,"mg");
// Check if tag matches regexp as a whole
if(tag && (tag.match(regexValidTag)[0] === tag)) {
result = tag; // valid tag
// Check if tag matches standard HTML spec
if(tag.match(regexStandardChars)[0] === tag) {
result = tag;
}
// Sanitize invalid characters in result
if( regxInvalidlTag.exec(result.toLowerCase()) !== null) {
result = result.toLowerCase().replace(regxInvalidlTag,"");
// Check for unsafe tag and unsafe defaultTag
if($tw.config.htmlUnsafeElements.indexOf(result.toLowerCase()) !== -1) {
result = ($tw.config.htmlUnsafeElements.indexOf(defaultTag.toLowerCase()) !== -1) ? "safe-" + defaultTag : defaultTag;
}
// Check for unsafe tags
if( $tw.config.htmlUnsafeElements.indexOf(result) === -1) {
result = "safe-" + result;
}
// Check for forbidden tag names and log info
// Check for forbidden tag names according to spec and log info to help users
if($tw.config.htmlForbiddenTags.indexOf(result.toLowerCase()) >= 0) {
console.log("Forbidden custom element:\"" + result.toLowerCase() + "\" See: https://html.spec.whatwg.org/#valid-custom-element-name")
result = "safe-" + result;
}
return result;
};
})();

View File

@ -29,15 +29,10 @@ Render this widget into the DOM
ElementWidget.prototype.render = function(parent,nextSibling) {
this.parentDomNode = parent;
this.computeAttributes();
// Neuter blacklisted elements
// Eliminate blacklisted elements
this.tag = this.parseTreeNode.tag;
this.tag = $tw.utils.makeTagNameSafe(this.tag, "safe" + this.tag);
// Restrict tag name to digits, letts and dashes
// this.tag = this.tag.replace(/[^0-9a-zA-Z\-]/mg,"");
// Default to a span
this.tag = this.tag || "span";
// Sanitize tag name if needed according to the Custom Web-Components spec
this.tag = $tw.utils.makeTagNameSafe(this.tag, "safe-" + this.tag);
// Adjust headings by the current base level
var headingLevel = ["h1","h2","h3","h4","h5","h6"].indexOf(this.tag);
if(headingLevel !== -1) {