diff --git a/core/modules/config.js b/core/modules/config.js
index 9f5f7a78e..9d9dd3376 100644
--- a/core/modules/config.js
+++ b/core/modules/config.js
@@ -38,15 +38,17 @@ exports.htmlBlockElements = "address,article,aside,audio,blockquote,canvas,dd,de
 
 exports.htmlUnsafeElements = "script".split(",");
 
-// See: https://html.spec.whatwg.org/#valid-custom-element-name
+// Custom Web Components: https://html.spec.whatwg.org/#valid-custom-element-name
 exports.htmlForbiddenTags = "annotation-xml,color-profile,font-face,font-face-src,font-face-uri,font-face-format,font-face-name,missing-glyph".split(",");
 
+// (EBNF notation) - PotentialCustomElementName ::= [a-z] (PCENChar)* '-' (PCENChar)*
 // Unicode table with ranges see: https://symbl.cc/en/unicode-table
-// The "prefix" is obligatory!
 exports.htmlCustomPrimitives = {
 	prefix: "[a-z]",
 	validPCENChar: ".|[0-9]|_|[a-z]|\xB7|[\xC0-\xD6]|[\xD8-\xF6]|[\u00F8-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u203F-\u2040]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]",
-	nonValidPCENChar: "[\x00-\x2C]|\x2F|[\x3A-\x40]|[\x5B-\x60]|[\x7B-\xB6]|[\xB8-\xBF]|\xD7|\xF7|\x37E|[\u2000\u200B]|[\u200E-\u203E]|[\u2041-\u206F]|[\u2190-\u2BFF]|[\u2FF0-\u3000]|[\uD800-\uF8FF]|[\uFDD0-\uFDEF]|[\uFFFE-\uFFFF]"
-};
+	sanitizePCENChar: "[\x00-\x2C]|\x2F|[\x3A-\x40]|[\x5B-\x5E]|\x60|[\x7B-\xB6]|[\xB8-\xBF]|\xD7|\xF7|\u037E|[\u2000-\u200B]|[\u200E-\u203E]|[\u2041-\u206F]|[\u2190-\u2BFF]|[\u2FF0-\u3000]|[\uD800-\uF8FF]|[\uFDD0-\uFDEF]|[\uFFFE-\uFFFF]"
+};
+// sanitizePCENChar lists the characters stripped from tag names; nonValidPCENChar additionally rejects uppercase A-Z
+exports.htmlCustomPrimitives.nonValidPCENChar = "[A-Z]|" + exports.htmlCustomPrimitives.sanitizePCENChar;
 
 })();
diff --git a/core/modules/utils/dom/dom.js b/core/modules/utils/dom/dom.js
index f8f779582..ac41b528d 100644
--- a/core/modules/utils/dom/dom.js
+++ b/core/modules/utils/dom/dom.js
@@ -387,56 +387,53 @@ exports.querySelectorAllSafe = function(selector,baseElement) {
 };
 
 /*
-Make sure HTML tag names are valid
+Sanitize HTML tag names and custom web component names
 
 	1. Check the string, if it is a valid html tag - using this spec: https://html.spec.whatwg.org/#syntax-tag-name
 		1.1 Tag names are "case-insensitive"
 
 	2. Extend 1. and allow hyphens: "-"
-		2.1 Browsers allow "AA-AA", so do we
-		2.2 AA-AA cannot be used for styling
-		2.3 aa-aa can be used for styling
+		2.1 Browsers allow "AA-AA", so do we. Be aware there may be styling problems
 
-	3. Implement a forbidden list according to https://html.spec.whatwg.org/#valid-custom-element-name
-		3.1 exports.htmlForbiddenTags = "annotation-xml,color-profile,font-face,font-face-src,font-face-uri,font-face-format,font-face-name,missing-glyph".split(",");
-
-	4. If 1., 2. and 3. pass return the HTML tag as a valid tag -- That should be fast and cover 90% of html tags users will use.
-
-	5. If 4. does not match, we check for invalid character ranges up to \uFFFF. So we can detect problems in a range JS RegExp can handle.
-	6. We assume that everything out of js RegExp-range is valid, which would be OK for \u10000-\uEFFFF according to the spec.
+	3. Sanitize input parameters - spec: https://html.spec.whatwg.org/#valid-custom-element-name
+	4. Implement a forbidden list: exports.htmlForbiddenTags - see: $:/core/modules/config.js
+	5. Check function parameters for invalid character ranges up to \uFFFF. This detects problems in a range JS RegExp can handle
+	6. We assume that everything out of js RegExp-range is valid, which is OK for \u10000-\uEFFFF according to the spec
+		Unicode overview: https://symbl.cc/en/unicode-table/
+
+tag: requested tag name; used only if it consists entirely of letters, digits and hyphens after sanitizing
+defaultTag: fallback tag name; becomes "SPAN" if it is missing or empty after sanitizing
+Returns a non-empty tag name; unsafe names are replaced by the default and forbidden names are prefixed with "safe-"
 */
 exports.makeTagNameSafe = function(tag,defaultTag) {
-	defaultTag = defaultTag || "SPAN";
-	var result = defaultTag;
+	// Characters that must never appear in a tag name - see: $:/core/modules/config.js
+	var regxSanitizeChars = new RegExp($tw.config.htmlCustomPrimitives.sanitizePCENChar,"g"),
+		sanitize = function(name) {
+			// Non-string input (undefined, null, numbers ...) is treated as "no name"
+			return (typeof name === "string") ? name.replace(regxSanitizeChars,"") : "";
+		};
+	// Sanitize inputs to make the logic simple. A default that is missing or sanitized away becomes "SPAN"
+	defaultTag = sanitize(defaultTag) || "SPAN";
+	tag = sanitize(tag);
 
-	// RegExp for valid standard HTML element, extended with hyphen "-"
-	var regexValidTag = /(?:[a-z]|[A-Z]|[0-9]|-)+/g;
+	// RegExp for valid standard HTML element, extended including hyphen "-"
+	// Anchored and without the "g" flag, so it is stateless and there is no null match result to dereference
+	var regexStandardChars = /^[a-zA-Z0-9-]+$/,
+		result = defaultTag;
 
-	// Custom web-components must to be "lowercase()"
-	var regxInvalidlTag = new RegExp($tw.config.htmlCustomPrimitives.nonValidPCENChar,"mg");
-
-	// Check if tag matches regexp as a whole
-	if(tag && (tag.match(regexValidTag)[0] === tag)) {
-		result = tag; // valid tag
+	// Use the tag only if it consists of standard characters as a whole, otherwise keep the default
+	if(regexStandardChars.test(tag)) {
+		result = tag;
 	}
-
-	// Sanitize invalid characters in result
-	if( regxInvalidlTag.exec(result.toLowerCase()) !== null) {
-		result = result.toLowerCase().replace(regxInvalidlTag,"");
+	// Check for unsafe tag and unsafe defaultTag
+	if($tw.config.htmlUnsafeElements.indexOf(result.toLowerCase()) !== -1) {
+		result = ($tw.config.htmlUnsafeElements.indexOf(defaultTag.toLowerCase()) !== -1) ? "safe-" + defaultTag : defaultTag;
 	}
-
-	// Check for unsafe tags
-	if( $tw.config.htmlUnsafeElements.indexOf(result) === -1) {
-		result = "safe-" + result;
-	}
-
-	// Check for forbidden tag names and log info
+	// Check for forbidden tag names according to spec and log info to help users
 	if($tw.config.htmlForbiddenTags.indexOf(result.toLowerCase()) >= 0) {
 		console.log("Forbidden custom element:\"" + result.toLowerCase() + "\" See: https://html.spec.whatwg.org/#valid-custom-element-name")
 		result = "safe-" + result;
 	}
-
 	return result;
 };
-
 })();
diff --git a/core/modules/widgets/element.js b/core/modules/widgets/element.js
index 534442759..3f8017960 100755
--- a/core/modules/widgets/element.js
+++ b/core/modules/widgets/element.js
@@ -29,15 +29,10 @@ Render this widget into the DOM
 ElementWidget.prototype.render = function(parent,nextSibling) {
 	this.parentDomNode = parent;
 	this.computeAttributes();
-	// Neuter blacklisted elements
+	// Eliminate blacklisted elements
 	this.tag = this.parseTreeNode.tag;
-	this.tag = $tw.utils.makeTagNameSafe(this.tag, "safe" + this.tag);
-	// Restrict tag name to digits, letts and dashes
-
-	// this.tag = this.tag.replace(/[^0-9a-zA-Z\-]/mg,"");
-
-	// Default to a span
-	this.tag = this.tag || "span";
+	// Sanitize tag name if needed according to Custom Web-Components spec
+	this.tag = $tw.utils.makeTagNameSafe(this.tag, "safe-" + this.tag);
 	// Adjust headings by the current base level
 	var headingLevel = ["h1","h2","h3","h4","h5","h6"].indexOf(this.tag);
 	if(headingLevel !== -1) {