From f0314cf1ad2d78914806cfe492e4d7752cdb9048 Mon Sep 17 00:00:00 2001
From: Scott Sauyet
Date: Tue, 31 Oct 2023 22:26:20 -0400
Subject: [PATCH] Move the new code from boot to util

Relocate the permalink/permaview URL helpers (map, filter, decodeTWURIList,
decodeTWURITarget, encodeTiddlerTitle, encodeFilterPath and their supporting
constants) from boot/boot.js to core/modules/utils/utils.js, where they are
attached to `exports` instead of being assigned to `$tw.utils` directly.
The code itself is unchanged; only comments on the moved copy are corrected.
Also change the from-version in PermaLinks.tid from 5.2.3 to 5.3.2.
---
 boot/boot.js                                  | 109 -----------------
 core/modules/utils/utils.js                   | 110 ++++++++++++++++++
 .../tw5.com/tiddlers/concepts/PermaLinks.tid  |   2 +-
 3 files changed, 111 insertions(+), 110 deletions(-)

diff --git a/boot/boot.js b/boot/boot.js
index 3df2b9c8b..aab017275 100644
--- a/boot/boot.js
+++ b/boot/boot.js
@@ -287,115 +287,6 @@ $tw.utils.decodeURIComponentSafe = function(s) {
 	return v;
 };
 
-$tw.utils.map = function(fn) {
-	return function (xs) {
-		var results = [];
-		for (var i = 0; i < xs.length; i++) {
-			results.push (fn(xs[i]));
-		}
-		return results;
-	}
-}
-
-$tw.utils.filter = function(fn) {
-	return function (xs) {
-		const results = []
-		for (var i = 0; i < xs.length; i++) {
-			if (fn(xs[i])) {
-				results.push(xs[i])
-			}
-		}
-		return results;
-	}
-}
-
-// The character that will substitute for a space in the URL
-var SPACE_SUBSTITUTE = "_";
-
-// The character added to the end to avoid ending with `.`, `?`, `!` or the like
-var TRAILER = "_";
-
-// The character that will separate out the list elements in the URL
-var CONJUNCTION = ";";
-
-// Those of the allowed url characters claimed by TW
-var CLAIMED = [SPACE_SUBSTITUTE, ":", CONJUNCTION];
-
-// Non-alphanumeric characters allowed in a URL fragment
-// More information at https://www.rfc-editor.org/rfc/rfc3986#appendix-A
-var VALID_IN_URL_FRAGMENT = "-._~!$&'()*+,;=:@/?";
-
-// The subset of the pchars we will not percent-encode in permalinks/permaviews
-var SUBSTITUTES = VALID_IN_URL_FRAGMENT.split("").filter(function(c){return CLAIMED.indexOf(c) === -1});
-
-// A regex to match the percent-encoded characters we will want to replace.
-// Something similar to the following, depending on SPACE and CONJUNCTION
-// /(%2D|%2E|%7E|%21|%24|%26|%27|%28|%29|%2A|%2B|%3B|%3D|%40|%2F|%3F)/g
-var CHAR_MATCH = new RegExp(
-	"(" + SUBSTITUTES.map(function(c) {
-		return "%" + c.charCodeAt(0).toString(16).toUpperCase()
-	}).join("|") + ")",
-	"g"
-);
-
-// A regex to match the SPACE_SUBSTITUTE character
-var SPACE_MATCH = new RegExp("(\\" + SPACE_SUBSTITUTE + ")", "g");
-
-// A regex to match URLs ending with sentence-ending punctuation
-var SENTENCE_ENDING = new RegExp("(\\.|\\!|\\?|\\" + TRAILER + ")$", "g");
-
-// A regex to match URLs ending with sentence-ending punctuation plus the TRAILER
-var SENTENCE_TRAILING = new RegExp("(\\.|\\!|\\?|\\" + TRAILER + ")\\" + TRAILER + "$", "g");
-
-// An object mapping the percent encodings back to their source characters
-var PCT_CHAR_MAP = SUBSTITUTES.reduce(function (a, c) {
-	a["%" + c.charCodeAt(0).toString(16).toUpperCase()] = c
-	return a
-}, {});
-
-// Convert a URI List Component encoded string (with the `SPACE_SUBSTITUTE`
-// value as an allowed replacement for the space character) to a string
-$tw.utils.decodeTWURIList = function(s) {
-	var parts = s.replace(SENTENCE_TRAILING, "$1").split(CONJUNCTION);
-	var withSpaces = $tw.utils.map(function(s) {return s.replace(SPACE_MATCH, " ")})(parts);
-	var withBrackets = $tw.utils.map(function(s) {return s.indexOf(" ") >= 0 ? "[[" + s + "]]" : s})(withSpaces);
-	return $tw.utils.decodeURIComponentSafe(withBrackets.join(" "));
-};
-
-// Convert a URI Target Component encoded string (with the `SPACE_SUBSTITUTE`
-// value as an allowed replacement for the space character) to a string
-$tw.utils.decodeTWURITarget = function(s) {
-	return $tw.utils.decodeURIComponentSafe(
-		s.replace(SENTENCE_TRAILING, "$1").replace(SPACE_MATCH, " ")
-	)
-};
-
-// Convert a URIComponent encoded title string (with the `SPACE_SUBSTITUTE`
-// value as an allowed replacement for the space character) to a string
-$tw.utils.encodeTiddlerTitle = function(s) {
-	var extended = s.replace(SENTENCE_ENDING, "$1" + TRAILER)
-	var encoded = encodeURIComponent(extended);
-	var substituted = encoded.replace(/\%20/g, SPACE_SUBSTITUTE);
-	return substituted.replace(CHAR_MATCH, function(_, c) {
-		return PCT_CHAR_MAP[c];
-	});
-};
-
-// Convert a URIComponent encoded filter string (with the `SPACE_SUBSTITUTE`
-// value as an allowed replacement for the space character) to a string
-$tw.utils.encodeFilterPath = function(s) {
-	var parts = s.replace(SENTENCE_ENDING, "$1" + TRAILER)
-		.replace(/\[\[(.+?)\]\]/g, function (_, t) {return t.replace(/ /g, SPACE_SUBSTITUTE )})
-		.split(" ");
-	var nonEmptyParts = $tw.utils.filter(Boolean)(parts);
-	var trimmed = $tw.utils.map(function(s) {return s.trim()})(nonEmptyParts);
-	var encoded = $tw.utils.map(function(s) {return encodeURIComponent(s)})(trimmed);
-	var substituted = $tw.utils.map(function(s) {return s.replace(/\%20/g, SPACE_SUBSTITUTE)})(encoded);
-	var replaced = $tw.utils.map(function(s) {return s.replace(CHAR_MATCH, function(_, c) {
-		return PCT_CHAR_MAP[c]});
-	})(substituted);
-	return replaced.join(CONJUNCTION);
-};
 
 /*
 Convert a URI encoded string to a string safely
diff --git a/core/modules/utils/utils.js b/core/modules/utils/utils.js
index aaf83ae74..f21a74f92 100644
--- a/core/modules/utils/utils.js
+++ b/core/modules/utils/utils.js
@@ -1005,4 +1005,114 @@ exports.makeCompareFunction = function(type,options) {
 	return (types[type] || types[options.defaultType] || types.number);
 };
 
+exports.map = function(fn) { // Curried map: map(fn)(xs) returns a new array of fn(x) for each x in xs
+	return function (xs) {
+		var results = [];
+		for (var i = 0; i < xs.length; i++) {
+			results.push (fn(xs[i]));
+		}
+		return results;
+	}
+}
+
+exports.filter = function(fn) { // Curried filter: filter(fn)(xs) returns a new array of the x in xs for which fn(x) is truthy
+	return function (xs) {
+		const results = []
+		for (var i = 0; i < xs.length; i++) {
+			if (fn(xs[i])) {
+				results.push(xs[i])
+			}
+		}
+		return results;
+	}
+}
+
+// The character that will substitute for a space in the URL
+var SPACE_SUBSTITUTE = "_";
+
+// The character added to the end to avoid ending with `.`, `?`, `!` or the like
+var TRAILER = "_";
+
+// The character that will separate out the list elements in the URL
+var CONJUNCTION = ";";
+
+// Those of the allowed url characters claimed by TW
+var CLAIMED = [SPACE_SUBSTITUTE, ":", CONJUNCTION];
+
+// Non-alphanumeric characters allowed in a URL fragment
+// More information at https://www.rfc-editor.org/rfc/rfc3986#appendix-A
+var VALID_IN_URL_FRAGMENT = "-._~!$&'()*+,;=:@/?";
+
+// The subset of the pchars we will not percent-encode in permalinks/permaviews
+var SUBSTITUTES = VALID_IN_URL_FRAGMENT.split("").filter(function(c){return CLAIMED.indexOf(c) === -1});
+
+// A regex to match the percent-encoded characters we will want to replace.
+// With the current SPACE_SUBSTITUTE and CONJUNCTION values it is equivalent to
+// /(%2D|%2E|%7E|%21|%24|%26|%27|%28|%29|%2A|%2B|%2C|%3D|%40|%2F|%3F)/g
+var CHAR_MATCH = new RegExp(
+	"(" + SUBSTITUTES.map(function(c) {
+		return "%" + c.charCodeAt(0).toString(16).toUpperCase()
+	}).join("|") + ")",
+	"g"
+);
+
+// A regex to match the SPACE_SUBSTITUTE character
+var SPACE_MATCH = new RegExp("(\\" + SPACE_SUBSTITUTE + ")", "g");
+
+// A regex to match strings whose last character is `.`, `!`, `?` or the TRAILER
+var SENTENCE_ENDING = new RegExp("(\\.|\\!|\\?|\\" + TRAILER + ")$", "g");
+
+// A regex to match strings ending with `.`, `!`, `?` or the TRAILER, followed by one more TRAILER
+var SENTENCE_TRAILING = new RegExp("(\\.|\\!|\\?|\\" + TRAILER + ")\\" + TRAILER + "$", "g");
+
+// An object mapping the percent encodings back to their source characters
+var PCT_CHAR_MAP = SUBSTITUTES.reduce(function (a, c) {
+	a["%" + c.charCodeAt(0).toString(16).toUpperCase()] = c
+	return a
+}, {});
+
+// Decode a URL-encoded list: drop a TRAILER that follows a final `.`, `!`, `?` or TRAILER, split on CONJUNCTION, turn each
+// SPACE_SUBSTITUTE into a space, wrap entries containing a space in `[[...]]`, join with spaces and pass to `$tw.utils.decodeURIComponentSafe`
+exports.decodeTWURIList = function(s) {
+	var parts = s.replace(SENTENCE_TRAILING, "$1").split(CONJUNCTION);
+	var withSpaces = $tw.utils.map(function(s) {return s.replace(SPACE_MATCH, " ")})(parts);
+	var withBrackets = $tw.utils.map(function(s) {return s.indexOf(" ") >= 0 ? "[[" + s + "]]" : s})(withSpaces);
+	return $tw.utils.decodeURIComponentSafe(withBrackets.join(" "));
+};
+
+// Decode a URL-encoded target: drop a TRAILER that follows a final `.`, `!`, `?` or TRAILER, turn each
+// SPACE_SUBSTITUTE into a space and pass the result to `$tw.utils.decodeURIComponentSafe`
+exports.decodeTWURITarget = function(s) {
+	return $tw.utils.decodeURIComponentSafe(
+		s.replace(SENTENCE_TRAILING, "$1").replace(SPACE_MATCH, " ")
+	)
+};
+
+// Encode a tiddler title for a URL: append the TRAILER after a final `.`, `!`, `?` or TRAILER, percent-encode, use SPACE_SUBSTITUTE for `%20` and restore the SUBSTITUTES characters.
+// NOTE(review): a literal `_` in the title is not escaped by encodeURIComponent, so decodeTWURITarget turns it into a space - confirm this is intended
+exports.encodeTiddlerTitle = function(s) {
+	var extended = s.replace(SENTENCE_ENDING, "$1" + TRAILER)
+	var encoded = encodeURIComponent(extended);
+	var substituted = encoded.replace(/\%20/g, SPACE_SUBSTITUTE);
+	return substituted.replace(CHAR_MATCH, function(_, c) {
+		return PCT_CHAR_MAP[c];
+	});
+};
+
+// Encode a filter string for a URL: append the TRAILER after a final `.`, `!`, `?` or TRAILER, replace each `[[...]]` by its contents with spaces
+// as SPACE_SUBSTITUTE, split on spaces, then percent-encode every non-empty part as encodeTiddlerTitle does and join the parts with CONJUNCTION
+exports.encodeFilterPath = function(s) {
+	var parts = s.replace(SENTENCE_ENDING, "$1" + TRAILER)
+		.replace(/\[\[(.+?)\]\]/g, function (_, t) {return t.replace(/ /g, SPACE_SUBSTITUTE )})
+		.split(" ");
+	var nonEmptyParts = $tw.utils.filter(Boolean)(parts);
+	var trimmed = $tw.utils.map(function(s) {return s.trim()})(nonEmptyParts);
+	var encoded = $tw.utils.map(function(s) {return encodeURIComponent(s)})(trimmed);
+	var substituted = $tw.utils.map(function(s) {return s.replace(/\%20/g, SPACE_SUBSTITUTE)})(encoded);
+	var replaced = $tw.utils.map(function(s) {return s.replace(CHAR_MATCH, function(_, c) {
+		return PCT_CHAR_MAP[c]});
+	})(substituted);
+	return replaced.join(CONJUNCTION);
+};
+
 })();
diff --git a/editions/tw5.com/tiddlers/concepts/PermaLinks.tid b/editions/tw5.com/tiddlers/concepts/PermaLinks.tid
index 6c67efe2a..1b15460fa 100644
--- a/editions/tw5.com/tiddlers/concepts/PermaLinks.tid
+++ b/editions/tw5.com/tiddlers/concepts/PermaLinks.tid
@@ -42,7 +42,7 @@ Both the target tiddler title and the story filter should be URL encoded (but no
 
 !! Simpler URLS
 
-<<.from-version "5.2.3">> The URLs generated are simplified from the hard-to-read percent encoding when feasible. Spaces are replaced with underscores (`_`), many punctuation characters are allowed to remain unencoded, and permaview filters receive a simpler encoding. For example the tiddler "Hard Linebreaks with CSS - Example", which percent-encoded would look like
+<<.from-version "5.3.2">> The URLs generated are simplified from the hard-to-read percent encoding when feasible. Spaces are replaced with underscores (`_`), many punctuation characters are allowed to remain unencoded, and permaview filters receive a simpler encoding. For example the tiddler "Hard Linebreaks with CSS - Example", which percent-encoded would look like
 
 > @@font-family:monospace;#Hard%20Linebreaks%20with%20CSS%20-%20Example@@
 