1
0
mirror of https://github.com/Jermolene/TiddlyWiki5 synced 2024-11-27 03:57:21 +00:00

CSV parser improvements (#7042)

This commit is contained in:
Maurycy Zarzycki 2022-11-20 18:51:01 +01:00 committed by GitHub
parent b8a30091ee
commit 4f7b10e055
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 446 additions and 40 deletions

View File

@ -13,6 +13,11 @@ The CSV text parser processes CSV files into a table wrapped in a scrollable wid
"use strict"; "use strict";
var CsvParser = function(type,text,options) { var CsvParser = function(type,text,options) {
// Special handler for tab-delimited files
if (type === 'text/tab-delimited-values' && !options.separator) {
options.separator = "\t";
}
// Table framework // Table framework
this.tree = [{ this.tree = [{
"type": "scrollable", "children": [{ "type": "scrollable", "children": [{
@ -24,30 +29,33 @@ var CsvParser = function(type,text,options) {
}] }]
}]; }];
// Split the text into lines // Split the text into lines
var lines = text.split(/\r?\n/mg), var lines = $tw.utils.parseCsvString(text, options),
tag = "th"; tag = "th";
var maxColumns = 0;
$tw.utils.each(lines, function(columns) {
maxColumns = Math.max(columns.length, maxColumns);
});
for(var line=0; line<lines.length; line++) { for(var line=0; line<lines.length; line++) {
var lineText = lines[line]; var columns = lines[line];
if(lineText) { var row = {
var row = { "type": "element", "tag": "tr", "children": []
"type": "element", "tag": "tr", "children": [] };
}; for(var column=0; column<maxColumns; column++) {
var columns = lineText.split(","); row.children.push({
for(var column=0; column<columns.length; column++) { "type": "element", "tag": tag, "children": [{
row.children.push({ "type": "text",
"type": "element", "tag": tag, "children": [{ "text": columns[column] || ''
"type": "text", }]
"text": columns[column] });
}]
});
}
tag = "td";
this.tree[0].children[0].children[0].children.push(row);
} }
tag = "td";
this.tree[0].children[0].children[0].children.push(row);
} }
}; };
exports["text/csv"] = CsvParser; exports["text/csv"] = CsvParser;
exports["text/tab-delimited-values"] = CsvParser;
})(); })();

View File

@ -12,35 +12,113 @@ A barebones CSV parser
/*global $tw: false */ /*global $tw: false */
"use strict"; "use strict";
var QUOTE = '"';

/*
Locate the boundaries of the single CSV cell that begins at index `start`.

text: the complete CSV string
start: index of the first character of the cell (an opening quote, if the cell is quoted)
length: length of `text`
SEPARATOR: the single character that separates cells

Returns a three element array [contentStart, contentEnd, isQuoted]:
	contentStart: index of the first character of the cell content (after any opening quote)
	contentEnd: index just past the last character of the cell content; for a quoted cell this
		is the index of the closing quote, otherwise the index of the separator/EOL (or `length`)
	isQuoted: true if the cell was quoted, in which case the content may still contain
		doubled quotes ("") that need unescaping
*/
var getCellInfo = function(text, start, length, SEPARATOR) {
	var isCellQuoted = text.charAt(start) === QUOTE;
	var cellStart = isCellQuoted ? start + 1 : start;
	// An unquoted cell that begins directly on the separator is empty. The loop below detects
	// that on its first iteration, so no special case is needed here. (A check against an index
	// that has not been assigned yet would inspect the first character of the whole text and
	// report every cell as empty whenever the text starts with the separator.)
	for (var i = cellStart; i < length; i++) {
		var cellCharacter = text.charAt(i);
		var isEOL = cellCharacter === "\n" || cellCharacter === "\r";
		if (isEOL && !isCellQuoted) {
			// Line breaks only terminate unquoted cells
			return [cellStart, i, false];
		} else if (cellCharacter === SEPARATOR && !isCellQuoted) {
			// Separators only terminate unquoted cells
			return [cellStart, i, false];
		} else if (cellCharacter === QUOTE && isCellQuoted) {
			var nextCharacter = i + 1 < length ? text.charAt(i + 1) : '';
			if (nextCharacter !== QUOTE) {
				// A lone quote closes the quoted cell
				return [cellStart, i, true];
			} else {
				// A doubled quote is an escaped quote: skip its second half
				i++;
			}
		}
	}
	// Ran off the end of the text: the cell extends to the end (also covers an unterminated quoted cell)
	return [cellStart, i, isCellQuoted];
};
exports.parseCsvString = function(text, options) {
if (!text) {
return [];
}
options = options || {};
var SEPARATOR = options.separator || ",",
length = text.length,
rows = [],
nextRow = [];
for (var i = 0; i < length; i++) {
var cellInfo = getCellInfo(text, i, length, SEPARATOR);
var cellText = text.substring(cellInfo[0], cellInfo[1]);
if (cellInfo[2]) {
cellText = cellText.replace(/""/g, '"');
cellInfo[1]++;
}
nextRow.push(cellText);
i = cellInfo[1];
var character = text.charAt(i);
var nextCharacter = i + 1 < length ? text.charAt(i + 1) : '';
if (character === "\r" || character === "\n") {
// Edge case for empty rows
if (nextRow.length === 1 && nextRow[0] === '') {
nextRow.length = 0;
}
rows.push(nextRow);
nextRow = [];
if (character === "\r") {
var nextCharacter = i + 1 < length ? text.charAt(i + 1) : '';
if (nextCharacter === "\n") {
i++;
}
}
}
}
// Special case if last cell in last row is an empty cell
if (text.charAt(length - 1) === SEPARATOR) {
nextRow.push("");
}
rows.push(nextRow);
return rows;
}
/* /*
Parse a CSV string with a header row and return an array of hashmaps. Parse a CSV string with a header row and return an array of hashmaps.
*/ */
exports.parseCsvStringWithHeader = function(text,options) { exports.parseCsvStringWithHeader = function(text,options) {
options = options || {}; var csv = $tw.utils.parseCsvString(text, options);
var separator = options.separator || ",", var headers = csv[0];
rows = text.split(/\r?\n/mg).map(function(row) {
return $tw.utils.trim(row); csv = csv.slice(1);
}).filter(function(row) { for (var i = 0; i < csv.length; i++) {
return row !== ""; var row = csv[i];
}); var rowObject = Object.create(null);
if(rows.length < 1) {
return "Missing header row"; for(var columnIndex=0; columnIndex<headers.length; columnIndex++) {
} var columnName = headers[columnIndex];
var headings = rows[0].split(separator), if (columnName) {
results = []; rowObject[columnName] = $tw.utils.trim(row[columnIndex] || "");
for(var row=1; row<rows.length; row++) { }
var columns = rows[row].split(separator),
columnResult = Object.create(null);
if(columns.length !== headings.length) {
return "Malformed CSV row '" + rows[row] + "'";
} }
for(var column=0; column<columns.length; column++) { csv[i] = rowObject;
var columnName = headings[column];
columnResult[columnName] = $tw.utils.trim(columns[column] || "");
}
results.push(columnResult);
} }
return results; return csv;
} }
})(); })();

View File

@ -0,0 +1,282 @@
title: csv-cases
type: text/plain
description: A JSON file containing test CSV strings together with their expected parse results
[
{
"name": "Empty string",
"options": {},
"csv": "",
"json": [],
"jsonWithHeaders": []
},
{
"name": "Null value",
"options": {},
"csv": null,
"json": [],
"jsonWithHeaders": []
},
{
"name": "Simple CSV with no tricks",
"options": {},
"csv": "cell-11,cell-12,cell-13\r\ncell-21,cell-22,cell-23\r\ncell-31,cell-32,cell-33",
"json": [
["cell-11", "cell-12", "cell-13"],
["cell-21", "cell-22", "cell-23"],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "cell-21", "cell-12": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Custom separator",
"options": {"separator": "\t"},
"csv": ",cell-11,\t,cell-12,\t,cell-13,\r\n,cell-21,\t,cell-22,\t,cell-23,\r\n,cell-31,\t,cell-32,\t,cell-33,",
"json": [
[",cell-11,", ",cell-12,", ",cell-13,"],
[",cell-21,", ",cell-22,", ",cell-23,"],
[",cell-31,", ",cell-32,", ",cell-33,"]
],
"jsonWithHeaders": [
{",cell-11,": ",cell-21,", ",cell-12,": ",cell-22,", ",cell-13,": ",cell-23,"},
{",cell-11,": ",cell-31,", ",cell-12,": ",cell-32,", ",cell-13,": ",cell-33,"}
]
},
{
"name": "Support empty rows",
"options": {},
"csv": "cell-11,cell-12,cell-13\r\n\r\ncell-31,cell-32,cell-33",
"json": [
["cell-11", "cell-12", "cell-13"],
[],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "", "cell-12": "", "cell-13": ""},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Support empty cells",
"options": {},
"csv": "cell-11,cell-12,cell-13\r\n,,\r\ncell-31,cell-32,cell-33",
"json": [
["cell-11", "cell-12", "cell-13"],
["", "", ""],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "", "cell-12": "", "cell-13": ""},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Support LF line endings",
"options": {},
"csv": "cell-11,cell-12,cell-13\ncell-21,cell-22,cell-23\ncell-31,cell-32,cell-33",
"json": [
["cell-11", "cell-12", "cell-13"],
["cell-21", "cell-22", "cell-23"],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "cell-21", "cell-12": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Mixed line endings",
"options": {},
"csv": "cell-11,cell-12,cell-13\ncell-21,cell-22,cell-23\r\ncell-31,cell-32,cell-33",
"json": [
["cell-11", "cell-12", "cell-13"],
["cell-21", "cell-22", "cell-23"],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "cell-21", "cell-12": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Quoted cells",
"options": {},
"csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"",
"json": [
["cell-11", "cell-12", "cell-13"],
["cell-21", "cell-22", "cell-23"],
["cell-31", "cell-32", "cell-33"]
],
"jsonWithHeaders": [
{"cell-11": "cell-21", "cell-12": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", "cell-12": "cell-32", "cell-13": "cell-33"}
]
},
{
"name": "Escaped quotes in cells",
"options": {},
"csv": "cell-11,\"\"\"cell-12\"\"\",cell-13\r\n\"cell\"\"\"\"-21\",cell-22,cell-23\r\ncell-31,cell-32,\"\"\"\"\"cell\"\"\"\"-33\"\"\"\"\"",
"json": [
["cell-11", "\"cell-12\"", "cell-13"],
["cell\"\"-21", "cell-22", "cell-23"],
["cell-31", "cell-32", "\"\"cell\"\"-33\"\""]
],
"jsonWithHeaders": [
{"cell-11": "cell\"\"-21", "\"cell-12\"": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", "\"cell-12\"": "cell-32", "cell-13": "\"\"cell\"\"-33\"\""}
]
},
{
"name": "Separator in quoted cells",
"options": {},
"csv": "cell-11,\",c,e,l,l,-,1,2,\",cell-13\r\n\",c,e,l,l,-,2,1,\",cell-22,cell-23\r\ncell-31,cell-32,\",c,e,l,l,-,3,3,\"",
"json": [
["cell-11", ",c,e,l,l,-,1,2,", "cell-13"],
[",c,e,l,l,-,2,1,", "cell-22", "cell-23"],
["cell-31", "cell-32", ",c,e,l,l,-,3,3,"]
],
"jsonWithHeaders": [
{"cell-11": ",c,e,l,l,-,2,1,", ",c,e,l,l,-,1,2,": "cell-22", "cell-13": "cell-23"},
{"cell-11": "cell-31", ",c,e,l,l,-,1,2,": "cell-32", "cell-13": ",c,e,l,l,-,3,3,"}
]
},
{
"name": "UTF-8 characters",
"options": {},
"csv": "ᑖcell-11™,°cell-12ą,ćcell-13ś\r\nżcell-21ę,łcell-22ó,Ócell-23↑\r\nŹcell-31Ż,Ącell-32Ń,Ęcell-33ę",
"json": [
["ᑖcell-11™", "°cell-12ą", "ćcell-13ś"],
["żcell-21ę", "łcell-22ó", "Ócell-23↑"],
["Źcell-31Ż", "Ącell-32Ń", "Ęcell-33ę"]
],
"jsonWithHeaders": [
{"ᑖcell-11™": "żcell-21ę", "°cell-12ą": "łcell-22ó", "ćcell-13ś": "Ócell-23↑"},
{"ᑖcell-11™": "Źcell-31Ż", "°cell-12ą": "Ącell-32Ń", "ćcell-13ś": "Ęcell-33ę"}
]
},
{
"name": "All in one",
"options": {},
"csv": "\"\"\",\r\n,\"\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"\"\",ҡ͟¼lj·˨Քƣйʊ͕Έӕ,😣👁🔵⛔️🌹\r\n\"\"\",\r\n,\"\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"\"\",ҡ͟¼lj·˨Քƣйʊ͕Έӕ,😣👁🔵⛔️🌹\n\"\"\",\r\n,\"\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"\"\",ҡ͟¼lj·˨Քƣйʊ͕Έӕ,😣👁🔵⛔️🌹",
"json": [
["\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"],
["\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"],
["\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"]
],
"jsonWithHeaders": [
{"\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"": "\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ": "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹": "😣👁🔵⛔️🌹"},
{"\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"": "\",\r\n,\",\r\nĄŚĆżóŁ\n\n\n\r\n,\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ": "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹": "😣👁🔵⛔️🌹"}
]
},
{
"name": "All in one - custom separator",
"options": {"separator": "\t"},
"csv": "\"\"\"\t\r\n\t\"\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"\"\"\tҡ͟¼lj·˨Քƣйʊ͕Έӕ\t😣👁🔵⛔🌹\r\n\"\"\"\t\r\n\t\"\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"\"\"\tҡ͟¼lj·˨Քƣйʊ͕Έӕ\t😣👁🔵⛔🌹\n\"\"\"\t\r\n\t\"\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"\"\"\tҡ͟¼lj·˨Քƣйʊ͕Έӕ\t😣👁🔵⛔🌹",
"json": [
["\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"],
["\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"],
["\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹"]
],
"jsonWithHeaders": [
{"\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"": "\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ": "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹": "😣👁🔵⛔️🌹"},
{"\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"": "\"\t\r\n\t\"\t\r\nĄŚĆżóŁ\n\n\n\r\n\t\"", "ҡ͟¼lj·˨Քƣйʊ͕Έӕ": "ҡ͟¼lj·˨Քƣйʊ͕Έӕ", "😣👁🔵⛔️🌹": "😣👁🔵⛔️🌹"}
]
},
{
"name": "Edge case - only empty rows",
"options": {},
"csv": "\r\n\r\n",
"json": [
[],
[],
[]
],
"jsonWithHeaders": [
{},
{}
]
},
{
"name": "Edge case - only empty cells",
"options": {},
"csv": ",,\r\n,,\r\n,,",
"json": [
["", "", ""],
["", "", ""],
["", "", ""]
],
"jsonWithHeaders": [
{},
{}
]
},
{
"name": "Edge case - Newline -> Comma -> Text",
"options": {},
"csv": "A,B\r\n,C",
"json": [
["A", "B"],
["", "C"]
],
"jsonWithHeaders": [
{"A": "", "B": "C"}
]
},
{
"name": "Edge case - single comma",
"options": {},
"csv": ",",
"json": [
["", ""]
],
"jsonWithHeaders": []
},
{
"@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.",
"name": "Edge case - quote separator",
"options": {"separator": "\""},
"csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"",
"json": [
["cell-11,", "cell-12", ",cell-13"],
["cell-21", "cell-22,cell-23"],
["cell-31,cell-32,", "cell-33", ""]
],
"jsonWithHeaders": [
{"cell-11,": "cell-21", "cell-12": "cell-22,cell-23", ",cell-13": ""},
{"cell-11,": "cell-31,cell-32,", "cell-12": "cell-33", ",cell-13": ""}
]
},
{
"@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.",
"name": "Edge case - carriage return separator",
"options": {"separator": "\r"},
"csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"",
"json": [
["cell-11,\"cell-12\",cell-13"],
["cell-21", "cell-22,cell-23"],
["cell-31,cell-32,\"cell-33\""]
],
"jsonWithHeaders": [
{"cell-11,\"cell-12\",cell-13": "cell-21" },
{"cell-11,\"cell-12\",cell-13": "cell-31,cell-32,\"cell-33\""}
]
},
{
"@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.",
"name": "Edge case - newline separator",
"options": {"separator": "\n"},
"csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"",
"json": [
["cell-11,\"cell-12\",cell-13"],
["cell-21", "cell-22,cell-23"],
["cell-31,cell-32,\"cell-33\""]
],
"jsonWithHeaders": [
{"cell-11,\"cell-12\",cell-13": "cell-21" },
{"cell-11,\"cell-12\",cell-13": "cell-31,cell-32,\"cell-33\""}
]
}
]

View File

@ -0,0 +1,33 @@
/*\
title: modules/utils/test-csv.js
type: application/javascript
tags: [[$:/tags/test-spec]]
Tests the CSV parsing utilities.
\*/
(function(){

/*jslint node: true, browser: true */
/*global $tw: false */
"use strict";

describe('CSV Parsing', function() {
	// The fixtures are stored as a JSON array in the text of the `csv-cases` tiddler
	var fixtures = JSON.parse($tw.wiki.getTiddler('csv-cases').fields.text);

	$tw.utils.each(fixtures, function(fixture) {
		// Fixtures flagged with `skip` are not registered as specs
		if (!fixture.skip) {
			it("Test case: " + fixture.name, function() {
				// Plain parse: array of rows, each an array of cells
				var rows = $tw.utils.parseCsvString(fixture.csv, fixture.options);
				expect(rows)
					.withContext("The generated CSV should match the expected one")
					.toEqual(fixture.json);

				// Header-aware parse: compared against the fixture's `jsonWithHeaders`
				var records = $tw.utils.parseCsvStringWithHeader(fixture.csv, fixture.options);
				expect(records)
					.withContext("The generated CSV with headers should match the expected one")
					.toEqual(fixture.jsonWithHeaders);
			});
		}
	});
});

})();

View File

@ -395,6 +395,11 @@ CSV parser plugin
white-space: nowrap; white-space: nowrap;
} }
/* pre-line collapses runs of spaces but keeps line breaks, so newlines inside a cell's text are rendered as line breaks */
.tc-csv-table th,
.tc-csv-table td {
white-space: pre-line;
}
/* /*
Tiddler frame in story river Tiddler frame in story river
*/ */