From 4f7b10e0550150b5099d587b9dc3aa551d6d02eb Mon Sep 17 00:00:00 2001 From: Maurycy Zarzycki Date: Sun, 20 Nov 2022 18:51:01 +0100 Subject: [PATCH] CSV parser improvements (#7042) --- core/modules/parsers/csvparser.js | 42 +-- core/modules/utils/csv.js | 124 ++++++-- .../test/tiddlers/tests/data/csv-cases.tid | 282 ++++++++++++++++++ .../tiddlers/tests/modules/utils/test-csv.js | 33 ++ themes/tiddlywiki/vanilla/base.tid | 5 + 5 files changed, 446 insertions(+), 40 deletions(-) create mode 100644 editions/test/tiddlers/tests/data/csv-cases.tid create mode 100644 editions/test/tiddlers/tests/modules/utils/test-csv.js diff --git a/core/modules/parsers/csvparser.js b/core/modules/parsers/csvparser.js index 0e6c9f7bc..40431d0ae 100644 --- a/core/modules/parsers/csvparser.js +++ b/core/modules/parsers/csvparser.js @@ -13,6 +13,11 @@ The CSV text parser processes CSV files into a table wrapped in a scrollable wid "use strict"; var CsvParser = function(type,text,options) { + // Special handler for tab-delimited files + if (type === 'text/tab-delimited-values' && !options.separator) { + options.separator = "\t"; + } + // Table framework this.tree = [{ "type": "scrollable", "children": [{ @@ -24,30 +29,33 @@ var CsvParser = function(type,text,options) { }] }]; // Split the text into lines - var lines = text.split(/\r?\n/mg), + var lines = $tw.utils.parseCsvString(text, options), tag = "th"; + var maxColumns = 0; + $tw.utils.each(lines, function(columns) { + maxColumns = Math.max(columns.length, maxColumns); + }); + for(var line=0; line Comma -> Text", + "options": {}, + "csv": "A,B\r\n,C", + "json": [ + ["A", "B"], + ["", "C"] + ], + "jsonWithHeaders": [ + {"A": "", "B": "C"} + ] + }, + { + "name": "Edge case - single comma", + "options": {}, + "csv": ",", + "json": [ + ["", ""] + ], + "jsonWithHeaders": [] + }, + { + "@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.", + 
"name": "Edge case - quote separator", + "options": {"separator": "\""}, + "csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"", + "json": [ + ["cell-11,", "cell-12", ",cell-13"], + ["cell-21", "cell-22,cell-23"], + ["cell-31,cell-32,", "cell-33", ""] + ], + "jsonWithHeaders": [ + {"cell-11,": "cell-21", "cell-12": "cell-22,cell-23", ",cell-13": ""}, + {"cell-11,": "cell-31,cell-32,", "cell-12": "cell-33", ",cell-13": ""} + ] + }, + { + "@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.", + "name": "Edge case - carriage return separator", + "options": {"separator": "\r"}, + "csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"", + "json": [ + ["cell-11,\"cell-12\",cell-13"], + ["cell-21", "cell-22,cell-23"], + ["cell-31,cell-32,\"cell-33\""] + ], + "jsonWithHeaders": [ + {"cell-11,\"cell-12\",cell-13": "cell-21" }, + {"cell-11,\"cell-12\",cell-13": "cell-31,cell-32,\"cell-33\""} + ] + }, + { + "@comment": "The behavior here is undefined - the only thing that matters is it should not throw an exception, the result is free to make no sense.", + "name": "Edge case - newline separator", + "options": {"separator": "\n"}, + "csv": "cell-11,\"cell-12\",cell-13\r\n\"cell-21\",cell-22,cell-23\r\ncell-31,cell-32,\"cell-33\"", + "json": [ + ["cell-11,\"cell-12\",cell-13"], + ["cell-21", "cell-22,cell-23"], + ["cell-31,cell-32,\"cell-33\""] + ], + "jsonWithHeaders": [ + {"cell-11,\"cell-12\",cell-13": "cell-21" }, + {"cell-11,\"cell-12\",cell-13": "cell-31,cell-32,\"cell-33\""} + ] + } +] \ No newline at end of file diff --git a/editions/test/tiddlers/tests/modules/utils/test-csv.js b/editions/test/tiddlers/tests/modules/utils/test-csv.js new file mode 100644 index 000000000..b53e9b289 --- /dev/null +++ b/editions/test/tiddlers/tests/modules/utils/test-csv.js @@ -0,0 +1,33 @@ +/*\ +title: 
modules/utils/test-csv.js +type: application/javascript +tags: [[$:/tags/test-spec]] + +Tests the CSV parsing utilities (parseCsvString and parseCsvStringWithHeader). + +\*/ +(function(){ +/*jslint node: true, browser: true */ +/*global $tw: false */ +"use strict"; + +describe('CSV Parsing', function() { + var tid = $tw.wiki.getTiddler('csv-cases'); + var testCases = JSON.parse(tid.fields.text); + + $tw.utils.each(testCases, function(testCase) { + if (testCase.skip) { + return; + } + it("Test case: " + testCase.name, function() { + var parsedCsv = $tw.utils.parseCsvString(testCase.csv, testCase.options); + expect(parsedCsv).withContext("The generated CSV should match the expected one").toEqual(testCase.json); + + var parsedCsvWithHeaders = $tw.utils.parseCsvStringWithHeader(testCase.csv, testCase.options); + expect(parsedCsvWithHeaders).withContext("The generated CSV with headers should match the expected one").toEqual(testCase.jsonWithHeaders); + }); + }) + +}); + +})(); diff --git a/themes/tiddlywiki/vanilla/base.tid b/themes/tiddlywiki/vanilla/base.tid index e9faee67b..6ff10e040 100644 --- a/themes/tiddlywiki/vanilla/base.tid +++ b/themes/tiddlywiki/vanilla/base.tid @@ -395,6 +395,11 @@ CSV parser plugin white-space: nowrap; } +.tc-csv-table th, +.tc-csv-table td { + white-space: pre-line; +} + /* Tiddler frame in story river */