1
0
mirror of https://github.com/SquidDev-CC/CC-Tweaked synced 2025-01-27 17:34:48 +00:00

Custom parse errors for Lua (#1298)

- Add several (internal) modules for lexing and parsing Lua code. These
   allow us to provide (hopefully) higher quality error messages than
   Lua's built-in messages.

 - `shell.run`, `edit` and `lua` now use this parser when fed invalid
   code. This allows us to provide better syntax errors, while not
   having any impact on the happy path.

   Note this does not affect any other mechanism for loading code 
   (`load`, `require`, `dofile`).

There's still a lot of work to do here in improving error message
quality, but hopefully this provides a good starting point.
This commit is contained in:
Jonathan Coates 2023-01-25 20:35:43 +00:00 committed by GitHub
parent e076818b29
commit a12b405acf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 4474 additions and 21 deletions

View File

@ -0,0 +1,173 @@
--[[- A pretty-printer for Lua errors.
:::warning
This is an internal module and SHOULD NOT be used in your own code. It may
be removed or changed at any time.
:::
This consumes a list of messages and "annotations" and displays the error to the
terminal.
@see cc.internal.syntax.errors For errors produced by the parser.
@local
]]
local pretty = require "cc.pretty"
local expect = require "cc.expect"
local expect, field = expect.expect, expect.field
local wrap = require "cc.strings".wrap
--- Print a message on its own line.
--
-- Tables are handed to the pretty printer, anything else goes through the
-- plain @{print} function.
--
-- @tparam cc.pretty.Doc|string msg The message to write.
local function display(msg)
    if type(msg) ~= "table" then
        print(msg)
    else
        pretty.print(msg)
    end
end
--- Write a message to the screen, aligning to the current cursor position.
--
-- The message is wrapped to the space between the cursor's current column and
-- the right edge of the terminal. Every continuation line starts in the same
-- column the cursor was in when this function was called.
--
-- @tparam cc.pretty.Doc|string msg The message to write.
-- @tparam function preamble A function called with the row (`y`) of each
-- continuation line, before the cursor is moved onto that line. This can be
-- used to draw something in the margin to the left of the message.
local function display_here(msg, preamble)
    expect(1, msg, "string", "table")

    local x = term.getCursorPos()
    local width, height = term.getSize()
    -- The width available to us, from the cursor to the edge of the screen.
    width = width - x + 1

    -- Move to the start of the next line, scrolling if we're on the last row.
    local function newline()
        local _, y = term.getCursorPos()
        if y >= height then
            term.scroll(1)
        else
            y = y + 1
        end

        preamble(y)
        term.setCursorPos(x, y)
    end

    if type(msg) == "string" then
        local lines = wrap(msg, width)
        term.write(lines[1])
        for i = 2, #lines do
            newline()
            term.write(lines[i])
        end
    else
        local def_colour = term.getTextColour()
        local function display_impl(doc)
            expect(1, doc, "table")
            local kind = doc.tag
            if kind == "nil" then return
            elseif kind == "text" then
                -- TODO: cc.strings.wrap doesn't support a leading indent. We should
                -- fix that!
                -- Might also be nice to add a wrap_iter, which returns an iterator over
                -- start_pos, end_pos instead.

                if doc.colour then term.setTextColour(doc.colour) end

                -- Pad the text with the space already used on this line, so
                -- that wrapping accounts for it, then strip the padding again
                -- before writing the first line.
                local x1 = term.getCursorPos()
                local lines = wrap((" "):rep(x1 - x) .. doc.text, width)
                term.write(lines[1]:sub(x1 - x + 1))
                for i = 2, #lines do
                    newline()
                    term.write(lines[i])
                end

                if doc.colour then term.setTextColour(def_colour) end
            elseif kind == "concat" then
                for i = 1, doc.n do display_impl(doc[i]) end
            else
                -- kind may be nil (or any other value) if we were given a table
                -- which isn't a document, so convert it before concatenating.
                error("Unknown doc " .. tostring(kind))
            end
        end
        display_impl(msg)
    end
    print()
end
--- A list of colours we can use for error messages. The printer cycles
-- through these, moving on to the next colour for each annotation it displays.
local error_colours = { colours.red, colours.green, colours.magenta, colours.orange }
--- The accent line used to denote a block of code. This is drawn in the first
-- column of each line of a code snippet.
local code_accent = pretty.text("\x95", colours.cyan)
--[[- Display an error message on the terminal.

Each entry of the message is displayed in turn, with a line break printed
between entries. Entries tagged as `annotate` are shown as a line number, the
corresponding line of source code, an indicator underneath the annotated region
and (if present) the annotation's message. Any other entry is printed as-is.

@tparam { get_pos = function, get_line = function } context
The context where the error was reported. This effectively acts as a view
over the underlying source, exposing the following functions:
- `get_pos`: Get the line and column of an opaque position.
- `get_line`: Get the source code for an opaque position.
@tparam table message The message to display, as produced by @{cc.internal.syntax.errors}.
@throws If the message has no entries.
]]
return function(context, message)
expect(1, context, "table")
expect(2, message, "table")
field(context, "get_pos", "function")
field(context, "get_line", "function")
if #message == 0 then error("Message is empty", 2) end
-- Index into error_colours of the colour to use for the next annotation.
local error_colour = 1
local width = term.getSize()
for msg_idx = 1, #message do
if msg_idx > 1 then print() end
local msg = message[msg_idx]
if type(msg) == "table" and msg.tag == "annotate" then
local line, col = context.get_pos(msg.start_pos)
local end_line, end_col = context.get_pos(msg.end_pos)
local contents = context.get_line(msg.start_pos)
-- Pick a starting column. We pick the left-most position which fits
-- in one of the following:
-- - 10 characters after the start column.
-- - 5 characters after the end column.
-- - The end of the line.
-- If the annotation ends on a different line, we only consider (and
-- highlight) up to the end of the line it starts on.
if line ~= end_line then end_col = #contents end
local start_col = math.max(1, math.min(col + 10, end_col + 5, #contents + 1) - width + 1)
-- Pick a colour for this annotation.
local colour = colours.toBlit(error_colours[error_colour])
error_colour = (error_colour % #error_colours) + 1
-- Print the line number and snippet of code. We display french
-- quotes on either side of the string if it is truncated.
-- The snippet is at most width - 1 characters, as the first column of
-- the terminal is used by the accent line. Each quote takes the place
-- of one character of code.
local str_start, str_end = start_col, start_col + width - 2
local prefix, suffix = "", ""
if start_col > 1 then
str_start = str_start + 1
prefix = pretty.text("\xab", colours.grey)
end
if str_end < #contents then
str_end = str_end - 1
suffix = pretty.text("\xbb", colours.grey)
end
pretty.print(code_accent .. pretty.text("Line " .. line, colours.cyan))
pretty.print(code_accent .. prefix .. pretty.text(contents:sub(str_start, str_end), colours.lightGrey) .. suffix)
-- Print a line highlighting the region of text.
local _, y = term.getCursorPos()
pretty.write(code_accent)
-- Don't let the indicator run past the end of the displayed snippet.
local indicator_end = end_col
if end_col > str_end then indicator_end = str_end end
local indicator_len = indicator_end - col + 1
-- Source column start_col is drawn in terminal column 2 (column 1 holds
-- the accent line), so shift the annotation's column to match.
term.setCursorPos(col - start_col + 2, y)
term.blit(("\x83"):rep(indicator_len), colour:rep(indicator_len), ("f"):rep(indicator_len))
print()
-- And then print the annotation's message, if present.
if msg.msg ~= "" then
term.blit("\x95", colour, "f")
-- Each continuation line of the message gets the same coloured accent
-- drawn in the first column.
display_here(msg.msg, function(y)
term.setCursorPos(1, y)
term.blit("\x95", colour, "f")
end)
end
else
display(msg)
end
end
end

View File

@ -0,0 +1,552 @@
--[[- The error messages reported by our lexer and parser.
:::warning
This is an internal module and SHOULD NOT be used in your own code. It may
be removed or changed at any time.
:::
This provides a list of factory methods which take source positions and produce
appropriate error messages targeting that location. These error messages can
then be displayed to the user via @{cc.internal.error_printer}.
@local
]]
local pretty = require "cc.pretty"
local expect = require "cc.expect".expect
local tokens = require "cc.internal.syntax.parser".tokens
--- Create an annotation, which attaches an (optional) message to a region of
-- the source code.
--
-- This may be called either as `annotate(start_pos, end_pos, msg)` or as
-- `annotate(pos, msg)`. In the latter case, the annotation starts and ends at
-- the same position.
--
-- @tparam number start_pos The start position of the annotated region.
-- @tparam[opt] number end_pos The end position of the annotated region.
-- @tparam[opt] string|table msg The message to display alongside this region.
-- @treturn table The annotation, a table with the tag `annotate`.
local function annotate(start_pos, end_pos, msg)
    if msg == nil then
        -- Only two arguments were given: if the second one looks like a
        -- message, then treat it as one.
        local kind = type(end_pos)
        if kind == "string" or kind == "table" or kind == "nil" then
            msg = end_pos
            end_pos = start_pos
        end
    end

    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, msg, "string", "table", "nil")

    local annotation = { tag = "annotate" }
    annotation.start_pos = start_pos
    annotation.end_pos = end_pos
    annotation.msg = msg or ""
    return annotation
end
--- Wrap a snippet of code in a light grey text document, so that it stands
-- out from the surrounding message.
--
-- @tparam string msg The code to format.
-- @treturn cc.pretty.Doc The formatted code.
local function code(msg)
    local colour = colours.lightGrey
    return pretty.text(msg, colour)
end
--- Maps tokens to a more friendly version.
--
-- Keys are token ids from the parser's `tokens` table. Values are either a
-- plain string describing the token, or (for symbols and keywords) the token's
-- source text formatted with @{code}. Indexing this table with a token which
-- has no entry throws an error instead of returning @{nil}.
local token_names = setmetatable({
-- Specific tokens.
[tokens.IDENT] = "identifier",
[tokens.NUMBER] = "number",
[tokens.STRING] = "string",
[tokens.EOF] = "end of file",
-- Symbols and keywords
[tokens.ADD] = code("+"),
[tokens.AND] = code("and"),
[tokens.BREAK] = code("break"),
[tokens.CBRACE] = code("}"),
[tokens.COLON] = code(":"),
[tokens.COMMA] = code(","),
[tokens.CONCAT] = code(".."),
[tokens.CPAREN] = code(")"),
[tokens.CSQUARE] = code("]"),
[tokens.DIV] = code("/"),
[tokens.DO] = code("do"),
[tokens.DOT] = code("."),
[tokens.DOTS] = code("..."),
[tokens.ELSE] = code("else"),
[tokens.ELSEIF] = code("elseif"),
[tokens.END] = code("end"),
[tokens.EQ] = code("=="),
[tokens.EQUALS] = code("="),
[tokens.FALSE] = code("false"),
[tokens.FOR] = code("for"),
[tokens.FUNCTION] = code("function"),
[tokens.GE] = code(">="),
[tokens.GT] = code(">"),
[tokens.IF] = code("if"),
[tokens.IN] = code("in"),
[tokens.LE] = code("<="),
[tokens.LEN] = code("#"),
[tokens.LOCAL] = code("local"),
[tokens.LT] = code("<"),
[tokens.MOD] = code("%"),
[tokens.MUL] = code("*"),
[tokens.NE] = code("~="),
[tokens.NIL] = code("nil"),
[tokens.NOT] = code("not"),
[tokens.OBRACE] = code("{"),
[tokens.OPAREN] = code("("),
[tokens.OR] = code("or"),
[tokens.OSQUARE] = code("["),
[tokens.POW] = code("^"),
[tokens.REPEAT] = code("repeat"),
[tokens.RETURN] = code("return"),
[tokens.SEMICOLON] = code(";"),
[tokens.SUB] = code("-"),
[tokens.THEN] = code("then"),
[tokens.TRUE] = code("true"),
[tokens.UNTIL] = code("until"),
[tokens.WHILE] = code("while"),
-- Fail loudly on an unknown token, blaming the code which did the lookup.
}, { __index = function(_, name) error("No such token " .. tostring(name), 2) end })
local errors = {}
--------------------------------------------------------------------------------
-- Lexer errors
--------------------------------------------------------------------------------
--[[- A quoted string which is missing its closing quote.

@tparam number start_pos The start position of the string.
@tparam number end_pos The end position of the string.
@tparam string quote The kind of quote (`"` or `'`).
@return The resulting parse error.
]]
function errors.unfinished_string(start_pos, end_pos, quote)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, quote, "string")

    local summary = "This string is not finished. Are you missing a closing quote (" .. code(quote) .. ")?"
    local opened_at = annotate(start_pos, "String started here.")
    local closes_at = annotate(end_pos, "Expected a closing quote here.")
    return { summary, opened_at, closes_at }
end
--[[- A quoted string whose last character is the start of an escape sequence
(so a literal `"foo\`). Unlike @{unfinished_string}, this does not suggest
adding a closing quote.

@tparam number start_pos The start position of the string.
@tparam number end_pos The end position of the string.
@tparam string quote The kind of quote (`"` or `'`).
@return The resulting parse error.
]]
function errors.unfinished_string_escape(start_pos, end_pos, quote)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, quote, "string")

    local opened_at = annotate(start_pos, "String started here.")
    local escape_at = annotate(end_pos, "An escape sequence was started here, but with nothing following it.")
    return { "This string is not finished.", opened_at, escape_at }
end
--[[- A long string which has no closing delimiter.

@tparam number start_pos The start position of the long string delimiter.
@tparam number end_pos The end position of the long string delimiter.
@tparam number len The length of the long string delimiter, excluding the first `[`.
@return The resulting parse error.
]]
function errors.unfinished_long_string(start_pos, end_pos, len)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, len, "number")

    local opened_at = annotate(start_pos, end_pos, "String was started here.")
    -- The closing delimiter has as many "="s as the opening one.
    local closing = code("]" .. ("="):rep(len - 1) .. "]")
    local hint = "We expected a closing delimiter (" .. closing .. ") somewhere after this string was started."
    return { "This string was never finished.", opened_at, hint }
end
--[[- The start of a long string which is missing its second `[` (i.e. `[=`).

@tparam number start_pos The start position of the long string delimiter.
@tparam number end_pos The end position of the long string delimiter.
@tparam number len The length of the long string delimiter, excluding the first `[`.
@return The resulting parse error.
]]
function errors.malformed_long_string(start_pos, end_pos, len)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, len, "number")

    local location = annotate(start_pos, end_pos)
    local tip = "Tip: If you wanted to start a long string here, add an extra " .. code("[") .. " here."
    return { "Incorrect start of a long string.", location, tip }
end
--[[- A `[[` was found inside another `[[ ... ]]` long string.

@tparam number start_pos The start position of the long string delimiter.
@tparam number end_pos The end position of the long string delimiter.
@return The resulting parse error.
]]
function errors.nested_long_str(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local summary = code("[[") .. " cannot be nested inside another " .. code("[[ ... ]]")
    local location = annotate(start_pos, end_pos)
    return { summary, location }
end
--[[- A numeric literal which is not a valid number.

@tparam number start_pos The start position of the number.
@tparam number end_pos The end position of the number.
@return The resulting parse error.
]]
function errors.malformed_number(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local location = annotate(start_pos, end_pos)
    local formats = "Numbers must be in one of the following formats: " .. code("123") .. ", "
        .. code("3.14") .. ", " .. code("23e35") .. ", " .. code("0x01AF") .. "."
    return { "This isn't a valid number.", location, formats }
end
--[[- A long comment which has no closing delimiter.

@tparam number start_pos The start position of the long comment delimiter.
@tparam number end_pos The end position of the long comment delimiter.
@tparam number len The length of the long comment delimiter, excluding the first `[`.
@return The resulting parse error.
]]
function errors.unfinished_long_comment(start_pos, end_pos, len)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")
    expect(3, len, "number")

    local opened_at = annotate(start_pos, end_pos, "Comment was started here.")
    -- The closing delimiter has as many "="s as the opening one.
    local closing = code("]" .. ("="):rep(len - 1) .. "]")
    local hint = "We expected a closing delimiter (" .. closing .. ") somewhere after this comment was started."
    return { "This comment was never finished.", opened_at, hint }
end
--[[- `&&` was written where Lua expects `and`.

@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.wrong_and(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local location = annotate(start_pos, end_pos)
    local tip = "Tip: Replace this with " .. code("and") .. " to check if both values are true."
    return { "Unexpected character.", location, tip }
end
--[[- `||` was written where Lua expects `or`.

@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.wrong_or(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local location = annotate(start_pos, end_pos)
    local tip = "Tip: Replace this with " .. code("or") .. " to check if either value is true."
    return { "Unexpected character.", location, tip }
end
--[[- `!=` was written where Lua expects `~=`.

@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.wrong_ne(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local location = annotate(start_pos, end_pos)
    local tip = "Tip: Replace this with " .. code("~=") .. " to check if two values are not equal."
    return { "Unexpected character.", location, tip }
end
--[[- A character which cannot appear in Lua code was found.

@tparam number pos The position of this character.
@return The resulting parse error.
]]
function errors.unexpected_character(pos)
    expect(1, pos, "number")

    local location = annotate(pos, "This character isn't usable in Lua code.")
    return { "Unexpected character.", location }
end
--------------------------------------------------------------------------------
-- Expression parsing errors
--------------------------------------------------------------------------------
--[[- The generic error used when an expression was required, but some other
token was found instead.

@tparam number token The token id.
@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.expected_expression(token, start_pos, end_pos)
    expect(1, token, "number")
    expect(2, start_pos, "number")
    expect(3, end_pos, "number")

    local summary = "Unexpected " .. token_names[token] .. ". Expected an expression."
    local location = annotate(start_pos, end_pos)
    return { summary, location }
end
--[[- The generic error used when a variable name was required, but some other
token was found instead.

@tparam number token The token id.
@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.expected_var(token, start_pos, end_pos)
    expect(1, token, "number")
    expect(2, start_pos, "number")
    expect(3, end_pos, "number")

    local summary = "Unexpected " .. token_names[token] .. ". Expected a variable name."
    local location = annotate(start_pos, end_pos)
    return { summary, location }
end
--[[- A single `=` appeared inside an expression.

@tparam number start_pos The start position of the `=` token.
@tparam number end_pos The end position of the `=` token.
@return The resulting parse error.
]]
function errors.use_double_equals(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local summary = "Unexpected " .. code("=") .. " in expression."
    local location = annotate(start_pos, end_pos)
    local tip = "Tip: Replace this with " .. code("==") .. " to check if two values are equal."
    return { summary, location, tip }
end
--[[- An `=` followed an expression inside a table constructor.

@tparam number start_pos The start position of the `=` token.
@tparam number end_pos The end position of the `=` token.
@return The resulting parse error.
]]
function errors.table_key_equals(start_pos, end_pos)
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    local summary = "Unexpected " .. code("=") .. " in expression."
    local location = annotate(start_pos, end_pos)
    local tip = "Tip: Wrap the preceding expression in " .. code("[") .. " and " .. code("]") .. " to use it as a table key."
    return { summary, location, tip }
end
--------------------------------------------------------------------------------
-- Statement parsing errors
--------------------------------------------------------------------------------
--[[- The generic error used when a statement was required, but some other
token was found instead.

@tparam number token The token id.
@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.expected_statement(token, start_pos, end_pos)
    expect(1, token, "number")
    expect(2, start_pos, "number")
    expect(3, end_pos, "number")

    local summary = "Unexpected " .. token_names[token] .. ". Expected a statement."
    local location = annotate(start_pos, end_pos)
    return { summary, location }
end
--[[- A `local function` was declared with a name containing a `.`.

@tparam number local_start The start position of the `local` token.
@tparam number local_end The end position of the `local` token.
@tparam number dot_start The start position of the `.` token.
@tparam number dot_end The end position of the `.` token.
@return The resulting parse error.
]]
function errors.local_function_dot(local_start, local_end, dot_start, dot_end)
    expect(1, local_start, "number")
    expect(2, local_end, "number")
    expect(3, dot_start, "number")
    expect(4, dot_end, "number")

    local summary = "Cannot use " .. code("local function") .. " with a table key."
    local at_dot = annotate(dot_start, dot_end, code(".") .. " appears here.")
    local at_local = annotate(local_start, local_end, "Tip: " .. "Try removing this " .. code("local") .. " keyword.")
    return { summary, at_dot, at_local }
end
--[[- A name was used as a statement and followed by another token, as in
`x.y z`.

@tparam number pos The position right after this name.
@return The resulting parse error.
]]
function errors.standalone_name(pos)
    expect(1, pos, "number")

    local location = annotate(pos)
    return {
        "Unexpected symbol after name.",
        location,
        "Did you mean to assign this or call it as a function?",
    }
end
--[[- A name was used as a statement, as in `x.y`. This is the counterpart to
@{standalone_name} for when the next token is on another line.

@tparam number pos The position right after this name.
@return The resulting parse error.
]]
function errors.standalone_name_call(pos)
    expect(1, pos, "number")

    local location = annotate(pos + 1, "Expected something before the end of the line.")
    local tip = "Tip: Use " .. code("()") .. " to call with no arguments."
    return { "Unexpected symbol after variable.", location, tip }
end
--[[- The condition of an `if`/`elseif` was not followed by `then`.

@tparam number if_start The start position of the `if`/`elseif` keyword.
@tparam number if_end The end position of the `if`/`elseif` keyword.
@tparam number token_pos The current token position.
@return The resulting parse error.
]]
function errors.expected_then(if_start, if_end, token_pos)
    expect(1, if_start, "number")
    expect(2, if_end, "number")
    expect(3, token_pos, "number")

    local summary = "Expected " .. code("then") .. " after if condition."
    local at_if = annotate(if_start, if_end, "If statement started here.")
    local at_token = annotate(token_pos, "Expected " .. code("then") .. " before here.")
    return { summary, at_if, at_token }
end
--[[- `end` was expected

@tparam number block_start The start position of the block.
@tparam number block_end The end position of the block.
@tparam number token The id of the current token.
@tparam number token_start The start position of the current token.
@tparam number token_end The end position of the current token.
@return The resulting parse error.
]]
function errors.expected_end(block_start, block_end, token, token_start, token_end)
    -- Validate our arguments up-front, like every other error in this module.
    expect(1, block_start, "number")
    expect(2, block_end, "number")
    expect(3, token, "number")
    expect(4, token_start, "number")
    expect(5, token_end, "number")

    return {
        "Unexpected " .. token_names[token] .. ". Expected " .. code("end") .. " or another statement.",
        annotate(block_start, block_end, "Block started here."),
        annotate(token_start, token_end, "Expected end of block here."),
    }
end
--[[- An unexpected `end` in a statement.

@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.unexpected_end(start_pos, end_pos)
    -- Validate our arguments up-front, like every other error in this module.
    expect(1, start_pos, "number")
    expect(2, end_pos, "number")

    return {
        "Unexpected " .. code("end") .. ".",
        annotate(start_pos, end_pos),
        "Your program contains more " .. code("end") .. "s than needed. Check " ..
        "each block (" .. code("if") .. ", " .. code("for") .. ", " ..
        code("function") .. ", ...) only has one " .. code("end") .. ".",
    }
end
--------------------------------------------------------------------------------
-- Generic parsing errors
--------------------------------------------------------------------------------
--[[- The most generic error, used when there is nothing more useful to say
than that a token was not expected.

@tparam number token The token id.
@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.unexpected_token(token, start_pos, end_pos)
    expect(1, token, "number")
    expect(2, start_pos, "number")
    expect(3, end_pos, "number")

    local summary = "Unexpected " .. token_names[token] .. "."
    local location = annotate(start_pos, end_pos)
    return { summary, location }
end
--[[- A bracket was opened, but an unexpected token was found before it was
closed.

@tparam number open_start The start position of the opening bracket.
@tparam number open_end The end position of the opening bracket.
@tparam number token The id of the unexpected token.
@tparam number start_pos The start position of the unexpected token.
@tparam number end_pos The end position of the unexpected token.
@return The resulting parse error.
]]
function errors.unclosed_brackets(open_start, open_end, token, start_pos, end_pos)
    expect(1, open_start, "number")
    expect(2, open_end, "number")
    expect(3, token, "number")
    expect(4, start_pos, "number")
    expect(5, end_pos, "number")

    -- TODO: Do we want to be smarter here with where we report the error?
    local name = token_names[token]
    local summary = "Unexpected " .. name .. ". Are you missing a closing bracket?"
    local at_open = annotate(open_start, open_end, "Brackets were opened here.")
    local at_token = annotate(start_pos, end_pos, "Unexpected " .. name .. " here.")
    return { summary, at_open, at_token }
end
--[[- Expected `(` to open our function arguments.

@tparam number token The token id.
@tparam number start_pos The start position of the token.
@tparam number end_pos The end position of the token.
@return The resulting parse error.
]]
function errors.expected_function_args(token, start_pos, end_pos)
    -- Validate our arguments up-front, like every other error in this module.
    expect(1, token, "number")
    expect(2, start_pos, "number")
    expect(3, end_pos, "number")

    return {
        "Unexpected " .. token_names[token] .. ". Expected " .. code("(") .. " to start function arguments.",
        annotate(start_pos, end_pos),
    }
end
return errors

View File

@ -0,0 +1,100 @@
--[[- The main entrypoint to our Lua parser
:::warning
This is an internal module and SHOULD NOT be used in your own code. It may
be removed or changed at any time.
:::
@local
]]
local expect = require "cc.expect".expect
local lex_one = require "cc.internal.syntax.lexer".lex_one
local parser = require "cc.internal.syntax.parser"
local error_printer = require "cc.internal.error_printer"
--- Lex and parse a string, printing any syntax error to the terminal.
--
-- @tparam string input The string to parse.
-- @tparam number start_symbol The start symbol to pass to the parser (for
-- instance `parser.program` or `parser.repl_exprs`).
-- @treturn boolean `true` if the input parsed successfully, `false` if a
-- syntax error was reported (or the lexer produced an `ERROR` token).
-- @throws Any other error raised while parsing is rethrown unchanged.
local function parse(input, start_symbol)
expect(1, input, "string")
expect(2, start_symbol, "number")
-- Cache the parser's entry point and token ids in locals. Note that `parse`
-- shadows this function from here on.
local parse, tokens, last_token = parser.parse, parser.tokens, parser.tokens.COMMENT
-- A unique value thrown to abort parsing once an error has been displayed.
local error_sentinel = {}
local context = {}
-- The start position of every line seen so far. The first line starts at 1.
local lines = { 1 }
-- Record the start position of a new line.
function context.line(pos) lines[#lines + 1] = pos end
-- Convert a position to a line and column, by searching backwards for the
-- last recorded line which starts at or before this position.
function context.get_pos(pos)
expect(1, pos, "number")
for i = #lines, 1, -1 do
local start = lines[i]
if pos >= start then return i, pos - start + 1 end
end
error("Position is <= 0", 2)
end
-- Get the contents of the line containing a position, from the start of the
-- line up to (but not including) the next "\r" or "\n".
function context.get_line(pos)
expect(1, pos, "number")
for i = #lines, 1, -1 do
local start = lines[i]
if pos >= start then return input:match("[^\r\n]*", start) end
end
error("Position is <= 0", 2)
end
-- Display an error message and then abort parsing.
function context.report(msg)
expect(1, msg, "table")
error_printer(context, msg)
error(error_sentinel)
end
-- The position to start lexing the next token from.
local pos = 1
-- The second argument is the parser's token source: each call returns the
-- next token along with its start and end positions.
local ok, err = pcall(parse, context, function()
while true do
local token, start, finish = lex_one(context, input, pos)
-- No token means we've reached the end of the input.
if not token then return tokens.EOF, #input + 1, #input + 1 end
pos = finish + 1
-- Tokens with an id below COMMENT are passed to the parser. An ERROR token
-- aborts parsing, and any other token is skipped.
if token < last_token then
return token, start, finish
elseif token == tokens.ERROR then
error(error_sentinel)
end
end
end, start_symbol)
if ok then
return true
elseif err == error_sentinel then
return false
else
-- Not one of our errors: rethrow it without adding position information.
error(err, 0)
end
end
--[[- Parse a whole Lua program. Any syntax errors are printed to the terminal.

@tparam string input The string to parse.
@treturn boolean Whether the string was successfully parsed.
]]
local function parse_program(input)
    local start_symbol = parser.program
    return parse(input, start_symbol)
end
--[[- Parse the input to a REPL, which may be either a program or a list of
expressions. Any syntax errors are printed to the terminal.

@tparam string input The string to parse.
@treturn boolean Whether the string was successfully parsed.
]]
local function parse_repl(input)
    local start_symbol = parser.repl_exprs
    return parse(input, start_symbol)
end
-- The functions exported by this module.
return {
parse_program = parse_program,
parse_repl = parse_repl,
}

View File

@ -0,0 +1,359 @@
--[[- A lexer for Lua source code.
:::warning
This is an internal module and SHOULD NOT be used in your own code. It may
be removed or changed at any time.
:::
This module provides utilities for lexing Lua code, returning tokens compatible
with @{cc.internal.syntax.parser}. While all lexers are roughly the same, there
are some design choices worth drawing attention to:
- The lexer uses Lua patterns (i.e. @{string.find}) as much as possible,
trying to avoid @{string.sub} loops except when needed. This allows us to
move string processing to native code, which ends up being much faster.
- We try to avoid allocating where possible. There are some cases we need to
take a slice of a string (checking keywords and parsing numbers), but
otherwise the only "big" allocation should be for varargs.
- The lexer is somewhat incremental (it can be started from anywhere and
returns one token at a time) and will never error: instead it reports the
error as an incomplete or `ERROR` token.
@local
]]
local errors = require "cc.internal.syntax.errors"
local tokens = require "cc.internal.syntax.parser".tokens
local sub, find = string.sub, string.find
--- A lookup table from each Lua keyword to the corresponding token id.
local keywords = {
["and"] = tokens.AND, ["break"] = tokens.BREAK, ["do"] = tokens.DO, ["else"] = tokens.ELSE,
["elseif"] = tokens.ELSEIF, ["end"] = tokens.END, ["false"] = tokens.FALSE, ["for"] = tokens.FOR,
["function"] = tokens.FUNCTION, ["if"] = tokens.IF, ["in"] = tokens.IN, ["local"] = tokens.LOCAL,
["nil"] = tokens.NIL, ["not"] = tokens.NOT, ["or"] = tokens.OR, ["repeat"] = tokens.REPEAT,
["return"] = tokens.RETURN, ["then"] = tokens.THEN, ["true"] = tokens.TRUE, ["until"] = tokens.UNTIL,
["while"] = tokens.WHILE,
}
--- Consume a line break, telling the context where the next line begins.
--
-- A line break is either a single "\n" or "\r", or one of the two-character
-- pairs "\r\n" and "\n\r". Two identical characters in a row count as two
-- separate line breaks.
--
-- @param context The current parser context.
-- @tparam string str The current string.
-- @tparam number pos The position of the newline character.
-- @tparam string nl The current new line character, either "\n" or "\r".
-- @treturn number The new position, after the newline.
local function newline(context, str, pos, nl)
    local next_pos = pos + 1

    -- Skip over the second half of a "\r\n" or "\n\r" pair.
    local following = sub(str, next_pos, next_pos)
    local is_line_break = following == "\r" or following == "\n"
    if is_line_break and following ~= nl then
        next_pos = next_pos + 1
    end

    context.line(next_pos) -- Mark the start of the next line.
    return next_pos
end
--- Lex a numeric literal.
--
-- This greedily consumes anything which looks like it belongs to a number
-- (digits, hexadecimal letters, `.` and an exponent with an optional sign), and
-- then uses @{tonumber} to check whether the consumed text is a valid number.
-- If it is not, a malformed number error is reported to the context.
--
-- @param context The current parser context.
-- @tparam string str The current string.
-- @tparam number start The start position of this number.
-- @treturn number The token id for numbers.
-- @treturn number The end position of this number
local function lex_number(context, str, start)
    local cursor = start + 1

    -- Decimal numbers mark their exponent with "e", hexadecimal ones with "p".
    local exponent_lower, exponent_upper = "e", "E"
    if sub(str, start, start) == "0" then
        local marker = sub(str, cursor, cursor)
        if marker == "x" or marker == "X" then
            exponent_lower, exponent_upper = "p", "P"
            cursor = cursor + 1
        end
    end

    repeat
        local ch = sub(str, cursor, cursor)
        local consumed = true
        if ch == exponent_lower or ch == exponent_upper then
            -- An exponent, which may be followed by a sign.
            cursor = cursor + 1
            local sign = sub(str, cursor, cursor)
            if sign == "+" or sign == "-" then cursor = cursor + 1 end
        elseif ch == "." or (ch >= "0" and ch <= "9") or (ch >= "a" and ch <= "f") or (ch >= "A" and ch <= "F") then
            cursor = cursor + 1
        else
            consumed = false
        end
    until not consumed

    local last = cursor - 1
    if tonumber(sub(str, start, last)) == nil then
        -- TODO: Separate error for "2..3"?
        context.report(errors.malformed_number(start, last))
    end

    return tokens.NUMBER, last
end
--- Lex a quoted string.
--
-- This scans forward from the opening quote until it finds the matching
-- closing quote, skipping over escape sequences. If the string is not closed
-- before the end of the line (or input), an error is reported to the context.
--
-- @param context The current parser context.
-- @tparam string str The string we're lexing.
-- @tparam number start_pos The start position of the string.
-- @tparam string quote The quote character, either " or '.
-- @treturn number The token id for strings.
-- @treturn number The new position.
local function lex_string(context, str, start_pos, quote)
local pos = start_pos + 1
while true do
local c = sub(str, pos, pos)
if c == quote then
-- Found the closing quote.
return tokens.STRING, pos
elseif c == "\n" or c == "\r" or c == "" then
-- We hit the end of the line (or the input) before a closing quote.
-- We don't call newline here, as that's done for the next token.
context.report(errors.unfinished_string(start_pos, pos, quote))
return tokens.STRING, pos - 1
elseif c == "\\" then
-- An escape sequence: look at the character after the backslash.
c = sub(str, pos + 1, pos + 1)
if c == "\n" or c == "\r" then
-- An escaped line break: the string continues on the next line.
pos = newline(context, str, pos + 1, c)
elseif c == "" then
-- The backslash is the last character of the input.
context.report(errors.unfinished_string_escape(start_pos, pos, quote))
return tokens.STRING, pos
elseif c == "z" then
-- "\z" skips any following whitespace, including line breaks.
pos = pos + 2
while true do
-- Find the next character which is either not whitespace, or is a line
-- break (which we need to record with the context).
local next_pos, _, c = find(str, "([%S\r\n])", pos)
if not next_pos then
-- Only whitespace remains, so the string was never closed.
context.report(errors.unfinished_string(start_pos, #str, quote))
return tokens.STRING, #str
end
if c == "\n" or c == "\r" then
pos = newline(context, str, next_pos, c)
else
pos = next_pos
break
end
end
else
-- Any other escape: skip the backslash and the character after it.
pos = pos + 2
end
else
pos = pos + 1
end
end
end
--- Consume the remainder of a long string's opening or closing delimiter:
-- zero or more `=`, followed by the terminating bracket.
--
-- @tparam string str The input string.
-- @tparam number pos The start position. This must be after the first `[` or `]`.
-- @tparam string fin The terminating character, either `[` or `]`.
-- @treturn boolean Whether a long string was successfully started.
-- @treturn number The current position. This is the position of the
-- terminating character on success, or of the first unexpected character
-- otherwise.
local function lex_long_str_boundary(str, pos, fin)
    -- Skip over the run of "="s.
    while sub(str, pos, pos) == "=" do
        pos = pos + 1
    end

    return sub(str, pos, pos) == fin, pos
end
--- Lex the body of a long string (or long comment), searching for the closing
-- delimiter which matches the opening one.
--
-- Line breaks inside the string are recorded with the context. If the string
-- was opened with `[[` and contains another `[[`, then a nested long string
-- error is reported.
--
-- @param context The current parser context.
-- @tparam string str The input string.
-- @tparam number start The start position, after the input boundary.
-- @tparam number len The expected length of the boundary. Equal to 1 + the
-- number of `=`.
-- @treturn number|nil The end position, or @{nil} if this is not terminated.
local function lex_long_str(context, str, start, len)
    -- Jump straight to the next character we might care about: a bracket or
    -- a line break.
    local cursor = find(str, "[%[%]\n\r]", start)
    while cursor do
        local ch = sub(str, cursor, cursor)
        if ch == "\n" or ch == "\r" then
            cursor = newline(context, str, cursor, ch)
        else
            -- ch is "[" or "]": see if it begins a delimiter of the same kind
            -- and length as the one which opened this string.
            local matched, edge = lex_long_str_boundary(str, cursor + 1, ch)
            local same_length = matched and edge - cursor == len
            if ch == "]" then
                if same_length then return edge end
            elseif same_length and len == 1 then
                context.report(errors.nested_long_str(cursor, edge))
            end

            cursor = edge
        end

        cursor = find(str, "[%[%]\n\r]", cursor)
    end

    return nil
end
--- Lex a single token, assuming we have removed all leading whitespace.
--
-- Malformed input is reported through `context.report`. Depending on the
-- problem we either return a "best guess" token (for instance `NE` for `!=`) or
-- an `ERROR` token.
--
-- @param context The current parser context.
-- @tparam string str The string we're lexing.
-- @tparam number pos The start position.
-- @treturn number The id of the parsed token.
-- @treturn number The end position of this token.
-- @treturn string|nil The token's current contents (only given for identifiers
-- and keywords)
local function lex_token(context, str, pos)
    local c = sub(str, pos, pos)

    -- Identifiers and keywords
    if (c >= "a" and c <= "z") or (c >= "A" and c <= "Z") or c == "_" then
        local _, end_pos = find(str, "^[%w_]+", pos)
        if not end_pos then error("Impossible: No position") end

        local contents = sub(str, pos, end_pos)
        return keywords[contents] or tokens.IDENT, end_pos, contents

    -- Numbers
    elseif c >= "0" and c <= "9" then return lex_number(context, str, pos)

    -- Strings
    elseif c == "\"" or c == "\'" then return lex_string(context, str, pos, c)

    elseif c == "[" then
        local ok, boundary_pos = lex_long_str_boundary(str, pos + 1, "[")
        if ok then -- Long string
            local end_pos = lex_long_str(context, str, boundary_pos + 1, boundary_pos - pos)
            if end_pos then return tokens.STRING, end_pos end

            context.report(errors.unfinished_long_string(pos, boundary_pos, boundary_pos - pos))
            return tokens.ERROR, #str
        elseif pos + 1 == boundary_pos then -- Just a "["
            return tokens.OSQUARE, pos
        else -- Malformed long string, for instance "[="
            context.report(errors.malformed_long_string(pos, boundary_pos, boundary_pos - pos))
            return tokens.ERROR, boundary_pos
        end

    elseif c == "-" then
        c = sub(str, pos + 1, pos + 1)
        if c ~= "-" then return tokens.SUB, pos end

        local comment_pos = pos + 2 -- Advance to the start of the comment

        -- Check if we're a long string.
        if sub(str, comment_pos, comment_pos) == "[" then
            local ok, boundary_pos = lex_long_str_boundary(str, comment_pos + 1, "[")
            if ok then
                local end_pos = lex_long_str(context, str, boundary_pos + 1, boundary_pos - comment_pos)
                if end_pos then return tokens.COMMENT, end_pos end

                context.report(errors.unfinished_long_comment(pos, boundary_pos, boundary_pos - comment_pos))
                return tokens.ERROR, #str
            end
        end

        -- Otherwise fall back to a line comment.
        local _, end_pos = find(str, "^[^\n\r]*", comment_pos)
        return tokens.COMMENT, end_pos

    elseif c == "." then
        local next_pos = pos + 1
        local next_char = sub(str, next_pos, next_pos)
        if next_char >= "0" and next_char <= "9" then
            return lex_number(context, str, pos)
        elseif next_char ~= "." then
            return tokens.DOT, pos
        end

        if sub(str, pos + 2, pos + 2) ~= "." then return tokens.CONCAT, next_pos end

        return tokens.DOTS, pos + 2

    elseif c == "=" then
        local next_pos = pos + 1
        if sub(str, next_pos, next_pos) == "=" then return tokens.EQ, next_pos end
        return tokens.EQUALS, pos

    elseif c == ">" then
        local next_pos = pos + 1
        -- ">=" is greater-or-equal, so it must not share a token with "<=".
        if sub(str, next_pos, next_pos) == "=" then return tokens.GE, next_pos end
        return tokens.GT, pos

    elseif c == "<" then
        local next_pos = pos + 1
        if sub(str, next_pos, next_pos) == "=" then return tokens.LE, next_pos end
        -- A lone "<" is less-than, so it must not share a token with ">".
        return tokens.LT, pos

    elseif c == "~" and sub(str, pos + 1, pos + 1) == "=" then return tokens.NE, pos + 1

    -- Single character tokens
    elseif c == "," then return tokens.COMMA, pos
    elseif c == ";" then return tokens.SEMICOLON, pos
    elseif c == ":" then return tokens.COLON, pos
    elseif c == "(" then return tokens.OPAREN, pos
    elseif c == ")" then return tokens.CPAREN, pos
    elseif c == "]" then return tokens.CSQUARE, pos
    elseif c == "{" then return tokens.OBRACE, pos
    elseif c == "}" then return tokens.CBRACE, pos
    elseif c == "*" then return tokens.MUL, pos
    elseif c == "/" then return tokens.DIV, pos
    elseif c == "#" then return tokens.LEN, pos
    elseif c == "%" then return tokens.MOD, pos
    elseif c == "^" then return tokens.POW, pos
    elseif c == "+" then return tokens.ADD, pos

    else
        -- Unknown input: gather everything up to the next whitespace,
        -- alphanumeric character or bracket, so that common mistakes borrowed
        -- from other languages can be recognised as a single unit.
        local end_pos = find(str, "[%s%w(){}%[%]]", pos)
        if end_pos then end_pos = end_pos - 1 else end_pos = #str end

        if end_pos - pos <= 3 then
            local contents = sub(str, pos, end_pos)
            if contents == "&&" then
                context.report(errors.wrong_and(pos, end_pos))
                return tokens.AND, end_pos
            elseif contents == "||" then
                context.report(errors.wrong_or(pos, end_pos))
                return tokens.OR, end_pos
            -- NOTE(review): the "<>" comparison can never match here, as input
            -- starting with "<" is consumed by the branch above.
            elseif contents == "!=" or contents == "<>" then
                context.report(errors.wrong_ne(pos, end_pos))
                return tokens.NE, end_pos
            end
        end

        context.report(errors.unexpected_character(pos))
        return tokens.ERROR, end_pos
    end
end
--[[- Lex a single token from an input string.

Whitespace before the token is skipped. Each line break encountered on the way
is passed to `newline` before scanning continues.

@param context The current parser context.
@tparam string str The string we're lexing.
@tparam number pos The start position.
@treturn[1] number The id of the parsed token.
@treturn[1] number The start position of this token.
@treturn[1] number The end position of this token.
@treturn[1] string|nil The token's current contents (only given for identifiers)
@treturn[2] nil If there are no more tokens to consume
]]
local function lex_one(context, str, pos)
    while true do
        -- Find the next character that is either a line break or not whitespace.
        local start_pos, _, c = find(str, "([%S\r\n])", pos)
        if not start_pos then return end

        if c ~= "\r" and c ~= "\n" then
            local token_id, end_pos, content = lex_token(context, str, start_pos)
            return token_id, start_pos, end_pos, content
        end

        pos = newline(context, str, start_pos, c)
    end
end
return {
lex_one = lex_one,
}

View File

@ -51,14 +51,20 @@ end
local runHandler = [[multishell.setTitle(multishell.getCurrent(), %q)
local current = term.current()
local ok, err = load(%q, %q, nil, _ENV)
if ok then ok, err = pcall(ok, ...) end
term.redirect(current)
term.setTextColor(term.isColour() and colours.yellow or colours.white)
term.setBackgroundColor(colours.black)
term.setCursorBlink(false)
if not ok then
printError(err)
local contents = %q
local fn, err = load(contents, %q, nil, _ENV)
if fn then
local ok, err = pcall(fn, ...)
term.redirect(current)
term.setTextColor(term.isColour() and colours.yellow or colours.white)
term.setBackgroundColor(colours.black)
term.setCursorBlink(false)
if not ok then printError(err) end
else
local parser = require "cc.internal.syntax"
if parser.parse_program(contents) then printError(err) end
end
local message = "Press any key to continue."

View File

@ -51,7 +51,7 @@ while bRunning do
write("lua> ")
--term.setTextColour( colours.white )
local s = read(nil, tCommandHistory, function(sLine)
local input = read(nil, tCommandHistory, function(sLine)
if settings.get("lua.autocomplete") then
local nStartPos = string.find(sLine, "[a-zA-Z0-9_%.:]+$")
if nStartPos then
@ -63,10 +63,10 @@ while bRunning do
end
return nil
end)
if s:match("%S") and tCommandHistory[#tCommandHistory] ~= s then
table.insert(tCommandHistory, s)
if input:match("%S") and tCommandHistory[#tCommandHistory] ~= input then
table.insert(tCommandHistory, input)
end
if settings.get("lua.warn_against_use_of_local") and s:match("^%s*local%s+") then
if settings.get("lua.warn_against_use_of_local") and input:match("^%s*local%s+") then
if term.isColour() then
term.setTextColour(colours.yellow)
end
@ -75,12 +75,12 @@ while bRunning do
end
local nForcePrint = 0
local func, e = load(s, "=lua", "t", tEnv)
local func2 = load("return _echo(" .. s .. ");", "=lua", "t", tEnv)
local func, err = load(input, "=lua", "t", tEnv)
local func2 = load("return _echo(" .. input .. ");", "=lua", "t", tEnv)
if not func then
if func2 then
func = func2
e = nil
err = nil
nForcePrint = 1
end
else
@ -110,7 +110,8 @@ while bRunning do
printError(tResults[2])
end
else
printError(e)
local parser = require "cc.internal.syntax"
if parser.parse_repl(input) then printError(err) end
end
end

View File

@ -108,10 +108,11 @@ local function executeProgram(remainingRecursion, path, args)
end
-- First check if the file begins with a #!
local contents = file.readLine()
file.close()
local contents = file.readLine() or ""
if contents:sub(1, 2) == "#!" then
file.close()
if contents and contents:sub(1, 2) == "#!" then
remainingRecursion = remainingRecursion - 1
if remainingRecursion == 0 then
printError("Hashbang recursion depth limit reached when loading file: " .. path)
@ -137,11 +138,40 @@ local function executeProgram(remainingRecursion, path, args)
return executeProgram(remainingRecursion, resolvedHashbangProgram, hashbangArgs)
end
contents = contents .. "\n" .. (file.readAll() or "")
file.close()
local dir = fs.getDir(path)
local env = createShellEnv(dir)
local env = setmetatable(createShellEnv(dir), { __index = _G })
env.arg = args
return os.run(env, path, table.unpack(args))
local func, err = load(contents, "@" .. fs.getName(path), nil, env)
if not func then
-- We had a syntax error. Attempt to run it through our own parser if
-- the file is "small enough", otherwise report the original error.
if #contents < 1024 * 128 then
local parser = require "cc.internal.syntax"
if parser.parse_program(contents) then printError(err) end
else
printError(err)
end
return false
end
if settings.get("bios.strict_globals", false) then
getmetatable(env).__newindex = function(_, name)
error("Attempt to create global " .. tostring(name), 2)
end
end
local ok, err = pcall(func, table.unpack(args))
if ok then
return true
else
if err and err ~= "" then printError(err) end
return false
end
end
--- Run a program with the supplied arguments.

View File

@ -0,0 +1,52 @@
local helpers = require "test_helpers"
describe("cc.internal.syntax", function()
    local syntax = require "cc.internal.syntax"
    local parser = require "cc.internal.syntax.parser"
    local syntax_helpers = require "modules.cc.internal.syntax.syntax_helpers"

    describe("can parse all of CC's Lua files", function()
        --- Walk a directory tree, yielding the path of every `.lua` file.
        -- Intended to be driven through @{coroutine.wrap}; with no argument it
        -- starts at the root of the file system.
        local function list_dir(path)
            path = path or "/"
            for _, name in pairs(fs.list(path)) do
                local child = fs.combine(path, name)
                if fs.isDir(child) then
                    list_dir(child)
                elseif child:sub(-4) == ".lua" then
                    coroutine.yield(child)
                end
            end
        end

        -- One test per file: each must parse without producing any error.
        for file in coroutine.wrap(list_dir) do
            it(file, function()
                helpers.with_window(50, 10, function()
                    local handle = fs.open(file, "r")
                    local contents = handle.readAll()
                    handle.close()

                    expect(syntax.parse_program(contents)):describe(file):eq(true)
                end)
            end)
        end
    end)

    -- Most of the parser's behaviour is specified as golden tests. This is a
    -- little nasty (it's more of an end-to-end test), but much easier to write!
    local function describe_golden(name, path, print_tokens)
        local spec_file = "test-rom/spec/modules/cc/internal/syntax/" .. path
        helpers.describe_golden(name, spec_file, function(lua, extra)
            -- A code fence may name the parser state to start in, as `{name}`.
            local start
            if #extra > 0 then
                start = parser[extra:match("^{([a-z_]+)}$")]
                if not start then fail("Cannot extract start symbol " .. extra) end
            end

            return syntax_helpers.capture_parser(lua, print_tokens, start)
        end)
    end

    describe_golden("the lexer", "lexer_spec.md", true)
    describe_golden("the parser", "parser_spec.md", false)
    describe_golden("the parser (all states)", "parser_exhaustive_spec.md", false)
end)

View File

@ -0,0 +1,319 @@
We provide a lexer for Lua source code. Here we test that the lexer returns the
correct tokens and positions, and that it can report sensible error messages.
# Comments
## Single-line comments
We can lex some basic comments:
```lua
-- A basic singleline comment comment
--[ Not a multiline comment
--[= Also not a multiline comment!
```
```txt
1:1-1:37 COMMENT -- A basic singleline comment comment
2:1-2:27 COMMENT --[ Not a multiline comment
3:1-3:34 COMMENT --[= Also not a multiline comment!
```
It's also useful to test empty comments (including no trailing newline) separately:
```lua
--
```
```txt
1:1-1:2 COMMENT --
```
## Multi-line comments
Multiline/long-string-style comments are also supported:
```lua
--[[
A
multiline
comment
]]
--[=[ ]==] ]] ]=]
--[[ ]=]]
```
```txt
1:1-5:2 COMMENT --[[<NL> A<NL> multiline<NL> comment<NL>]]
7:1-7:18 COMMENT --[=[ ]==] ]] ]=]
9:1-9:9 COMMENT --[[ ]=]]
```
We also fail on unfinished comments:
```lua
--[=[
```
```txt
This comment was never finished.
|
1 | --[=[
| ^^^^^ Comment was started here.
We expected a closing delimiter (]=]) somewhere after this comment was started.
1:1-1:5 ERROR --[=[
```
Nested comments are rejected, just as Lua 5.1 does:
```lua
--[[ [[ ]]
```
```txt
[[ cannot be nested inside another [[ ... ]]
|
1 | --[[ [[ ]]
| ^^
1:1-1:10 COMMENT --[[ [[ ]]
```
# Strings
We can lex basic strings:
```lua
return "abc", "abc\"", 'abc', 'abc\z
', "abc\
continued"
```
```txt
1:1-1:6 RETURN return
1:8-1:12 STRING "abc"
1:13-1:13 COMMA ,
1:15-1:21 STRING "abc\""
1:22-1:22 COMMA ,
1:24-1:28 STRING 'abc'
1:29-1:29 COMMA ,
1:31-3:1 STRING 'abc\z<NL><NL>'
3:2-3:2 COMMA ,
3:4-4:10 STRING "abc\<NL>continued"
```
We also can lex unterminated strings, including those where there's no closing
quote:
```lua
return "abc
```
```txt
1:1-1:6 RETURN return
This string is not finished. Are you missing a closing quote (")?
|
1 | return "abc
| ^ String started here.
|
1 | return "abc
| ^ Expected a closing quote here.
1:8-1:11 STRING "abc
```
And those where the zap is malformed:
```lua
return "abc\z
```
```txt
1:1-1:6 RETURN return
This string is not finished. Are you missing a closing quote (")?
|
1 | return "abc\z
| ^ String started here.
|
1 | return "abc\z
| ^ Expected a closing quote here.
1:8-1:14 STRING "abc\z<NL>
```
Finally, strings where the escape is entirely missing:
```lua
return "abc\
```
```txt
1:1-1:6 RETURN return
This string is not finished.
|
1 | return "abc\
| ^ String started here.
|
1 | return "abc\
| ^ An escape sequence was started here, but with nothing following it.
1:8-1:12 STRING "abc\
```
## Multi-line/long strings
We can also handle long strings:
```lua
return [[a b c]], [=[a b c ]=]
```
```txt
1:1-1:6 RETURN return
1:8-1:16 STRING [[a b c]]
1:17-1:17 COMMA ,
1:19-1:30 STRING [=[a b c ]=]
```
Unfinished long strings are correctly reported:
```lua
return [[
```
```txt
1:1-1:6 RETURN return
This string was never finished.
|
1 | return [[
| ^^ String was started here.
We expected a closing delimiter (]]) somewhere after this string was started.
1:8-1:9 ERROR [[
```
We also handle malformed opening strings:
```lua
return [=
```
```txt
1:1-1:6 RETURN return
Incorrect start of a long string.
|
1 | return [=
| ^^^
Tip: If you wanted to start a long string here, add an extra [ here.
1:8-1:10 ERROR [=
```
# Numbers
```lua
return 0, 0.0, 0e1, .23, 0x23, 23e-2, 23e+2
```
```txt
1:1-1:6 RETURN return
1:8-1:8 NUMBER 0
1:9-1:9 COMMA ,
1:11-1:13 NUMBER 0.0
1:14-1:14 COMMA ,
1:16-1:18 NUMBER 0e1
1:19-1:19 COMMA ,
1:21-1:23 NUMBER .23
1:24-1:24 COMMA ,
1:26-1:29 NUMBER 0x23
1:30-1:30 COMMA ,
1:32-1:36 NUMBER 23e-2
1:37-1:37 COMMA ,
1:39-1:43 NUMBER 23e+2
```
We also handle malformed numbers:
```lua
return 2..3, 2eee2
```
```txt
1:1-1:6 RETURN return
This isn't a valid number.
|
1 | return 2..3, 2eee2
| ^^^^
Numbers must be in one of the following formats: 123, 3.14, 23e35, 0x01AF.
1:8-1:11 NUMBER 2..3
1:12-1:12 COMMA ,
This isn't a valid number.
|
1 | return 2..3, 2eee2
| ^^^^^
Numbers must be in one of the following formats: 123, 3.14, 23e35, 0x01AF.
1:14-1:18 NUMBER 2eee2
```
# Unknown tokens
We can suggest alternatives for possible errors:
```lua
if a != b then end
if a ~= b then end
if a && b then end
if a || b then end
```
```txt
1:1-1:2 IF if
1:4-1:4 IDENT a
Unexpected character.
|
1 | if a != b then end
| ^^
Tip: Replace this with ~= to check if two values are not equal.
1:6-1:7 NE !=
1:9-1:9 IDENT b
1:11-1:14 THEN then
1:16-1:18 END end
2:1-2:2 IF if
2:4-2:4 IDENT a
2:6-2:7 NE ~=
2:9-2:9 IDENT b
2:11-2:14 THEN then
2:16-2:18 END end
3:1-3:2 IF if
3:4-3:4 IDENT a
Unexpected character.
|
3 | if a && b then end
| ^^
Tip: Replace this with and to check if both values are true.
3:6-3:7 AND &&
3:9-3:9 IDENT b
3:11-3:14 THEN then
3:16-3:18 END end
4:1-4:2 IF if
4:4-4:4 IDENT a
Unexpected character.
|
4 | if a || b then end
| ^^
Tip: Replace this with or to check if either value is true.
4:6-4:7 OR ||
4:9-4:9 IDENT b
4:11-4:14 THEN then
4:16-4:18 END end
```
For entirely unknown glyphs we should just give up and return an `ERROR` token.
```lua
return $*&(*)xyz
```
```txt
1:1-1:6 RETURN return
Unexpected character.
|
1 | return $*&(*)xyz
| ^ This character isn't usable in Lua code.
1:8-1:10 ERROR $*&
```

View File

@ -0,0 +1,294 @@
We provide a parser for Lua source code. Here we test that the parser reports
sensible syntax errors in specific cases.
# Expressions
## Invalid equals
We correct the user if they type `=` instead of `==`.
```lua
if a = b then end
```
```txt
Unexpected = in expression.
|
1 | if a = b then end
| ^
Tip: Replace this with == to check if two values are equal.
```
We apply a slightly different error when this occurs in tables:
```lua
return { "abc" = "def" }
```
```txt
Unexpected = in expression.
|
1 | return { "abc" = "def" }
| ^
Tip: Wrap the preceding expression in [ and ] to use it as a table key.
```
Note this doesn't occur if there's already a table key here:
```lua
return { x = "abc" = }
```
```txt
Unexpected = in expression.
|
1 | return { x = "abc" = }
| ^
Tip: Replace this with == to check if two values are equal.
```
## Unclosed parenthesis
We warn on unclosed parentheses in expressions:
```lua
return (2
```
```txt
Unexpected end of file. Are you missing a closing bracket?
|
1 | return (2
| ^ Brackets were opened here.
|
1 | return (2
| ^ Unexpected end of file here.
```
Function calls:
```lua
return f(2
```
```txt
Unexpected end of file. Are you missing a closing bracket?
|
1 | return f(2
| ^ Brackets were opened here.
|
1 | return f(2
| ^ Unexpected end of file here.
```
and function definitions:
```lua
local function f(a
```
```txt
Unexpected end of file. Are you missing a closing bracket?
|
1 | local function f(a
| ^ Brackets were opened here.
|
1 | local function f(a
| ^ Unexpected end of file here.
```
# Statements
## Local functions with table identifiers
We provide a custom error for using `.` inside a `local function` name.
```lua
local function x.f() end
```
```txt
Cannot use local function with a table key.
|
1 | local function x.f() end
| ^ . appears here.
|
1 | local function x.f() end
| ^^^^^ Tip: Try removing this local keyword.
```
## Standalone identifiers
A common error is a user forgetting to use `()` to call a function. We provide
a custom error for this case:
```lua
term.clear
local _ = 1
```
```txt
Unexpected symbol after variable.
|
1 | term.clear
| ^ Expected something before the end of the line.
Tip: Use () to call with no arguments.
```
If the next symbol is on the same line we provide a slightly different error:
```lua
x 1
```
```txt
Unexpected symbol after name.
|
1 | x 1
| ^
Did you mean to assign this or call it as a function?
```
An EOF token is treated as a new line.
```lua
term.clear
```
```txt
Unexpected symbol after variable.
|
1 | term.clear
| ^ Expected something before the end of the line.
Tip: Use () to call with no arguments.
```
## If statements
For if statements, we say when we expected the `then` keyword.
```lua
if 0
```
```txt
Expected then after if condition.
|
1 | if 0
| ^^ If statement started here.
|
1 | if 0
| ^ Expected then before here.
```
```lua
if 0 then
elseif 0
```
```txt
Expected then after if condition.
|
2 | elseif 0
| ^^^^^^ If statement started here.
|
2 | elseif 0
| ^ Expected then before here.
```
## Expecting `end`
We provide errors for missing `end`s.
```lua
if true then
print("Hello")
```
```txt
Unexpected end of file. Expected end or another statement.
|
1 | if true then
| ^^ Block started here.
|
2 | print("Hello")
| ^ Expected end of block here.
```
```lua
while true do
print("Hello")
```
```txt
Unexpected end of file. Expected end or another statement.
|
1 | while true do
| ^^^^^ Block started here.
|
2 | print("Hello")
| ^ Expected end of block here.
```
While we typically see these errors at the end of the file, there are some cases
where it may occur before then:
```lua
return (function()
if true then
)()
```
```txt
Unexpected ). Expected end or another statement.
|
2 | if true then
| ^^ Block started here.
|
3 | )()
| ^ Expected end of block here.
```
Note we do not currently attempt to identify mismatched `end`s. This might be
something to do in the future.
```lua
if true then
while true do
end
```
```txt
Unexpected end of file. Expected end or another statement.
|
1 | if true then
| ^^ Block started here.
|
3 | end
| ^ Expected end of block here.
```
## Unexpected `end`
We also report when there are more `end`s than expected.
```lua
if true then
end
end
```
```txt
Unexpected end.
|
3 | end
| ^^^
```
```lua
repeat
if true then
end
end
until true
```
```txt
Unexpected end.
|
4 | end
| ^^^
```

View File

@ -0,0 +1,107 @@
local expect = require "cc.expect".expect
local lex_one = require "cc.internal.syntax.lexer".lex_one
local parser = require "cc.internal.syntax.parser"
local tokens, last_token = parser.tokens, parser.tokens.COMMENT
--- Make a dummy context, which only tracks where each line starts.
--
-- The returned table provides `line(pos)` to record the start position of a new
-- line, `get_pos(pos)` to convert an absolute position into a line number,
-- column and line start position, and `lines`, the list of recorded line
-- starts (line 1 always starts at position 1).
--
-- @param input The input being parsed. Currently unused.
-- @treturn table The new context.
local function make_context(input)
    local lines = { 1 }
    local context = { lines = lines }

    function context.line(pos) lines[#lines + 1] = pos end

    function context.get_pos(pos)
        -- Search backwards for the last line which starts at or before `pos`.
        local i = #lines
        while i >= 1 do
            local start = lines[i]
            if pos >= start then return i, pos - start + 1, start end
            i = i - 1
        end

        error("Position is <= 0", 2)
    end

    return context
end
--[[- Run a parser on an input string, capturing its output.

This uses a simplified method of displaying errors (compared with
@{cc.internal.error_printer}), which is suitable for printing to a file.

@tparam string input The input string to parse.
@tparam[opt=false] boolean print_tokens Whether to print each token as it's parsed.
@tparam[opt] number start The start state of the parser.
@treturn string The parser's output
]]
local function capture_parser(input, print_tokens, start)
    expect(1, input, "string")
    expect(2, print_tokens, "boolean", "nil")
    expect(3, start, "number", "nil")

    -- A unique value thrown to abort parsing when the lexer yields an ERROR
    -- token. It lets us tell that apart from a genuine failure in the parser.
    local error_sentinel = {}

    -- Everything "printed" is collected here instead of going to the terminal.
    local out = {}
    local function print(x) out[#out + 1] = tostring(x) end

    --- Look up the name of a token id, for display purposes.
    local function get_name(token)
        for name, tok in pairs(tokens) do if tok == token then return name end end
        return "?[" .. tostring(token) .. "]"
    end

    local context = make_context(input)
    function context.report(message)
        -- `message` is this function's first (and only) parameter.
        expect(1, message, "table")

        for _, msg in ipairs(message) do
            if type(msg) == "table" and msg.tag == "annotate" then
                local line, col = context.get_pos(msg.start_pos)
                local end_line, end_col = context.get_pos(msg.end_pos)

                -- Show the source line, then underline the annotated span.
                local contents = input:match("^([^\r\n]*)", context.lines[line])
                print(" |")
                print(("%2d | %s"):format(line, contents))

                -- Spans which continue onto a later line are cut short.
                local indicator = line == end_line and ("^"):rep(end_col - col + 1) or "^..."
                if #msg.msg > 0 then
                    print((" | %s%s %s"):format((" "):rep(col - 1), indicator, msg.msg))
                else
                    print((" | %s%s"):format((" "):rep(col - 1), indicator))
                end
            else
                print(tostring(msg))
            end
        end
    end

    local pos = 1
    local ok, err = xpcall(function()
        return parser.parse(context, function()
            while true do
                local token, start, finish, content = lex_one(context, input, pos)
                if not token then return tokens.EOF, #input + 1, #input + 1 end

                if print_tokens then
                    local start_line, start_col = context.get_pos(start)
                    local end_line, end_col = context.get_pos(finish)
                    local text = input:sub(start, finish)
                    print(("%d:%d-%d:%d %s %s"):format(
                        start_line, start_col, end_line, end_col,
                        get_name(token), content or text:gsub("\n", "<NL>")
                    ))
                end

                pos = finish + 1

                -- Only tokens with an id below COMMENT are handed to the
                -- parser. An ERROR token aborts the parse, and any other token
                -- is skipped.
                if token < last_token then
                    return token, start, finish
                elseif token == tokens.ERROR then
                    error(error_sentinel)
                end
            end
        end, start)
    end, debug.traceback)

    -- Our own sentinel is an expected way to stop. Anything else is a real
    -- failure, so its traceback is included in the output.
    if not ok and err ~= error_sentinel then
        print(err)
    end

    return table.concat(out, "\n")
end
return { make_context = make_context, capture_parser = capture_parser }

View File

@ -86,9 +86,52 @@ local function timeout(time, fn)
end
end
--- Extract a series of tests from a markdown file.
--
-- Each "```lua" code block in the file becomes one test. The block's contents
-- (and any text following "```lua" on the same line) are passed to `generate`,
-- and the result is compared against the contents of the "```txt" block which
-- directly follows it. Markdown headings become nested `describe` blocks.
--
-- @tparam string name The name of the outermost `describe` block.
-- @tparam string file The path of the markdown file to read.
-- @tparam function(lua: string, extra: string):string generate Produce the
-- actual output for a test's input.
local function describe_golden(name, file, generate)
describe(name, function()
local handle = assert(fs.open(file, "r"))
-- The heading pattern below starts with "\n", so a leading newline lets a
-- heading on the very first line of the file match too.
local contents = "\n" .. handle.readAll()
handle.close()
-- The current read position within the file, shared by every call to run.
local pos = 1
-- Register all tests and sub-headings nested under a heading of the given level.
local function run(current_level)
local test_idx = 1
while true do
-- Find both the next code block and the next heading, then handle
-- whichever occurs first.
local lua_start, lua_end, extra, lua = contents:find("```lua *([^\n]*)\n(.-)\n```\n?", pos)
local heading_start, heading_end, heading_lvl, heading = contents:find("\n(#+) *([^\n]+)", pos)
if heading and (not lua_start or heading_start < lua_start) then
-- A heading at the same or a shallower level ends this section. pos is
-- left untouched, so the caller finds the same heading again.
if #heading_lvl <= current_level then
return
end
pos = heading_end + 1
-- NOTE(review): presumably describe invokes its callback immediately, so
-- that pos has moved past the nested section by the time it returns.
describe(heading, function() run(#heading_lvl) end)
elseif lua_end then
-- The expected output is optional: without a txt block, txt is nil.
local _, txt_end, txt = contents:find("^\n*```txt\n(.-)\n```\n?", lua_end + 1)
it("test #" .. test_idx, function()
expect(generate(lua, extra))
:describe("For input string <<<\n" .. lua .. "\n>>>")
:eq(txt)
end)
test_idx = test_idx + 1
pos = (txt_end or lua_end) + 1
else
-- No more headings or code blocks remain.
return
end
end
end
run(0)
end)
end
return {
capture_program = capture_program,
with_window = with_window,
with_window_lines = with_window_lines,
timeout = timeout,
describe_golden = describe_golden,
}