--
-- decoy.lexer
--
-- SPDX-FileCopyrightText: Copyright (c) 2023-2024 Chris Pressey, Cat's Eye Technologies.
-- This work is distributed under a 2-clause BSD license. For more information, see:
-- SPDX-License-Identifier: LicenseRef-BSD-2-Clause-X-Decoy
table = require "table"
require "decoy.model"
--[[ ========== SCANNER ========== ]]--
-- Test whether s is exactly one ASCII decimal digit ("0" through "9").
-- Returns true only for a single digit character; returns false for
-- anything else, including the empty string and strings longer than one
-- character.
local function isdigit(s)
    -- Anchored set match: a plain substring search in "0123456789" would
    -- also accept "" and multi-character runs such as "34".
    return string.find(s, "^[0-9]$") ~= nil
end
-- Test whether s is exactly one ASCII lowercase letter ("a" through "z").
-- Returns true only for a single lowercase letter; returns false for
-- anything else, including the empty string and strings longer than one
-- character.
local function islower(s)
    -- Anchored set match: a plain substring search in the alphabet would
    -- also accept "" and multi-character runs such as "abc".
    return string.find(s, "^[a-z]$") ~= nil
end
-- Test whether s is exactly one ASCII uppercase letter ("A" through "Z").
-- Returns true only for a single uppercase letter; returns false for
-- anything else, including the empty string and strings longer than one
-- character.
local function isupper(s)
    -- Anchored set match: a plain substring search in the alphabet would
    -- also accept "" and multi-character runs such as "XYZ".
    return string.find(s, "^[A-Z]$") ~= nil
end
-- Test whether s is a letter, i.e. accepted by islower or by isupper.
-- The lowercase check is tried first; the uppercase check only runs when
-- the lowercase one fails.
local function isalpha(s)
    if islower(s) then
        return true
    end
    return isupper(s)
end
-- Test whether s is alphanumeric, i.e. accepted by isalpha or by isdigit.
-- The letter check is tried first; the digit check only runs when the
-- letter check fails.
local function isalnum(s)
    if isalpha(s) then
        return true
    end
    return isdigit(s)
end
-- Test whether s is exactly one separator character: "(" or ")".
-- Returns true only for a single parenthesis; returns false for anything
-- else, including the empty string and the two-character string "()".
local function issep(s)
    -- Inside a pattern set the parentheses are literal characters. The
    -- anchors reject "" and "()", which a plain substring search in "()"
    -- would accept.
    return string.find(s, "^[()]$") ~= nil
end
-- Test whether s is exactly one whitespace character: space, tab,
-- newline or carriage return.
-- Returns true only for a single such character; returns false for
-- anything else, including the empty string and multi-character strings.
local function isspace(s)
    -- Anchored set match: a plain substring search in " \t\n\r" would
    -- also accept "" and runs such as " \t".
    return string.find(s, "^[ \t\n\r]$") ~= nil
end
-- Report whether s occurs as a plain (non-pattern) substring of "\n\r".
-- This is true for "\n", for "\r" and for the pair "\n\r".
-- The empty string also counts, since it is a substring of any string, so
-- probing one position past the end of the input with string.sub (which
-- yields "") reads as end-of-line.
local function iseol(s)
    local found_at = string.find("\n\r", s, 1, true)
    return found_at ~= nil
end
-- Test whether s may be part of a symbol: anything that is neither a
-- separator (per issep) nor whitespace (per isspace).
-- isspace is only consulted when issep rejects s.
local function issymbolic(s)
    return not (issep(s) or isspace(s))
end
-- Method table for lexer objects. Instances built by Lexer.new resolve
-- their methods here through an __index metatable entry.
local Lexer = {}
-- Accessor for the text of the current token.
-- Returns whatever is stored in self._text, which is written by set_token.
function Lexer:get_token_text()
    local text = self._text
    return text
end
-- Accessor for the type tag of the current token.
-- Returns whatever is stored in self._type, which is written by set_token.
function Lexer:get_token_type()
    local kind = self._type
    return kind
end
-- Report whether the current token is the end-of-input marker.
-- Returns true exactly when the current token type is the string "EOF".
function Lexer:is_eof()
    local current_type = self._type
    return current_type == "EOF"
end
-- Record the current token and emit a "scanner" debug trace for it.
--   text: the token text, stored in self._text
--   type: the token type tag, stored in self._type
-- Both values are concatenated into the trace message, so they must be
-- strings (or numbers).
-- NOTE(review): debug is called as a global function here; presumably it
-- is defined by the decoy.model module required at the top of the file.
function Lexer:set_token(text, type)
    self._text, self._type = text, type
    local message = "set_token " .. text .. " (" .. type .. ")"
    debug("scanner", message)
end
-- Advance to the next token.
-- Delegates the actual work to scan_impl, emits a "scanner" debug trace
-- describing the token that was found, and returns the new token's text.
function Lexer:scan()
    self:scan_impl()
    local text, kind = self._text, self._type
    local message = "scanned '" .. text .. "' (" .. kind .. ")"
    debug("scanner", message)
    return self._text
end
-- Recognize the next token in the remaining input.
--
-- Reads self.string, skips leading whitespace and ";" comments, recognizes
-- one token, records it with self:set_token(text, type), and then stores
-- the unconsumed remainder of the input back into self.string.
--
-- Token types produced:
--   "EOF"        nothing but whitespace/comments remained (text is "EOF")
--   "separator"  a single "(" or ")"
--   "numlit"     optional "-", digits, optional "." followed by digits;
--                the literal must contain at least one digit
--   "strlit"     text between double quotes, without the quotes; a
--                missing closing quote is tolerated and the literal then
--                extends to the end of the input
--   "symbol"     a maximal run of characters accepted by issymbolic
--   "operator"   defensive fallback: any other single character
--
-- Every loop is bounded by the input length explicitly, so the character
-- predicates are never asked about the empty string that string.sub
-- returns past the end of the input.
function Lexer:scan_impl()
    -- TODO: count pos and line
    -- Named "input" rather than "string" so the string library is not
    -- shadowed inside this function.
    local input = self.string
    local size = #input
    local pos = 1

    -- discard leading whitespace and comments; advancing an index avoids
    -- re-slicing the whole remainder once per skipped character
    while pos <= size do
        local c = input:sub(pos, pos)
        if isspace(c) then
            pos = pos + 1
        elseif c == ";" then
            -- a comment runs up to, but not including, the next
            -- end-of-line character (or to the end of the input)
            pos = pos + 1
            while pos <= size and not iseol(input:sub(pos, pos)) do
                pos = pos + 1
            end
        else
            break
        end
    end

    -- check for end of input
    if pos > size then
        self:set_token("EOF", "EOF")
        self.string = ""
        return
    end

    local first = input:sub(pos, pos)

    -- one character token
    if issep(first) then
        self:set_token(first, "separator")
        self.string = input:sub(pos + 1)
        return
    end

    -- literal decimal number
    if first == "-" or isdigit(first) then
        local stop = pos      -- index just past the candidate literal
        local digits = 0      -- number of digit characters seen
        if first == "-" then
            stop = stop + 1
        end
        while stop <= size and isdigit(input:sub(stop, stop)) do
            stop = stop + 1
            digits = digits + 1
        end
        if input:sub(stop, stop) == "." then
            stop = stop + 1
            while stop <= size and isdigit(input:sub(stop, stop)) do
                stop = stop + 1
                digits = digits + 1
            end
        end
        -- A candidate without any digit ("-" alone, or "-.") is not a
        -- number; fall through so it is scanned as a symbol instead.
        if digits > 0 then
            self:set_token(input:sub(pos, stop - 1), "numlit")
            self.string = input:sub(stop)
            return
        end
    end

    -- quoted string
    if first == "\"" then
        -- find the closing quote; ends at size + 1 when there is none
        local close = pos + 1
        while close <= size and input:sub(close, close) ~= "\"" do
            close = close + 1
        end
        self:set_token(input:sub(pos + 1, close - 1), "strlit")
        self.string = input:sub(close + 1) -- skip over closing quote
        return
    end

    -- symbol
    if issymbolic(first) then
        local stop = pos + 1
        while stop <= size and issymbolic(input:sub(stop, stop)) do
            stop = stop + 1
        end
        self:set_token(input:sub(pos, stop - 1), "symbol")
        self.string = input:sub(stop)
        return
    end

    -- anything else => one character token
    self:set_token(first, "operator")
    self.string = input:sub(pos + 1)
end
-- Conditionally consume the current token by text.
-- If the current token's text equals s, scans the next token and returns
-- true; otherwise leaves the lexer untouched and returns false.
function Lexer:consume(s)
    if self._text ~= s then
        return false
    end
    self:scan()
    return true
end
-- Conditionally consume the current token by type.
-- If the current token's type equals t, scans the next token and returns
-- true; otherwise leaves the lexer untouched and returns false.
function Lexer:consume_type(t)
    if self._type ~= t then
        return false
    end
    self:scan()
    return true
end
-- Require the current token's text to be s.
-- On a match the next token is scanned; on a mismatch an error is raised
-- whose message names both the expected and the found token text.
function Lexer:expect(s)
    if self._text ~= s then
        local message = "expected '" .. s .. "', found '" .. self._text .. "'"
        error(message)
    end
    self:scan()
end
-- Create a lexer over the input text s.
-- The new object holds the not-yet-consumed input in its "string" field
-- and has no current token (_text and _type are absent, i.e. nil) until a
-- token is recorded. Methods are resolved on Lexer through __index.
-- s is concatenated into the debug trace, so it must be a string (or a
-- number).
function Lexer.new(s)
    local lexer = setmetatable({ string = s }, { __index = Lexer })
    local message = "created scanner with string '" .. s .. "'"
    debug("scanner", message)
    return lexer
end
-- Module result: the Lexer table (the Lexer.new constructor and its methods).
return Lexer