git @ Cat's Eye Technologies Decoy / master src / decoy / lexer.lua
master

Tree @master (Download .tar.gz)

lexer.lua @masterraw · history · blame

--
-- decoy.lexer
--

-- SPDX-FileCopyrightText: Copyright (c) 2023-2024 Chris Pressey, Cat's Eye Technologies.
-- This work is distributed under a 2-clause BSD license. For more information, see:
-- SPDX-License-Identifier: LicenseRef-BSD-2-Clause-X-Decoy

table = require "table"

require "decoy.model"


--[[ ========== SCANNER ========== ]]--

--- Report whether `s` is a single ASCII decimal digit.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true only when `s` is exactly one of "0".."9"; false for the empty
--   string and for anything longer than one character
local function isdigit(s)
    -- A plain string.find matches the empty needle at position 1 and also
    -- matches multi-character runs such as "12", so pin the length first.
    return #s == 1 and string.find("0123456789", s, 1, true) ~= nil
end

--- Report whether `s` is a single ASCII lowercase letter.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true only when `s` is exactly one of "a".."z"; false for the empty
--   string and for anything longer than one character
local function islower(s)
    -- A plain string.find matches the empty needle at position 1 and also
    -- matches multi-character runs such as "abc", so pin the length first.
    return #s == 1 and string.find("abcdefghijklmnopqrstuvwxyz", s, 1, true) ~= nil
end

--- Report whether `s` is a single ASCII uppercase letter.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true only when `s` is exactly one of "A".."Z"; false for the empty
--   string and for anything longer than one character
local function isupper(s)
    -- A plain string.find matches the empty needle at position 1 and also
    -- matches multi-character runs such as "ABC", so pin the length first.
    return #s == 1 and string.find("ABCDEFGHIJKLMNOPQRSTUVWXYZ", s, 1, true) ~= nil
end

--- Report whether `s` is alphabetic according to islower / isupper.
-- @param s string to test
-- @return true when islower(s) holds, otherwise the result of isupper(s)
local function isalpha(s)
    if islower(s) then
        return true
    end
    return isupper(s)
end

--- Report whether `s` is alphanumeric according to isalpha / isdigit.
-- @param s string to test
-- @return true when isalpha(s) holds, otherwise the result of isdigit(s)
local function isalnum(s)
    if isalpha(s) then
        return true
    end
    return isdigit(s)
end

--- Report whether `s` is a one-character separator, i.e. `(` or `)`.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true only when `s` is exactly "(" or ")"; false for the empty
--   string and for anything longer than one character
local function issep(s)
    -- A plain string.find matches the empty needle at position 1 and would
    -- also accept the two-character string "()", so pin the length first.
    return #s == 1 and string.find("()", s, 1, true) ~= nil
end

--- Report whether `s` is a single whitespace character.
-- Whitespace here means space, tab, line feed or carriage return.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true only when `s` is exactly one of those four characters; false
--   for the empty string and for anything longer than one character
local function isspace(s)
    -- A plain string.find matches the empty needle at position 1 and also
    -- matches multi-character runs such as "\n\r", so pin the length first.
    return #s == 1 and string.find(" \t\n\r", s, 1, true) ~= nil
end

--- Report whether `s` occurs in the end-of-line string "\n\r".
-- The check is a plain substring search, so besides "\n" and "\r" it also
-- yields true for "\n\r" and for the empty string (the empty needle is found
-- at position 1). A caller probing one character past the end of its input
-- therefore sees `true`.
-- @param s string to test
-- @return true when `s` is found in "\n\r", false otherwise
local function iseol(s)
    local hit = string.find("\n\r", s, 1, true)
    return hit ~= nil
end

--- Report whether `s` may appear inside a symbol token.
-- A character is symbolic when it is neither a separator (issep) nor
-- whitespace (isspace). The empty string is never symbolic; it is rejected
-- explicitly so the answer does not depend on how the two helper predicates
-- treat empty input.
-- @param s string to test (normally one character obtained with string.sub)
-- @return true when `s` is non-empty and neither issep(s) nor isspace(s) holds
local function issymbolic(s)
    if s == "" then
        return false
    end
    return not (issep(s) or isspace(s))
end

-- Method table for lexer objects. Instances made by Lexer.new look their
-- methods up here through their metatable's __index, and this table is what
-- the module returns.
local Lexer = {}

--- Return the text of the current token.
-- @return the value last stored in `self._text` (nil if no token has been
--   set yet)
function Lexer:get_token_text()
    local text = self._text
    return text
end

--- Return the type tag of the current token.
-- @return the value last stored in `self._type` (nil if no token has been
--   set yet)
function Lexer:get_token_type()
    local kind = self._type
    return kind
end

--- Report whether the current token is the end-of-input marker.
-- @return true when the current token type is "EOF", false otherwise
function Lexer:is_eof()
    local kind = self._type
    return kind == "EOF"
end

--- Record `text` and `type` as the current token and log the change.
-- Both values are concatenated into the log message, so they must be
-- strings (or numbers) or the concatenation raises an error.
-- @param text token text
-- @param type token type tag, e.g. "symbol" or "EOF"
function Lexer:set_token(text, type)
    self._text, self._type = text, type
    -- `debug` is called as a logging function taking a channel name and a
    -- message; it is not defined in this file.
    local message = "set_token " .. text .. " (" .. type .. ")"
    debug("scanner", message)
end

--- Advance to the next token and log it.
-- Delegates the actual scanning to `self:scan_impl()`.
-- @return the text of the newly scanned token
function Lexer:scan()
    self:scan_impl()
    local text = self._text
    local message = "scanned '" .. text .. "' (" .. self._type .. ")"
    debug("scanner", message)
    return text
end

--- Scan one token from the front of `self.string`.
--
-- Leading whitespace and `;` line comments are skipped. The first rule that
-- applies, in this order, decides the token:
--   1. nothing left                     -> text "EOF", type "EOF"
--   2. `(` or `)`                       -> type "separator"
--   3. optional `-`, digits, optional `.` and more digits, with at least one
--      digit overall                    -> type "numlit"
--   4. `"` ... `"`                      -> type "strlit"; the text excludes
--      the quotes and no escape sequences are interpreted
--   5. a run of characters that are neither separators nor whitespace
--                                       -> type "symbol"
--   6. any other single character       -> type "operator"
--
-- The token is recorded with `self:set_token` and `self.string` is replaced
-- by the input that follows the token.
--
-- Notes:
--   * A `-` (optionally followed by `.`) with no digit at all is not a
--     number; it falls through and is scanned as a symbol.
--   * An unterminated string literal is accepted leniently: its text is
--     everything after the opening quote and the input is left empty.
--   * With the predicates in this file every non-empty character that is not
--     a separator or whitespace is symbolic, so rule 6 is only a defensive
--     fallback.
function Lexer:scan_impl()
    -- TODO: count pos and line
    local src = self.string
    local size = #src
    local pos = 1  -- index of the next unread character in src

    -- discard leading whitespace and comments
    while pos <= size do
        local c = src:sub(pos, pos)
        if isspace(c) then
            pos = pos + 1
        elseif c == ";" then
            -- A comment runs up to, but not including, the end-of-line
            -- character (or the end of input); the end-of-line character
            -- itself is then skipped as ordinary whitespace.
            pos = pos + 1
            while pos <= size and not iseol(src:sub(pos, pos)) do
                pos = pos + 1
            end
        else
            break
        end
    end

    -- check for end of input
    if pos > size then
        self:set_token("EOF", "EOF")
        self.string = ""
        return
    end

    local first = src:sub(pos, pos)

    -- one character token
    if issep(first) then
        self:set_token(first, "separator")
        self.string = src:sub(pos + 1)
        return
    end

    -- literal decimal number
    if first == "-" or isdigit(first) then
        local stop = pos   -- index just past the candidate literal
        local digits = 0   -- how many digit characters the candidate holds
        if first == "-" then
            stop = stop + 1
        end
        while stop <= size and isdigit(src:sub(stop, stop)) do
            stop = stop + 1
            digits = digits + 1
        end
        if src:sub(stop, stop) == "." then
            stop = stop + 1
            while stop <= size and isdigit(src:sub(stop, stop)) do
                stop = stop + 1
                digits = digits + 1
            end
        end
        -- Only a candidate with at least one digit is a number; a bare "-"
        -- or "-." is left for the symbol rule below.
        if digits > 0 then
            self:set_token(src:sub(pos, stop - 1), "numlit")
            self.string = src:sub(stop)
            return
        end
    end

    -- quoted string
    if first == "\"" then
        local close = src:find("\"", pos + 1, true)
        if close then
            self:set_token(src:sub(pos + 1, close - 1), "strlit")
            self.string = src:sub(close + 1)
        else
            -- no closing quote: take the rest of the input as the text
            self:set_token(src:sub(pos + 1), "strlit")
            self.string = ""
        end
        return
    end

    -- symbol
    if issymbolic(first) then
        local stop = pos + 1  -- index just past the symbol
        while stop <= size and issymbolic(src:sub(stop, stop)) do
            stop = stop + 1
        end
        self:set_token(src:sub(pos, stop - 1), "symbol")
        self.string = src:sub(stop)
        return
    end

    -- anything else => one character token
    self:set_token(first, "operator")
    self.string = src:sub(pos + 1)
end

--- Consume the current token if its text equals `s`.
-- Only the token text is compared; the token type is not consulted.
-- @param s text the current token must have
-- @return true after advancing to the next token on a match, false (without
--   advancing) otherwise
function Lexer:consume(s)
    if self._text ~= s then
        return false
    end
    self:scan()
    return true
end

--- Consume the current token if its type equals `t`.
-- @param t type tag the current token must have, e.g. "numlit"
-- @return true after advancing to the next token on a match, false (without
--   advancing) otherwise
function Lexer:consume_type(t)
    if self._type ~= t then
        return false
    end
    self:scan()
    return true
end

--- Require the current token's text to equal `s`, then advance past it.
-- Only the token text is compared; the token type is not consulted.
-- @param s text the current token must have
-- @raise an error of the form "expected '<s>', found '<text>'" when the
--   current token's text differs from `s`
function Lexer:expect(s)
    if self._text ~= s then
        error("expected '" .. s .. "', found '" .. self._text .. "'")
    end
    self:scan()
end

--- Create a lexer over the input text `s`.
-- The new object stores `s` in its `string` field and starts with no current
-- token (`_text` and `_type` are nil); nothing is scanned here. Each object
-- gets its own metatable whose __index is the Lexer table.
-- @param s input text; it is concatenated into a log message, so it must be
--   a string (or number) or the concatenation raises an error
-- @return the new lexer object
function Lexer.new(s)
    -- _text and _type are simply left unset, which reads back as nil
    local lexer = setmetatable({ string = s }, { __index = Lexer })

    debug("scanner", "created scanner with string '" .. s .. "'")

    return lexer
end

-- The module's value is the Lexer table itself.
return Lexer