git @ Cat's Eye Technologies Eqthy / master src / eqthy / scanner.py
master

Tree @master (Download .tar.gz)

scanner.py @masterraw · history · blame

# Copyright (c) 2022-2024 Chris Pressey, Cat's Eye Technologies
# This file is distributed under a 2-clause BSD license.  See LICENSES directory:
# SPDX-License-Identifier: LicenseRef-BSD-2-Clause-X-Eqthy

# encoding: UTF-8

import re
from typing import Union


class EqthySyntaxError(ValueError):
    """Syntax error carrying a filename, a line number and a message.

    The three values are stored, in that order, in ``args``; ``str()``
    renders them as ``"<filename>, line <line_number>: <message>"``.
    """

    def __init__(self, filename: str, line_number: int, message: str):
        super().__init__(filename, line_number, message)

    def __str__(self) -> str:
        filename = self.args[0]
        line_number = self.args[1]
        message = self.args[2]
        return "{}, line {}: {}".format(filename, line_number, message)


class Scanner(object):
    """Regex-driven tokenizer over a string, with one token of lookahead.

    After construction, and after every call to ``scan()``, the attributes
    ``token`` and ``type`` describe the current, not-yet-consumed token.
    Once the text is exhausted, ``type`` is ``'EOF'`` and ``token`` is
    ``None``.  ``line_number`` starts at 1 and is advanced by the number
    of ``'\\n'`` characters in everything scanned so far (including the
    current token); it is what ``syntax_error()`` reports.
    """

    # (pattern, token type, capture group holding the token text), tried in
    # this order by scan().  scan_pattern() wraps each pattern in an extra
    # outer group, so group 1 is the whole match and a group written inside
    # the pattern is group 2.  The final '.' entry is a catch-all.
    _TOKEN_PATTERNS = (
        (r'\,|\@|\+|\:|\<|\>|\{|\}|\[|\]|\^', 'operator', 1),
        (r'\d+', 'integer literal', 1),
        (r'\"(.*?)\"', 'string literal', 2),
        (r'#[^])\s]+', 'identifier', 1),
        (r'\w+', 'identifier', 1),
        (r'.', 'unknown character', 1),
    )

    def __init__(self, text: str, filename: str):
        """Set up scanning of `text` and load the first token.

        `filename` is only used when building error messages.
        """
        self.text = text
        self.filename = filename
        self.token: Union[str, None] = None
        self.type: Union[str, None] = None
        self.pos: int = 0
        self.line_number: int = 1
        self.scan()

    def scan_pattern(self, pattern: str, type: str, token_group: int = 1) -> bool:
        """Try to match `pattern` at the current position.

        The pattern is wrapped in an outer capturing group and matched with
        ``re.DOTALL``.  On success, ``type`` is set to `type`, ``token`` to
        the text of group `token_group`, the position is moved past the
        whole match, the line number is advanced by the newlines in the
        token, and True is returned.  On failure nothing changes and False
        is returned.
        """
        regexp = re.compile(r'(' + pattern + r')', flags=re.DOTALL)
        match = regexp.match(self.text, pos=self.pos)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        # The match is anchored at self.pos, so its end is the new position.
        self.pos = match.end()
        self.line_number += self.token.count('\n')
        return True

    def consume_whitespace(self) -> None:
        """Skip whitespace and ``//`` line comments.

        This overwrites ``token`` and ``type``.  If the end of the text is
        reached they are left as ``None`` and ``'EOF'``.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs up to, but does not include, the line terminator,
        # so a comment on the last line of a text that lacks a trailing
        # newline is still recognised.  The terminator itself is consumed
        # (and counted) by the whitespace scan that follows.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if self.pos >= len(self.text):
            self.token = None
            self.type = 'EOF'

    def scan(self) -> None:
        """Advance to the next token, skipping whitespace and comments."""
        self.consume_whitespace()
        if self.type == 'EOF':
            return
        for pattern, token_type, group in self._TOKEN_PATTERNS:
            if self.scan_pattern(pattern, token_type, token_group=group):
                return
        # Not at EOF, and the catch-all '.' matches any character under
        # DOTALL, so getting here indicates an internal error.
        raise AssertionError("this should never happen, self.text=({}), self.pos=({})".format(self.text, self.pos))

    def expect(self, token: str) -> None:
        """Consume the current token if it equals `token`, else raise a syntax error."""
        if self.token == token:
            self.scan()
        else:
            self.syntax_error("Expected '{}', but found '{}'".format(token, self.token))

    def on(self, *tokens: str) -> bool:
        """Return True if the current token is one of `tokens`."""
        return self.token in tokens

    def on_type(self, type: str) -> bool:
        """Return True if the current token has type `type`."""
        return self.type == type

    def check_eof(self) -> None:
        """Raise a syntax error unless the whole text has been consumed.

        scan() has already skipped any whitespace and comments in front of
        the current token, so ``type`` alone tells whether anything is
        left.  Calling consume_whitespace() here would overwrite the
        pending token, which both hides it from the error message and, when
        it is the last thing in the text, makes it look like EOF.
        """
        if self.type != 'EOF':
            self.syntax_error("Expected end of document, but found '{}' ({})".format(self.token, self.type))

    def consume(self, token: str) -> bool:
        """Consume the current token if it equals `token`.

        Returns True if the token was consumed, False (leaving the scanner
        untouched) otherwise.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False

    def syntax_error(self, msg: str) -> None:
        """Raise EqthySyntaxError for the current filename and line number."""
        raise EqthySyntaxError(self.filename, self.line_number, msg)