git @ Cat's Eye Technologies Dieter / master src / dieter / scanner.py
master

Tree @master (Download .tar.gz)

scanner.py @masterraw · history · blame

# -*- coding: utf-8 -*-

"""
scanner.py -- lexical scanner for the Dieter programming language.
$Id: scanner.py 382 2010-01-28 23:40:43Z cpressey $
"""

class Scanner(object):
    """
    A lexical scanner.
    """

    def __init__(self, input):
        """
        Create a new Scanner object that will consume the given
        UTF-8 encoded input string.
        """
        try:
            self._input = unicode(input, 'utf-8')
        except NameError:
            self._input = input.decode('utf-8')
        self._token = None
        self.scan()

    def scan(self):
        """
        Consume a token from the input.
        """
        self._token = ""
        if len(self._input) == 0:
            self.toktype = "eof"
            return
        while self._input[0].isspace():
            self._input = self._input[1:]
            if len(self._input) == 0:
                self.toktype = "eof"
                return
        if self._input[0].isalpha():
            while self._input[0].isalnum() or self._input[0] == '_':
                self._token += self._input[0]
                self._input = self._input[1:]
            self.toktype = "ident"
        elif self._input[0].isdigit():
            while self._input[0].isdigit():
                self._token += self._input[0]
                self._input = self._input[1:]
            self.toktype = "int"
            self.tokval = int(self._token)
        elif self._input[:1] == "\"":
            st = ""
            self._input = self._input[1:]
            while self._input[:1] != "\"":
                st += self._input[:1]
                self._input = self._input[1:]
            self._input = self._input[1:]
            self.toktype = "string"
            self.tokval = st
        elif self._input[:2] == ':=':
            self._token = ':='
            self._input = self._input[2:]
            self.toktype = "op"
        elif self._input[:2] == '/*':
            while self._input[:2] != '*/':
                self._input = self._input[1:]
            self._input = self._input[2:]
            return self.scan()
        elif self._input[:1] == u"♥":
            self._token = self._input[:1]
            self._input = self._input[1:]
            self.toktype = "op"
        else:
            self._token = self._input[0]
            self._input = self._input[1:]
            self.toktype = "op"
        # print("scanned: '" + str(self._token) + "'")
    
    def get_token(self):
        """
        Return the current token as a string.  Using the read-only
        token property is preferred for readability.
        """
        return self._token
    
    token = property(get_token)

    def grab(self):
        """
        Return the current token as a string, and advance.
        """
        t = self._token
        self.scan()
        return t

    def expect(self, str):
        """
        Expect a certain token to be in the input, and complain
        if it is not.
        """
        if self._token == str:
            self.scan()
        else:
            self.error("expected " + str + ", found " + self._token)

    def error(self, str):
        """
        Log the given scan error.
        """
        print("error: " + str)
        self.scan()