# -*- coding: utf-8 -*-
"""
scanner.py -- lexical scanner for the Dieter programming language.
$Id: scanner.py 382 2010-01-28 23:40:43Z cpressey $
"""
class Scanner(object):
    """
    A lexical scanner.

    The scanner always holds one "current" token.  After construction
    and after every call to scan() the following attributes describe it:

    token   -- the token text ("" for string literals and at end of input)
    toktype -- one of "ident", "int", "string", "op" or "eof"
    tokval  -- the integer value of the most recent "int" token or the
               contents of the most recent "string" token (None until
               one has been scanned; it is not reset by other tokens)
    """

    def __init__(self, input):
        """
        Create a new Scanner object that will consume the given input.

        input may be a UTF-8 encoded byte string, which is decoded, or
        an already-decoded text string, which is used as-is.  The first
        token is scanned immediately.
        """
        # Decode only when handed bytes: this works on both Python 2
        # (where bytes is str) and Python 3, and accepts text unchanged.
        if isinstance(input, bytes):
            input = input.decode('utf-8')
        self._input = input
        self._token = None
        self.toktype = None
        self.tokval = None
        self.scan()

    def _consume_while(self, predicate):
        """
        Remove and return the longest prefix of the remaining input
        whose characters all satisfy predicate (possibly empty).
        """
        text = self._input
        end = 0
        while end < len(text) and predicate(text[end]):
            end += 1
        # One slice per run instead of one per character.
        self._input = text[end:]
        return text[:end]

    def scan(self):
        """
        Consume a token from the input.

        Whitespace and /* ... */ comments are skipped.  Sets token and
        toktype, and for "int" and "string" tokens also tokval.  At end
        of input toktype becomes "eof" and token is "".  An unterminated
        string literal or comment is reported with an "error:" message
        and is treated as running to the end of the input.
        """
        self._token = ""

        # Skip any mix of whitespace and comments (iteratively, so a long
        # run of comments cannot exhaust the recursion limit).
        while True:
            self._consume_while(lambda ch: ch.isspace())
            if not self._input:
                self.toktype = "eof"
                return
            if self._input[:2] != '/*':
                break
            # The search starts at the opening "/*" itself, so "/*/" counts
            # as a complete comment, exactly as it always has.
            end = self._input.find('*/')
            if end == -1:
                print("error: unterminated comment")
                self._input = u""
            else:
                self._input = self._input[end + 2:]

        first = self._input[0]
        if first.isalpha():
            # Identifiers start with a letter; "_" may only follow one.
            self._token = self._consume_while(
                lambda ch: ch.isalnum() or ch == '_'
            )
            self.toktype = "ident"
        elif first.isdecimal():
            # isdecimal() accepts exactly the characters int() can parse;
            # isdigit() also admits e.g. superscripts, which int() rejects.
            self._token = self._consume_while(lambda ch: ch.isdecimal())
            self.toktype = "int"
            self.tokval = int(self._token)
        elif first == "\"":
            # String literals have no escape sequences; the token text
            # stays "" and the contents are delivered through tokval.
            body = self._input[1:]
            end = body.find("\"")
            if end == -1:
                print("error: unterminated string literal")
                self.tokval = body
                self._input = u""
            else:
                self.tokval = body[:end]
                self._input = body[end + 1:]
            self.toktype = "string"
        elif self._input[:2] == ':=':
            self._token = ':='
            self._input = self._input[2:]
            self.toktype = "op"
        else:
            # Every other character, including the heart operator
            # (U+2665), is a single-character operator.
            self._token = first
            self._input = self._input[1:]
            self.toktype = "op"

    def get_token(self):
        """
        Return the current token as a string.  Using the read-only
        token property is preferred for readability.
        """
        return self._token

    token = property(get_token)

    def grab(self):
        """
        Return the current token as a string, and advance.
        """
        t = self._token
        self.scan()
        return t

    def expect(self, str):
        """
        Expect a certain token to be in the input, and complain
        if it is not.

        If the current token equals str it is consumed; otherwise the
        mismatch is passed to error(), which reports it and skips the
        offending token.
        """
        if self._token == str:
            self.scan()
        else:
            self.error("expected " + str + ", found " + self._token)

    def error(self, str):
        """
        Log the given scan error and advance past the current token.
        """
        print("error: " + str)
        self.scan()