git @ Cat's Eye Technologies ALPACA / master src / alpaca / scanner.py
master

Tree @master (Download .tar.gz)

scanner.py @master raw · history · blame

import re


class Scanner(object):
    """Regex-driven tokenizer that holds one current token at a time.

    The unconsumed input lives in ``self.text``.  The most recently scanned
    token is exposed as ``self.token`` (its text, or None at end of input)
    and ``self.type`` (a string naming its category, or 'EOF').  The
    constructor scans the first token immediately.
    """

    # Negative lookahead appended to reserved-word patterns so that a
    # reserved word is not recognised at the front of a longer word
    # (e.g. 'island' must be one identifier, not 'is' followed by 'land').
    # The character class mirrors the identifier continuation characters.
    _WORD_END = r'(?![a-zA-Z0-9])'

    # A line terminator; '\r\n' is listed first so that it is consumed as a
    # single end-of-line rather than as two.
    _EOL = r'\r\n|[\n\r]'

    # Token rules tried in order by scan(); the first rule that matches wins.
    # Each entry is (pattern, type, token_group, rest_group).
    # Because 'arrow chain' is tried before 'identifier', a word beginning
    # with a lowercase 'v' has that 'v' scanned as an arrow chain.
    _TOKEN_RULES = (
        (r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation', 1, 2),
        (r'[<>^v]+', 'arrow chain', 1, 2),
        (r'(?:class|state|neighbourhood|is|to|when|me|in|not)' + _WORD_END,
         'keyword', 1, 2),
        (r'(?:and|or|xor)' + _WORD_END, 'boolean operator', 1, 2),
        (r'(?:true|false|guess)' + _WORD_END, 'boolean literal', 1, 2),
        (r'\d+', 'integer literal', 1, 2),
        # The token is the text between the quotes (group 2), which shifts
        # the remainder of the input to group 3.
        (r'\"(.*?)\"', 'string literal', 2, 3),
        (r'[a-zA-Z][a-zA-Z0-9]*', 'identifier', 1, 2),
        (r'.', 'unknown character', 1, 2),
    )

    def __init__(self, text):
        """Start scanning `text` and load its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the very start of the remaining text.

        On success, set ``self.type`` to `type`, ``self.token`` to the match
        group numbered `token_group`, and ``self.text`` to the group numbered
        `rest_group`, then return True.  On failure, return False and leave
        all state untouched.

        `pattern` is wrapped in a capturing group (group 1) and followed by
        a group capturing the rest of the input (group 2 when `pattern` has
        no capturing groups of its own); a pattern that contains its own
        groups must pass adjusted group numbers.
        """
        # Greedy '(.*)' anchored with '\Z' captures the whole remainder.  A
        # lazy '(.*?)$' may stop just before a final newline, silently
        # dropping it from the remaining text.
        pattern = r'^(' + pattern + r')(.*)\Z'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and comments, then load the next token.

        At end of input, ``self.token`` becomes None and ``self.type``
        becomes 'EOF'.

        Raises SyntaxError if a '/*' comment is never closed.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        while self.text.startswith('/*'):
            # If the comment pattern fails here, the comment has no closing
            # '*/'; without this check the loop would never terminate.
            if not self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
                raise SyntaxError("Unterminated comment")
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        for pattern, type_, token_group, rest_group in self._TOKEN_RULES:
            if self.scan_pattern(pattern, type_, token_group, rest_group):
                return
        # The final rule matches any single character and the text is
        # non-empty here, so falling out of the loop is not expected.
        raise ValueError("this should never happen, "
                         "self.text=(%s)" % self.text)

    def scan_playfield(self):
        """Called when the token which introduces the playfield has
        already just been scanned.

        Consumes all of the remaining text.  Trailing spaces/tabs and the
        line terminator after the introducing token are skipped; every
        other character that is not a line terminator is returned as an
        ``(x, y, character)`` tuple, where `x` is its column and `y` its row,
        both counted from zero.

        """
        self.scan_pattern(r'[ \t]*', 'whitespace')
        self.scan_pattern(self._EOL, 'eol')
        elems = []
        y = 0
        while self.text:
            x = 0
            while self.scan_pattern(r'[^\n\r]', 'arbitrary character'):
                elems.append((x, y, self.token))
                x += 1
            self.scan_pattern(self._EOL, 'eol')
            y += 1
        return elems

    def expect(self, token):
        """Advance past the current token if its text is `token`.

        Raises SyntaxError otherwise.
        """
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def expect_type(self, type):
        """Advance past the current token if its type is `type`.

        Raises SyntaxError otherwise.
        """
        self.check_type(type)
        self.scan()

    def on(self, token):
        """Return True if the current token's text is `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's type is `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's type is `type`.

        Does not advance the scanner.
        """
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """If the current token's text is `token`, advance and return True;
        otherwise return False without advancing.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False

    def consume_type(self, type):
        """If the current token's type is `type`, advance and return the
        text of the token that was consumed; otherwise return None without
        advancing.
        """
        if self.on_type(type):
            token = self.token
            self.scan()
            return token
        else:
            return None