git @ Cat's Eye Technologies SixtyPical / 007c81a
Split scanner off from parser module, into own module. Chris Pressey 5 years ago
2 changed file(s) with 75 addition(s) and 73 deletion(s). Raw diff Collapse all Expand all
00 # encoding: UTF-8
1
2 import re
31
42 from sixtypical.ast import Program, Defn, Routine, Block, Instr
53 from sixtypical.model import (
75 RoutineType, VectorType, ExecutableType,
86 LocationRef, ConstantRef
97 )
10
11
class Scanner(object):
    """Regex-driven tokenizer that holds the current token of its input.

    After construction, and after every call to `scan()`, `self.token`
    holds the text of the current token and `self.type` names its kind:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF'.  `self.text` holds the input that has
    not been consumed yet.
    """

    def __init__(self, text):
        """Begin scanning `text` and load its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the start of the remaining text.

        On a match, set `self.type` to `type`, `self.token` to match group
        `token_group` and `self.text` to match group `rest_group`, then
        return True.  On failure, leave all state untouched and return
        False.

        `pattern` is wrapped in a capturing group of its own (group 1) and
        followed by a group that captures the rest of the input, so each
        capturing group inside `pattern` is numbered from 2 upwards and
        pushes the rest group up by one.
        """
        # re.DOTALL lets the "rest" group (and any '.' in `pattern`) span
        # line breaks.
        pattern = r'^(' + pattern + r')(.*?)$'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Advance to the next token, skipping whitespace and `//` comments.

        When the input is exhausted, `self.type` becomes 'EOF' and
        `self.token` becomes None.  Otherwise the token kinds are tried in
        this order: operator, decimal integer, `$`-prefixed hexadecimal
        integer, double-quoted string, identifier, and finally any single
        character as 'unknown character'.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line.  The line terminator is
        # left for the whitespace pattern, so a comment on the last line
        # of the input is skipped even without a trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Hexadecimal literals are normalised to their decimal
            # spelling, so both integer forms yield the same token text.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            # The token is the string's contents, without the quotes.
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # With re.DOTALL, '.' matches any character, so non-empty text
        # always matches above.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Consume the current token if it equals `token`.

        Raises SyntaxError if the current token is anything else.
        """
        if self.token != token:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))
        self.scan()

    def on(self, token):
        """Return True if the current token's text equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's kind equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's kind is `type`.

        The token is not consumed.
        """
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Consume the current token if it equals `token`.

        Returns True if the token was consumed, False (leaving the
        scanner untouched) otherwise.
        """
        if self.token != token:
            return False
        self.scan()
        return True
8 from sixtypical.scanner import Scanner
819
8210
8311 class SymEntry(object):
0 # encoding: UTF-8
1
2 import re
3
4
class Scanner(object):
    """Regex-driven tokenizer that holds the current token of its input.

    After construction, and after every call to `scan()`, `self.token`
    holds the text of the current token and `self.type` names its kind:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF'.  `self.text` holds the input that has
    not been consumed yet.
    """

    def __init__(self, text):
        """Begin scanning `text` and load its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the start of the remaining text.

        On a match, set `self.type` to `type`, `self.token` to match group
        `token_group` and `self.text` to match group `rest_group`, then
        return True.  On failure, leave all state untouched and return
        False.

        `pattern` is wrapped in a capturing group of its own (group 1) and
        followed by a group that captures the rest of the input, so each
        capturing group inside `pattern` is numbered from 2 upwards and
        pushes the rest group up by one.
        """
        # re.DOTALL lets the "rest" group (and any '.' in `pattern`) span
        # line breaks.
        pattern = r'^(' + pattern + r')(.*?)$'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Advance to the next token, skipping whitespace and `//` comments.

        When the input is exhausted, `self.type` becomes 'EOF' and
        `self.token` becomes None.  Otherwise the token kinds are tried in
        this order: operator, decimal integer, `$`-prefixed hexadecimal
        integer, double-quoted string, identifier, and finally any single
        character as 'unknown character'.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line.  The line terminator is
        # left for the whitespace pattern, so a comment on the last line
        # of the input is skipped even without a trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Hexadecimal literals are normalised to their decimal
            # spelling, so both integer forms yield the same token text.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            # The token is the string's contents, without the quotes.
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # With re.DOTALL, '.' matches any character, so non-empty text
        # always matches above.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Consume the current token if it equals `token`.

        Raises SyntaxError if the current token is anything else.
        """
        if self.token != token:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))
        self.scan()

    def on(self, token):
        """Return True if the current token's text equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's kind equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's kind is `type`.

        The token is not consumed.
        """
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Consume the current token if it equals `token`.

        Returns True if the token was consumed, False (leaving the
        scanner untouched) otherwise.
        """
        if self.token != token:
            return False
        self.scan()
        return True