git @ Cat's Eye Technologies Castile / ded3519
Split scanner off into own module. Chris Pressey 3 years ago
2 changed file(s) with 147 addition(s) and 121 deletion(s). Raw diff Collapse all Expand all
00 import re
11
22 from castile.ast import AST
3
4
class CastileSyntaxError(ValueError):
    """Raised when the Castile source text is not what was expected."""
3 from castile.scanner import Scanner, CastileSyntaxError
74
85
96 class Parser(object):
107 """Parse a Castile program into an AST.
11
12 The parser includes the scanner as part of it. (Delegating to an external
13 scanner is rather verbose ("self.scanner.expect(...)"); inheriting from a
14 Scanner class, even if it's just a mixin, seems rather weird.)
158
169 The parser mainly just constructs the AST. It does few other analyses
1710 or transformations itself. However, there are a few:
2316
2417 """
def __init__(self, text):
    """Set up scanning state over `text` and create the Scanner.

    The first token is read immediately via scan(), and a Scanner over the
    same text is stored on ``self.scanner``.
    """
    self.text = text
    self.token = self.type = None
    self.pos = 0
    self.scan()
    # for parser...
    self.scanner = Scanner(text)
    self.locals = None
3321
34 # ### SCANNER ### #
35
36 def near_text(self, length=10):
37 return self.text[self.pos:self.pos + length]
38
39 def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
40 pattern = r'(' + pattern + r')'
41 regexp = re.compile(pattern, flags=re.DOTALL)
42 match = regexp.match(self.text, pos=self.pos)
43 if not match:
44 return False
45 else:
46 self.type = type
47 self.token = match.group(token_group)
48 self.pos += len(match.group(0))
49 return True
50
51 def scan(self):
52 self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
53 while self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
54 self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
55 if self.pos >= len(self.text):
56 self.token = None
57 self.type = 'EOF'
58 return
59 if self.scan_pattern(r'->', 'arrow'):
60 return
61 if self.scan_pattern(r'>=|>|<=|<|==|!=', 'relational operator'):
62 return
63 if self.scan_pattern(r'\+|\-', 'additive operator'):
64 return
65 if self.scan_pattern(r'\*|\/|\|', 'multiplicative operator'):
66 return
67 if self.scan_pattern(r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation'):
68 return
69 if self.scan_pattern(r'string|integer|boolean|function|void|union',
70 'type name'):
71 return
72 if self.scan_pattern(r'and|or', 'boolean operator'):
73 return
74 if self.scan_pattern(r'(if|else|while|make|struct|'
75 r'typecase|is|as|return|break|'
76 r'true|false|null)(?!\w)',
77 'keyword', token_group=2, rest_group=3):
78 return
79 if self.scan_pattern(r'\d+', 'integer literal'):
80 return
81 if self.scan_pattern(r'\"(.*?)\"', 'string literal',
82 token_group=2, rest_group=3):
83 return
84 if self.scan_pattern(r'[a-zA-Z_][a-zA-Z0-9_]*', 'identifier'):
85 return
86 if self.scan_pattern(r'.', 'unknown character'):
87 return
88 else:
89 raise ValueError("this should never happen, "
90 "self.text=(%s)" % self.text)
91
def expect(self, token):
    """Require the current token to equal `token`, then advance.

    Raises CastileSyntaxError, quoting the nearby text, when it does not.
    """
    if self.token != token:
        raise CastileSyntaxError(
            "Expected '%s', but found '%s' (near '%s')" % (
                token, self.token, self.near_text()
            )
        )
    self.scan()
101
def expect_type(self, type):
    """Require the current token to be of `type`; return its text and advance.

    The type check is delegated to check_type(), which raises on mismatch.
    """
    self.check_type(type)
    # Capture the text before scan() replaces it with the next token.
    matched = self.token
    self.scan()
    return matched
107
def on(self, token):
    """Return True when the current token's text equals `token`."""
    is_match = self.token == token
    return is_match
110
def on_any(self, tokens):
    """Return True when the current token's text is a member of `tokens`."""
    current = self.token
    return current in tokens
113
def on_type(self, type):
    """Return True when the current token's category equals `type`."""
    is_match = self.type == type
    return is_match
116
117 def check_type(self, type):
118 if not self.type == type:
119 raise CastileSyntaxError(
120 "Expected %s, but found %s ('%s') (near '%s')" % (
121 type, self.type, self.token, self.near_text()
122 )
123 )
124
def consume(self, token):
    """Advance past the current token if its text equals `token`.

    Returns True when the token was consumed, False (without advancing)
    otherwise.
    """
    if self.token != token:
        return False
    self.scan()
    return True
131
def consume_type(self, type):
    """Advance past the current token if it is of `type`.

    Returns the consumed token's text, or None (without advancing) when
    the current token is of a different type.
    """
    if not self.on_type(type):
        return None
    # Capture the text before scan() replaces it with the next token.
    matched = self.token
    self.scan()
    return matched
139
140 # ### PARSER ### #
22 ### Delegate to scanner
23
def consume(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.consume``."""
    forward = self.scanner.consume
    return forward(*args, **kwargs)
26
def consume_type(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.consume_type``."""
    forward = self.scanner.consume_type
    return forward(*args, **kwargs)
29
def expect(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.expect``."""
    forward = self.scanner.expect
    return forward(*args, **kwargs)
32
def expect_type(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.expect_type``."""
    forward = self.scanner.expect_type
    return forward(*args, **kwargs)
35
def on(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.on``."""
    forward = self.scanner.on
    return forward(*args, **kwargs)
38
def on_any(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.on_any``."""
    forward = self.scanner.on_any
    return forward(*args, **kwargs)
41
def on_type(self, *args, **kwargs):
    """Forward all arguments unchanged to ``self.scanner.on_type``."""
    forward = self.scanner.on_type
    return forward(*args, **kwargs)
44
45 ### Parser proper
14146
14247 def program(self):
14348 defns = []
0 import re
1
2 from castile.ast import AST
3
4
class CastileSyntaxError(ValueError):
    """Raised when the Castile source text is not what was expected."""
7
8
class Scanner(object):
    """Regex-driven tokenizer over a string of Castile source text.

    After construction, and after every call to scan(), ``token`` holds the
    text of the current token (None at end of input) and ``type`` holds a
    string naming its category ('EOF' at end of input). ``pos`` is the
    offset in ``text`` just past the current token.
    """

    # (pattern, token type, token group) rules tried in order by scan().
    # scan_pattern() wraps each pattern in an outer group, so group 1 is the
    # whole match and group 2 is the first group written in the pattern.
    # Word-like tokens carry a (?!\w) lookahead; without it an identifier
    # that merely starts with a reserved word ("order", "stringy") was split
    # into a reserved word followed by a stray identifier.
    _TOKEN_RULES = (
        (r'->', 'arrow', 1),
        (r'>=|>|<=|<|==|!=', 'relational operator', 1),
        (r'\+|\-', 'additive operator', 1),
        (r'\*|\/|\|', 'multiplicative operator', 1),
        (r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation', 1),
        (r'(string|integer|boolean|function|void|union)(?!\w)',
         'type name', 2),
        (r'(and|or)(?!\w)', 'boolean operator', 2),
        (r'(if|else|while|make|struct|'
         r'typecase|is|as|return|break|'
         r'true|false|null)(?!\w)', 'keyword', 2),
        (r'\d+', 'integer literal', 1),
        (r'\"(.*?)\"', 'string literal', 2),
        (r'[a-zA-Z_][a-zA-Z0-9_]*', 'identifier', 1),
        (r'.', 'unknown character', 1),
    )

    def __init__(self, text):
        """Start scanning `text` and read its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.pos = 0
        self.scan()

    def near_text(self, length=10):
        """Return up to `length` characters of text starting at ``pos``."""
        return self.text[self.pos:self.pos + length]

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the current position.

        On success, records `type`, stores the text of `token_group` as the
        current token, advances past the whole match and returns True.
        On failure, leaves the state untouched and returns False.
        `rest_group` is accepted for compatibility but is not used.
        """
        pattern = r'(' + pattern + r')'
        regexp = re.compile(pattern, flags=re.DOTALL)
        match = regexp.match(self.text, pos=self.pos)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.pos += len(match.group(0))
        return True

    def scan(self):
        """Advance to the next token, skipping whitespace and /* */ comments.

        At end of input sets ``type`` to 'EOF' and ``token`` to None.
        Raises ValueError if no rule matches, which the catch-all
        'unknown character' rule is there to prevent.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        while self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if self.pos >= len(self.text):
            self.token = None
            self.type = 'EOF'
            return
        for pattern, token_type, group in self._TOKEN_RULES:
            if self.scan_pattern(pattern, token_type, token_group=group):
                return
        raise ValueError("this should never happen, "
                         "self.text=(%s)" % self.text)

    def expect(self, token):
        """Require the current token to equal `token`, then advance.

        Raises CastileSyntaxError, quoting the nearby text, when it does not.
        """
        if self.token == token:
            self.scan()
        else:
            raise CastileSyntaxError(
                "Expected '%s', but found '%s' (near '%s')" % (
                    token, self.token, self.near_text()
                )
            )

    def expect_type(self, type):
        """Require the current token to be of `type`; return its text and advance.

        Raises CastileSyntaxError (via check_type) on a type mismatch.
        """
        self.check_type(type)
        token = self.token
        self.scan()
        return token

    def on(self, token):
        """Return True when the current token's text equals `token`."""
        return self.token == token

    def on_any(self, tokens):
        """Return True when the current token's text is a member of `tokens`."""
        return self.token in tokens

    def on_type(self, type):
        """Return True when the current token's category equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise CastileSyntaxError unless the current token is of `type`."""
        if not self.type == type:
            raise CastileSyntaxError(
                "Expected %s, but found %s ('%s') (near '%s')" % (
                    type, self.type, self.token, self.near_text()
                )
            )

    def consume(self, token):
        """Advance past the current token if its text equals `token`.

        Returns True when consumed, False (without advancing) otherwise.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False

    def consume_type(self, type):
        """Advance past the current token if it is of `type`.

        Returns the consumed token's text, or None (without advancing) when
        the current token is of a different type.
        """
        if self.on_type(type):
            token = self.token
            self.scan()
            return token
        else:
            return None