0 | 0 |
import re
|
1 | 1 |
|
2 | 2 |
from castile.ast import AST
|
3 | |
|
4 | |
|
5 | |
class CastileSyntaxError(ValueError):
    """Raised when the current token is not what the parser requires.

    Subclasses ValueError, so callers that catch ValueError also catch it.
    """
|
|
3 |
from castile.scanner import Scanner, CastileSyntaxError
|
7 | 4 |
|
8 | 5 |
|
9 | 6 |
class Parser(object):
|
10 | 7 |
"""Parse a Castile program into an AST.
|
11 | |
|
12 | |
The parser includes the scanner as part of it. (Delegating to an external
|
13 | |
scanner is rather verbose ("self.scanner.expect(...)"; inheriting from a
|
14 | |
Scanner class, even if it's just a mixin, seems rather weird.)
|
15 | 8 |
|
16 | 9 |
The parser mainly just constructs the AST. It does few other analyses
|
17 | 10 |
or transformations itself. However, there are a few:
|
|
23 | 16 |
|
24 | 17 |
"""
|
25 | 18 |
def __init__(self, text):
    """Initialise scanning state over `text` and prime the first token.

    `text` is the program source.  After construction `self.token` and
    `self.type` describe the first token (set by the initial `scan()`),
    and `self.scanner` holds a `Scanner` built from the same text.
    """
    # Scanning state kept directly on this object.
    self.text = text
    self.token = None
    self.type = None
    self.pos = 0
    self.scan()
    # Scanner instance used by the delegating helper methods.
    self.scanner = Scanner(text)
    # for parser...
    self.locals = None
|
33 | 21 |
|
34 | |
# ### SCANNER ### #
|
35 | |
|
36 | |
def near_text(self, length=10):
    """Return up to `length` characters of source starting at `self.pos`.

    Used to give context in syntax error messages.  Fewer characters are
    returned when the remaining text is shorter than `length`.
    """
    start = self.pos
    return self.text[start:start + length]
|
38 | |
|
39 | |
def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
    """Try to match `pattern` at the current position in `self.text`.

    The pattern is wrapped in one extra capturing group, so group 1 is
    the whole match and any groups written inside `pattern` start at 2.
    Matching uses re.DOTALL, so `.` also matches newlines.

    On success `self.type` is set to `type`, `self.token` to the text of
    group `token_group`, `self.pos` is moved past the entire match, and
    True is returned.  On failure no state is changed and False is
    returned.

    `rest_group` is accepted so existing call sites keep working, but it
    is not used by this method.
    """
    regexp = re.compile(r'(' + pattern + r')', flags=re.DOTALL)
    match = regexp.match(self.text, pos=self.pos)
    if not match:
        return False
    self.type = type
    self.token = match.group(token_group)
    # match() is anchored at self.pos, so end() equals the old position
    # plus the length of the matched text.
    self.pos = match.end()
    return True
|
50 | |
|
51 | |
def scan(self):
    """Advance to the next token, setting `self.token` and `self.type`.

    Leading whitespace and `/* ... */` comments are skipped first.  At end
    of input `self.token` becomes None and `self.type` becomes 'EOF'.
    Otherwise the token rules below are tried in order and the first one
    that matches wins.

    Word-like tokens (type names, boolean operators, keywords) only match
    when they are not immediately followed by another word character, so
    identifiers such as `order` or `strings` are scanned as identifiers
    rather than being split into `or` + `der` or `string` + `s`.
    """
    whitespace = r'[ \t\n\r]*'
    self.scan_pattern(whitespace, 'whitespace')
    while self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
        self.scan_pattern(whitespace, 'whitespace')

    if self.pos >= len(self.text):
        self.token = None
        self.type = 'EOF'
        return

    # (pattern, token type, token group).  Order matters: multi-character
    # operators come before their single-character prefixes, and reserved
    # words come before the general identifier rule.
    rules = (
        (r'->', 'arrow', 1),
        (r'>=|>|<=|<|==|!=', 'relational operator', 1),
        (r'\+|\-', 'additive operator', 1),
        (r'\*|\/|\|', 'multiplicative operator', 1),
        (r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation', 1),
        (r'(string|integer|boolean|function|void|union)(?!\w)',
         'type name', 2),
        (r'(and|or)(?!\w)', 'boolean operator', 2),
        (r'(if|else|while|make|struct|'
         r'typecase|is|as|return|break|'
         r'true|false|null)(?!\w)', 'keyword', 2),
        (r'\d+', 'integer literal', 1),
        (r'\"(.*?)\"', 'string literal', 2),
        (r'[a-zA-Z_][a-zA-Z0-9_]*', 'identifier', 1),
        (r'.', 'unknown character', 1),
    )
    for pattern, token_type, group in rules:
        if self.scan_pattern(pattern, token_type,
                             token_group=group, rest_group=group + 1):
            return

    # The final '.' rule matches any character, so this is a safety net.
    raise ValueError("this should never happen, "
                     "self.text=(%s)" % self.text)
|
91 | |
|
92 | |
def expect(self, token):
    """Require the current token to equal `token`, then advance.

    Raises CastileSyntaxError, with the expected token, the token actually
    found and a snippet of nearby source, when the current token differs.
    """
    if self.token != token:
        raise CastileSyntaxError(
            "Expected '%s', but found '%s' (near '%s')" % (
                token, self.token, self.near_text()
            )
        )
    self.scan()
|
101 | |
|
102 | |
def expect_type(self, type):
    """Require the current token to be of `type`; return it and advance.

    The check is done by `check_type`, which raises on a mismatch.  The
    returned value is the token text as it was before advancing.
    """
    self.check_type(type)
    found = self.token
    self.scan()
    return found
|
107 | |
|
108 | |
def on(self, token):
    """Return True if the current token equals `token`; does not advance."""
    return self.token == token
|
110 | |
|
111 | |
def on_any(self, tokens):
    """Return True if the current token is one of `tokens`; does not advance."""
    return self.token in tokens
|
113 | |
|
114 | |
def on_type(self, type):
    """Return True if the current token's type equals `type`; does not advance."""
    return self.type == type
|
116 | |
|
117 | |
def check_type(self, type):
    """Raise CastileSyntaxError unless the current token is of `type`.

    The message names the expected type, the type and text of the token
    actually found, and a snippet of nearby source.  Nothing is consumed.
    """
    if self.type != type:
        raise CastileSyntaxError(
            "Expected %s, but found %s ('%s') (near '%s')" % (
                type, self.type, self.token, self.near_text()
            )
        )
|
124 | |
|
125 | |
def consume(self, token):
    """Advance past the current token if it equals `token`.

    Returns True when the token matched and was consumed, False (with no
    state change) otherwise.
    """
    if self.token != token:
        return False
    self.scan()
    return True
|
131 | |
|
132 | |
def consume_type(self, type):
    """Advance past the current token if it is of `type`.

    Returns the consumed token's text, or None (with no state change) when
    the current token is of a different type.
    """
    if not self.on_type(type):
        return None
    found = self.token
    self.scan()
    return found
|
139 | |
|
140 | |
# ### PARSER ### #
|
|
22 |
### Delegate to scanner
|
|
23 |
|
|
24 |
def consume(self, *args, **kwargs):
    """Forward to `self.scanner.consume` and return its result."""
    return self.scanner.consume(*args, **kwargs)
|
|
26 |
|
|
27 |
def consume_type(self, *args, **kwargs):
    """Forward to `self.scanner.consume_type` and return its result."""
    return self.scanner.consume_type(*args, **kwargs)
|
|
29 |
|
|
30 |
def expect(self, *args, **kwargs):
    """Forward to `self.scanner.expect` and return its result."""
    return self.scanner.expect(*args, **kwargs)
|
|
32 |
|
|
33 |
def expect_type(self, *args, **kwargs):
    """Forward to `self.scanner.expect_type` and return its result."""
    return self.scanner.expect_type(*args, **kwargs)
|
|
35 |
|
|
36 |
def on(self, *args, **kwargs):
    """Forward to `self.scanner.on` and return its result."""
    return self.scanner.on(*args, **kwargs)
|
|
38 |
|
|
39 |
def on_any(self, *args, **kwargs):
    """Forward to `self.scanner.on_any` and return its result."""
    return self.scanner.on_any(*args, **kwargs)
|
|
41 |
|
|
42 |
def on_type(self, *args, **kwargs):
    """Forward to `self.scanner.on_type` and return its result."""
    return self.scanner.on_type(*args, **kwargs)
|
|
44 |
|
|
45 |
### Parser proper
|
141 | 46 |
|
142 | 47 |
def program(self):
|
143 | 48 |
defns = []
|