git @ Cat's Eye Technologies Castile / d756a32
Merge pull request #3 from catseye/develop-0.5 Develop 0.5 Chris Pressey authored 3 years ago GitHub committed 3 years ago
24 changed file(s) with 951 addition(s) and 462 deletion(s). Raw diff Collapse all Expand all
0 History of Castile
1 ==================
2
3 Castile 0.5
4 -----------
5
6 ### Distribution
7
8 * Added HISTORY.md file.
9
10 ### Language
11
12 * Scoped structs can be declared with the `for (...)` clause
13 after the struct. A value of a scoped struct can only be
14 `make`d, and the fields of such a value can only be accessed,
15 lexically inside one of the definitions named in the `for`.
16 * Structs cannot be tested for equality with the `==` and `!=`
17 operators. Instead the programmer should write a function
18 that compares structs for equality, if desired.
19 * Values of union type can be tested for equality, but only if
20 none of the types involved in the union are structs.
21
22 ### Implementation
23
24 * Lexical scanner has been split off from parser code, into
25 its own module. A performance bug (using O(n^2) space
26 instead of O(n)) during scanning has also been fixed.
27 * Line numbers are recorded in the AST when parsing, and
28 reported in the error message when a type error occurs.
29 * Requesting that the AST be dumped will also dump the AST with
30 type assignments if an error occurs during type checking.
31 * Established an abstract base class for compiler backends.
32 * Fixed a bug where tagged values were being tagged again
33 during a cast from a union type to another union type.
34 * ArgumentParser is used instead of OptionParser to parse
35 command-line arguments. `--version` added, `--test` (and
36 remaining doctests in source modules) removed.
37
38 Castile 0.4
39 -----------
40
41 ### Distribution
42
43 * Re-focused project: Castile is a simple imperative language
44 with union types.
45 * Released under a 3-clause BSD license.
46
47 ### Language
48
49 * `struct`s cannot be compared for order; attempting to do so is a static error.
50 * A union type is allowed to be promoted to a bigger union type,
51 or to itself.
52
53 ### Implementation
54
55 * Completed the C-generating backend of the compiler: it passes all tests now.
56 * Implemented `str` builtin, equality testing of `struct`s in all backends.
57 * Improved pretty-printing of code in C and Ruby backends.
58 * Implemented `ne` in stackmac implementation.
59
60 Castile 0.3 revision 2021.0625
61 ------------------------------
62
63 * Updated implementation to run under both Python 2 and Python 3.
64 * Refactored test suite, making it more idiomatic Falderal.
65
66 Castile 0.3 revision 2016.0331
67 ------------------------------
68
69 * Fixed generated Ruby code that worked in Ruby 1.8 but fails in Ruby 1.9.
70
71 Castile 0.3 revision 2015.0101
72 ------------------------------
73
74 * Stopped using deprecated Falderal variable names in test suite.
75
76 Castile 0.3
77 -----------
78
79 * Treatment of local variables became more Python-like.
80 * Beginnings of a C backend in compiler.
81
82 Castile 0.2
83 -----------
84
85 * Heavy development of the language, with many changes.
86 * Added JavaScript, Ruby, and stackmac (stack-machine) backends to compiler.
87
88 Castile 0.1
89 -----------
90
91 Initial release of Castile, an unremarkable language with an unremarkable
92 compiler/interpreter in Python.
00 Castile
11 =======
22
3 Version 0.4 | _Entry_ [@ catseye.tc](https://catseye.tc/node/Castile)
3 Version 0.5 | _Entry_ [@ catseye.tc](https://catseye.tc/node/Castile)
44 | _See also:_ [Eightebed](https://github.com/catseye/Eightebed#readme)
55 ∘ [Dieter](https://github.com/catseye/Dieter#readme)
66
11 ----
22
33 ### Implementation ###
4
5 Line number reporting for Context errors and Syntax errors.
46
57 Name mangling for compilers (prepend with `_` most likely.)
68
1012
1113 Implement `int`, `chr`, `ord` for Ruby, JavaScript, stackmac, C.
1214
13 Struct equality is not properly deep in JavaScript or C.
14
1515 Better indentation in the JavaScript backend.
1616
1717 TaggedValue -> just a tuple.
2020 and void types in unions of (void, X) should only be one value.
2121 (structs are still boxed though)
2222
23 AST nodes should have source line numbers, it would be really nice.
24
2523 Implement garbage collection of some sort in the C backend. Either that
2624 or implement some kind of resource-awareness in the language itself.
2725
2826 Other backends (Python? Java? CIL? Scheme?)
2927
28 Test framework: collect the backend-independent tests into a single
29 file, and only test it once. Run all the *other* tests on every
30 backend.
31
3032 ### Design ###
3133
3234 Don't output final value. Command-line arguments passed to `main`. (`sysmain`?)
3335
34 Convenience:
36 Automatic type promotion (upcasting), e.g. using an integer where
37 integer|string is expected (e.g. as a function argument) is fine;
38 an `as integer|string` should be automatically inserted.
3539
36 * Should we have automatic promotion (value tagging?)
37 Since it causes an operation, I think it should be explicit, but the
38 explicit syntax could be more lightweight.
39 * Lua-esque `:` operator: `a:b(c)` -> `a.b(a, c)`
40 Lua-esque `:` operator: `a:b(c)` -> `a.b(a, c)`
4041
4142 Type promotion with higher precedence? So that it can be used at toplevel.
4243
44
55 Program ::= {Defn [";"]}.
66 Defn ::= "fun" ident "(" [Arg {"," Arg}] ")" Body
7 | "struct" ident "{" {ident ":" TExpr [";"]} "}"
7 | "struct" ident "{" {ident ":" TExpr [";"]} "}" ["for" "(" [ident {"," ident}] ")"]
88 | ident (":" TExpr0 | "=" Literal).
99 Arg ::= ident [":" TExpr1].
1010 Body ::= "{" {Stmt [";"]} "}".
0 /*
1 * Implementation of an associative map in Castile.
2 *
3 * The map is implemented as an association list,
4 * but this fact is hidden from clients, as only
5 * the operations have access to the internals
6 * of the struct.
7 */
8
9 struct assoc {
10 key: string;
11 value: string;
12 next: assoc|void;
13 } for (update, lookup, remove, render)
14
15 fun empty() {
16 return null as assoc|void
17 }
18
19 fun update(k: string, v: string, a: assoc|void) {
20 make assoc(key:k, value:v, next:a as assoc|void)
21 }
22
23 lookup : assoc|void, string -> string|void
24 fun lookup(a: assoc|void, k: string) {
25 typecase a is void {
26 return null as string|void
27 }
28 typecase a is assoc {
29 if a.key == k {
30 return a.value as string|void
31 }
32 return lookup(a.next, k)
33 }
34 }
35
36 remove : assoc|void, string -> assoc|void
37 fun remove(a: assoc|void, k: string) {
38 typecase a is void {
39 return a as assoc|void
40 }
41 typecase a is assoc {
42 if a.key == k {
43 return remove(a.next, k)
44 }
45 return make assoc(key:a.key, value:a.value, next:remove(a.next, k)) as assoc|void
46 }
47 }
48
49 render : assoc|void -> string
50 fun render(a: assoc|void) {
51 typecase a is void {
52 return ""
53 }
54 typecase a is assoc {
55 return concat(a.value, concat(",", render(a.next)))
56 }
57 }
58
59 fun main() {
60 a = update("3", "third", empty());
61 a = update("2", "second", a as assoc|void);
62 a = update("1", "first", a as assoc|void);
63 print(render(a as assoc|void))
64 b = remove((a as assoc|void), "2");
65 print(render(b))
66 r = lookup(b, "2");
67 typecase r is void { print("NOT FOUND"); }
68 typecase r is string { print(r); }
69 }
0 struct list {
1 value: string;
2 next: list|void;
3 }
4
5 fun empty() {
6 return null as list|void
7 }
8
9 fun cons(v: string, l: list|void) {
10 make list(value:v, next:l) as list|void
11 }
12
13 equal_list : list|void, list|void -> boolean
14 fun equal_list(a: list|void, b: list|void) {
15 typecase a is void {
16 typecase b is void {
17 return true
18 }
19 }
20 typecase a is list {
21 typecase b is list {
22 return a.value == b.value and equal_list(a.next, b.next)
23 }
24 }
25 return false
26 }
27
28 length : list|void -> integer
29 fun length(l: list|void) {
30 typecase l is void { return 0 }
31 typecase l is list { return 1 + length(l.next) }
32 }
33
34 main = fun() {
35 l1 = cons("first", cons("second", cons("third", empty())));
36 l2 = cons("first", cons("second", cons("third", empty())));
37 l3 = cons("first", cons("second", empty()));
38
39 print(str(length(l1 as list|void)));
40
41 if (equal_list(l1, l2) and not equal_list(l2, l3)) {
42 print("Yep, story checks out")
43 }
44 }
0 struct person { name: string };
1 fun foo(a, b: integer|string) {
2 r = a;
3 typecase b is integer {
4 r = r + b;
5 };
6 typecase b is person {
7 r = r + len(b);
8 };
9 r
10 }
11 main = fun() {
12 a = 0;
13 a = foo(a, 333 as integer|string);
14 a = foo(a, "hiya" as integer|string);
15 a /* should output 337 */
16 }
88 r
99 }
1010 main = fun() {
11 a = 0;
1112 a = foo(a, 333 as integer|string);
1213 a = foo(a, "hiya" as integer|string);
1314 a /* should output 337 */
00 class AST(object):
1 def __init__(self, tag, children=None, value=None, type=None, aux=None):
1 def __init__(self, tag, children=None, value=None, type=None, aux=None, line=None):
22 self.tag = tag
3 self.line = line
34 self.value = value
45 # typechecker may populate this. parser will not.
56 self.type = type
0 """Abstract base class for Castile compiler backends,
1 especially source-to-source."""
2
3 class BaseCompiler(object):
4 def __init__(self, out):
5 self.out = out
6 self.indent = 0
7
8 def commas(self, asts, sep=','):
9 if asts:
10 for child in asts[:-1]:
11 self.compile(child)
12 self.out.write(sep)
13 self.compile(asts[-1])
14
15 def write(self, x):
16 self.out.write(x)
17
18 def write_indent(self, x):
19 self.out.write(' ' * self.indent)
20 self.out.write(x)
0 from castile.backends.base import BaseCompiler
01 from castile.transformer import VarDeclTypeAssigner
12 from castile.types import (
23 Integer, String, Void, Boolean, Function, Union, Struct
89 }
910
1011 PRELUDE = r"""
11 /* AUTOMATICALLY GENERATED -- EDIT AT OWN RISK */
12 /* AUTOMATICALLY GENERATED -- EDIT AT YOUR OWN RISK */
1213
1314 #include <stdio.h>
1415 #include <stdlib.h>
7273 return !strcmp(tag, tv->tag);
7374 }
7475
76 int equal_tagged_value(struct tagged_value *tv1, struct tagged_value *tv2)
77 {
78 return is_tag(tv1->tag, tv2) && tv1->value == tv2->value;
79 }
80
7581 """
7682
7783
78 class Compiler(object):
84 class Compiler(BaseCompiler):
7985 def __init__(self, out):
80 self.out = out
86 super(Compiler, self).__init__(out)
8187 self.main_type = None
82 self.indent = 0
8388 self.typecasing = set()
84
85 def commas(self, asts, sep=','):
86 if asts:
87 for child in asts[:-1]:
88 self.compile(child)
89 self.out.write(sep)
90 self.compile(asts[-1])
91
92 def write(self, x):
93 self.out.write(x)
94
95 def write_indent(self, x):
96 self.out.write(' ' * self.indent)
97 self.out.write(x)
9889
9990 # as used in local variable declarations
10091 def c_type(self, type):
177168 elif ast.tag == 'Forward':
178169 self.write_indent('extern %s;\n' % self.c_decl(ast.children[0].type, ast.value))
179170 elif ast.tag == 'StructDefn':
171 field_defns = ast.children[0].children
180172 self.write_indent('struct %s {\n' % ast.value)
181173 self.indent += 1
182 for child in ast.children:
174 for child in field_defns:
175 assert child.tag == 'FieldDefn', child.tag
183176 self.compile(child)
184177 self.indent -= 1
185178 self.write_indent('};\n\n')
186179 self.write_indent('struct %s * make_%s(' % (ast.value, ast.value))
187180
188 if ast.children:
189 for child in ast.children[:-1]:
190 assert child.tag == 'FieldDefn'
181 if field_defns:
182 for child in field_defns[:-1]:
183 assert child.tag == 'FieldDefn', child.tag
191184 self.write('%s, ' % self.c_decl(child.children[0].type, child.value))
192 child = ast.children[-1]
193 assert child.tag == 'FieldDefn'
185 child = field_defns[-1]
186 assert child.tag == 'FieldDefn', child.tag
194187 self.write('%s' % self.c_decl(child.children[0].type, child.value))
195188
196189 self.write(') {\n')
197190 self.indent += 1
198191 self.write_indent('struct %s *x = malloc(sizeof(struct %s));\n' % (ast.value, ast.value))
199192
200 for child in ast.children:
201 assert child.tag == 'FieldDefn'
193 for child in field_defns:
194 assert child.tag == 'FieldDefn', child.tag
202195 self.write_indent('x->%s = %s;\n' % (child.value, child.value))
203196
204197 self.write_indent('return x;\n')
205 self.indent -= 1
206 self.write_indent('}\n\n')
207
208 self.write_indent('int equal_%s(struct %s * a, struct %s * b) {\n' % (ast.value, ast.value, ast.value))
209
210 self.indent += 1
211 for child in ast.children:
212 assert child.tag == 'FieldDefn'
213 # TODO does not handle structs within structs
214 self.write_indent('if (a->%s != b->%s) return 0;\n' % (child.value, child.value))
215
216 self.write_indent('return 1;\n')
217198 self.indent -= 1
218199 self.write_indent('}\n\n')
219200
256237 self.indent -= 1
257238 elif ast.tag == 'Op':
258239 if ast.value == '==' and isinstance(ast.children[0].type, Struct):
259 self.write('equal_%s(' % ast.children[0].type.name)
240 raise NotImplementedError('structs cannot be compared for equality')
241 elif ast.value == '==' and isinstance(ast.children[0].type, Union):
242 self.write('equal_tagged_value(')
260243 self.compile(ast.children[0])
261244 self.write(', ')
262245 self.compile(ast.children[1])
342325 self.compile(ast.children[0])
343326 self.write('->%s' % ast.value)
344327 elif ast.tag == 'TypeCast':
345 self.write('tag("%s",(void *)' % str(ast.children[0].type))
346 self.compile(ast.children[0])
347 self.write(')')
328 # If the LHS is not already a union type, promote it to a tagged value.
329 if isinstance(ast.children[0].type, Union):
330 self.compile(ast.children[0])
331 else:
332 self.write('tag("%s",(void *)' % str(ast.children[0].type))
333 self.compile(ast.children[0])
334 self.write(')')
348335 elif ast.tag == 'TypeCase':
349336 self.write_indent('if (is_tag("%s",' % str(ast.children[1].type))
350337 self.compile(ast.children[0])
0 from castile.types import Struct
0 from castile.backends.base import BaseCompiler
1 from castile.types import Struct, Union
12
23 OPS = {
34 'and': '&&',
56 '==': '===',
67 }
78
8
9 class Compiler(object):
10 def __init__(self, out):
11 self.out = out
12
13 def commas(self, asts, sep=','):
14 if asts:
15 for child in asts[:-1]:
16 self.compile(child)
17 self.out.write(sep)
18 self.compile(asts[-1])
19
20 def compile(self, ast):
21 if ast.tag == 'Program':
22 self.out.write("""\
23 /* AUTOMATICALLY GENERATED -- EDIT AT OWN RISK */
9 PRELUDE = r"""
10 /* AUTOMATICALLY GENERATED -- EDIT AT YOUR OWN RISK */
2411
2512 /*
2613 var stdin = process.openStdin();
5542 }
5643 };
5744
58 """)
59 for child in ast.children:
60 self.compile(child)
61 self.out.write("""\
/*
 * Compare two tagged (union-typed) values for equality.
 * Tagged values are emitted by this backend as two-element arrays
 * [tag, value], so the tag is element 0 and the payload is element 1.
 * Returns true only when both the tags and the payloads are strictly equal.
 */
var equal_tagged_value = function(tv1, tv2)
{
    return (tv1[0] === tv2[0]) && (tv1[1] === tv2[1]);
}
49 """
50
51 POSTLUDE = """\
6252
6353 var result = main();
6454 if (result !== undefined && result !== null)
6555 print(repr(result));
66 """)
56 """
57
58
59 class Compiler(BaseCompiler):
60
61 def compile(self, ast):
62 if ast.tag == 'Program':
63 self.write(PRELUDE)
64 for child in ast.children:
65 self.compile(child)
66 self.write(POSTLUDE)
6767 elif ast.tag == 'Defn':
68 self.out.write('var %s = ' % ast.value)
68 self.write('var %s = ' % ast.value)
6969 self.compile(ast.children[0])
70 self.out.write(';\n')
70 self.write(';\n')
7171 elif ast.tag == 'Forward':
7272 pass
7373 elif ast.tag == 'StructDefn':
74 self.out.write('function equal_%s(a, b) {\n' % ast.value)
75 for child in ast.children:
76 assert child.tag == 'FieldDefn'
77 # TODO does not handle structs within structs
78 self.out.write('if (a.%s !== b.%s) return false;\n' % (child.value, child.value))
79 self.out.write('return true;\n')
80 self.out.write('}\n\n')
74 pass
8175 elif ast.tag == 'FunLit':
82 self.out.write('function(')
76 self.write('function(')
8377 self.compile(ast.children[0])
84 self.out.write(')\n')
78 self.write(')\n')
8579 self.compile(ast.children[1])
8680 elif ast.tag == 'Args':
8781 self.commas(ast.children)
8882 elif ast.tag == 'Arg':
89 self.out.write(ast.value)
83 self.write(ast.value)
9084 elif ast.tag == 'Body':
91 self.out.write('{')
85 self.write('{')
9286 self.compile(ast.children[0])
9387 assert ast.children[1].tag == 'Block'
9488 block = ast.children[1]
9589 for child in block.children:
9690 self.compile(child)
97 self.out.write(';\n')
98 self.out.write('}')
91 self.write(';\n')
92 self.write('}')
9993 elif ast.tag == 'VarDecls':
10094 for child in ast.children:
10195 self.compile(child)
10296 elif ast.tag == 'VarDecl':
103 self.out.write('var %s;\n' % ast.value)
97 self.write('var %s;\n' % ast.value)
10498 elif ast.tag == 'Block':
105 self.out.write('{')
99 self.write('{')
106100 for child in ast.children:
107101 self.compile(child)
108 self.out.write(';\n')
109 self.out.write('}')
102 self.write(';\n')
103 self.write('}')
110104 elif ast.tag == 'While':
111 self.out.write('while (')
105 self.write('while (')
112106 self.compile(ast.children[0])
113 self.out.write(')')
107 self.write(')')
114108 self.compile(ast.children[1])
115109 elif ast.tag == 'Op':
116110 if ast.value == '==' and isinstance(ast.children[0].type, Struct):
117 self.out.write('equal_%s(' % ast.children[0].type.name)
111 raise NotImplementedError('structs cannot be compared for equality')
112 elif ast.value == '==' and isinstance(ast.children[0].type, Union):
113 self.write('equal_tagged_value(')
118114 self.compile(ast.children[0])
119 self.out.write(', ')
115 self.write(', ')
120116 self.compile(ast.children[1])
121 self.out.write(')')
117 self.write(')')
122118 else:
123 self.out.write('(')
119 self.write('(')
124120 self.compile(ast.children[0])
125 self.out.write(' %s ' % OPS.get(ast.value, ast.value))
121 self.write(' %s ' % OPS.get(ast.value, ast.value))
126122 self.compile(ast.children[1])
127 self.out.write(')')
123 self.write(')')
128124 elif ast.tag == 'VarRef':
129 self.out.write(ast.value)
125 self.write(ast.value)
130126 elif ast.tag == 'FunCall':
131127 self.compile(ast.children[0])
132 self.out.write('(')
128 self.write('(')
133129 self.commas(ast.children[1:])
134 self.out.write(')')
130 self.write(')')
135131 elif ast.tag == 'If':
136 self.out.write('if(')
132 self.write('if(')
137133 self.compile(ast.children[0])
138 self.out.write(')')
134 self.write(')')
139135 if len(ast.children) == 3: # if-else
140136 self.compile(ast.children[1])
141 self.out.write(' else ')
137 self.write(' else ')
142138 self.compile(ast.children[2])
143139 else: # just-if
144140 self.compile(ast.children[1])
145141 elif ast.tag == 'Return':
146 self.out.write('return ')
142 self.write('return ')
147143 self.compile(ast.children[0])
148144 elif ast.tag == 'Break':
149 self.out.write('break')
145 self.write('break')
150146 elif ast.tag == 'Not':
151 self.out.write('!(')
147 self.write('!(')
152148 self.compile(ast.children[0])
153 self.out.write(')')
149 self.write(')')
154150 elif ast.tag == 'None':
155 self.out.write('null')
151 self.write('null')
156152 elif ast.tag == 'IntLit':
157 self.out.write(str(ast.value))
153 self.write(str(ast.value))
158154 elif ast.tag == 'StrLit':
159 self.out.write("'%s'" % ast.value)
155 self.write("'%s'" % ast.value)
160156 elif ast.tag == 'BoolLit':
161157 if ast.value:
162 self.out.write("true")
158 self.write("true")
163159 else:
164 self.out.write("false")
160 self.write("false")
165161 elif ast.tag == 'Assignment':
166162 self.compile(ast.children[0])
167 self.out.write(' = ')
163 self.write(' = ')
168164 self.compile(ast.children[1])
169165 elif ast.tag == 'Make':
170 self.out.write('{')
166 self.write('{')
171167 self.commas(ast.children[1:])
172 self.out.write('}')
168 self.write('}')
173169 elif ast.tag == 'FieldInit':
174 self.out.write("'%s':" % ast.value)
170 self.write("'%s':" % ast.value)
175171 self.compile(ast.children[0])
176172 elif ast.tag == 'Index':
177173 self.compile(ast.children[0])
178 self.out.write('.%s' % ast.value)
174 self.write('.%s' % ast.value)
179175 elif ast.tag == 'TypeCast':
180 self.out.write("['%s'," % str(ast.children[0].type))
176 # If the LHS is not already a union type, promote it to a tagged value.
177 if isinstance(ast.children[0].type, Union):
178 self.compile(ast.children[0])
179 else:
180 self.write("['%s'," % str(ast.children[0].type))
181 self.compile(ast.children[0])
182 self.write(']')
183 elif ast.tag == 'TypeCase':
184 self.write('if (')
181185 self.compile(ast.children[0])
182 self.out.write(']')
183 elif ast.tag == 'TypeCase':
184 self.out.write('if (')
186 self.write("[0] == '%s')" % str(ast.children[1].type))
187 self.write('{ var save=')
185188 self.compile(ast.children[0])
186 self.out.write("[0] == '%s')" % str(ast.children[1].type))
187 self.out.write('{ var save=')
189 self.write('; ')
188190 self.compile(ast.children[0])
189 self.out.write('; ')
191 self.write('=')
190192 self.compile(ast.children[0])
191 self.out.write('=')
192 self.compile(ast.children[0])
193 self.out.write('[1]; ')
193 self.write('[1]; ')
194194 self.compile(ast.children[2])
195195 self.compile(ast.children[0])
196 self.out.write(' =save; }')
196 self.write(' =save; }')
197197 else:
198198 raise NotImplementedError(repr(ast))
0 from castile.backends.base import BaseCompiler
1 from castile.types import Union
2
3
04 OPS = {
15 }
26
37 PRELUDE = """\
4 # AUTOMATICALLY GENERATED -- EDIT AT OWN RISK
8 # AUTOMATICALLY GENERATED -- EDIT AT YOUR OWN RISK
59
610 input = lambda { |s|
711 print(s)
6367 """
6468
6569
66 class Compiler(object):
67 def __init__(self, out):
68 self.out = out
69 self.indent = 0
70
71 def commas(self, asts, sep=','):
72 if asts:
73 for child in asts[:-1]:
74 self.compile(child)
75 self.out.write(sep)
76 self.compile(asts[-1])
77
78 def write(self, x):
79 self.out.write(x)
80
81 def write_indent(self, x):
82 self.out.write(' ' * self.indent)
83 self.out.write(x)
84
70 class Compiler(BaseCompiler):
8571 def mangle(self, ident):
8672 if ident.startswith('next'):
8773 return '{}_'.format(ident)
200186 self.compile(ast.children[0])
201187 self.write('["%s"]' % ast.value)
202188 elif ast.tag == 'TypeCast':
203 self.write("['%s'," % str(ast.children[0].type))
204 self.compile(ast.children[0])
205 self.write(']')
189 # If the LHS is not already a union type, promote it to a tagged value.
190 if isinstance(ast.children[0].type, Union):
191 self.compile(ast.children[0])
192 else:
193 self.write("['%s'," % str(ast.children[0].type))
194 self.compile(ast.children[0])
195 self.write(']')
206196 elif ast.tag == 'TypeCase':
207197 self.write_indent('if (')
208198 self.compile(ast.children[0])
209199 self.write("[0] == '%s')" % str(ast.children[1].type))
210 self.write('then save=')
200 self.write(' then\n')
201 self.indent += 1
202 self.write_indent('save=')
211203 self.compile(ast.children[0])
212204 self.write('\n')
205 self.write_indent('')
213206 self.compile(ast.children[0])
214207 self.write('=')
215208 self.compile(ast.children[0])
216209 self.write('[1]\n')
217210 self.compile(ast.children[2])
218 self.compile(ast.children[0])
219 self.write(' = save end')
211 self.write_indent('')
212 self.compile(ast.children[0])
213 self.write(' = save\n')
214 self.indent -= 1
215 self.write_indent('end')
220216 else:
221217 raise NotImplementedError(repr(ast))
8888 def compile(self, ast):
8989 if ast.tag == 'Program':
9090 self.out.write("""\
91 ; AUTOMATICALLY GENERATED -- EDIT AT OWN RISK
91 ; AUTOMATICALLY GENERATED -- EDIT AT YOUR OWN RISK
9292
9393 """)
9494 for child in ast.children:
237237 elif ast.tag == 'TypeCast':
238238 self.compile(ast.children[0])
239239 t = str(ast.children[0].type)
240 self.out.write('; tag with "%s"\n' % t)
241 if self.size_of(ast.children[0].type) == 0:
242 # special case. there is nothing on the stack
243 self.out.write('push 0\n')
244 tag = self.get_tag(t)
245 self.out.write('tag %d\n' % tag)
240 # If the LHS is not already a union type, promote it to a tagged value.
241 if not isinstance(ast.children[0].type, Union):
242 self.out.write('; tag with "%s"\n' % t)
243 if self.size_of(ast.children[0].type) == 0:
244 # special case. there is nothing on the stack
245 self.out.write('push 0\n')
246 tag = self.get_tag(t)
247 self.out.write('tag %d\n' % tag)
246248 elif ast.tag == 'TypeCase':
247249 end_typecase = self.get_label('end_typecase')
248250 self.compile(ast.children[0])
99
1010 def __repr__(self):
1111 return '(%r, %r)' % (self.tag, self.value)
12
13 def __eq__(self, other):
14 return self.tag == other.tag and self.value == other.value
1215
1316
1417 def builtin_len(s):
55
66
77 class CastileTypeError(ValueError):
8 pass
8 def __init__(self, ast, message, *args, **kwargs):
9 message = 'line {}: {}'.format(ast.line, message)
10 super(CastileTypeError, self).__init__(message, *args, **kwargs)
911
1012
1113 class StructDefinition(object):
12 def __init__(self, name, field_names, content_types):
14 def __init__(self, name, field_names, content_types, scope_idents):
1315 self.name = name
1416 self.field_names = field_names # dict of name -> position
1517 self.content_types = content_types # list of types in order
18 self.scope_idents = scope_idents # list of identifiers, or None
1619
1720 def field_names_in_order(self):
1821 m = {}
3235 self.context = ScopedContext(global_context, level='global')
3336 self.toplevel_context = ScopedContext({}, self.context, level='toplevel')
3437 self.context = self.toplevel_context
38 self.current_defn = None
3539
3640 self.forwards = {}
3741 self.structs = {} # struct name -> StructDefinition
4650 print('%s: %s' % (name, type))
4751 return type
4852
49 def assert_eq(self, t1, t2):
53 def assert_eq(self, ast, t1, t2):
5054 if t1 == t2:
5155 return
52 raise CastileTypeError("type mismatch: %s != %s" % (t1, t2))
56 raise CastileTypeError(ast, "type mismatch: %s != %s" % (t1, t2))
5357
5458 def collect_structs(self, ast):
5559 for child in ast.children:
5963 def collect_struct(self, ast):
6064 name = ast.value
6165 if name in self.structs:
62 raise CastileTypeError('duplicate struct %s' % name)
66 raise CastileTypeError(ast, 'duplicate struct %s' % name)
6367 struct_fields = {}
64 te = []
68 type_exprs = []
6569 i = 0
66 for child in ast.children:
67 assert child.tag == 'FieldDefn'
70 field_defns = ast.children[0].children
71 scope_idents = None
72 if len(ast.children) > 1:
73 scope_idents = [a.value for a in ast.children[1].children]
74 for child in field_defns:
75 assert child.tag == 'FieldDefn', child.tag
6876 field_name = child.value
6977 if field_name in struct_fields:
70 raise CastileTypeError('already-defined field %s' % field_name)
78 raise CastileTypeError(child, 'already-defined field %s' % field_name)
7179 struct_fields[field_name] = i
7280 i += 1
73 te.append(self.type_of(child.children[0]))
74 self.structs[name] = StructDefinition(ast.value, struct_fields, te)
81 type_exprs.append(self.type_of(child.children[0]))
82 self.structs[name] = StructDefinition(ast.value, struct_fields, type_exprs, scope_idents)
7583
7684 def resolve_structs(self, ast):
7785 if isinstance(ast.type, Struct):
7886 if ast.type.name not in self.structs:
79 raise CastileTypeError('undefined struct %s' % ast.type.name)
87 raise CastileTypeError(ast, 'undefined struct %s' % ast.type.name)
8088 ast.type.defn = self.structs[ast.type.name]
8189 for child in ast.children:
8290 self.resolve_structs(child)
8593 def type_of(self, ast):
8694 if ast.tag == 'Op':
8795 if ast.value in ('and', 'or'):
88 self.assert_eq(self.type_of(ast.children[0]), Boolean())
89 self.assert_eq(self.type_of(ast.children[1]), Boolean())
96 self.assert_eq(ast, self.type_of(ast.children[0]), Boolean())
97 self.assert_eq(ast, self.type_of(ast.children[1]), Boolean())
9098 ast.type = Boolean()
9199 elif ast.value in ('+', '-', '*', '/'):
92100 type1 = self.type_of(ast.children[0])
93101 type2 = self.type_of(ast.children[1])
94 self.assert_eq(type1, type2)
95 self.assert_eq(type1, Integer())
102 self.assert_eq(ast, type1, type2)
103 self.assert_eq(ast, type1, Integer())
96104 ast.type = Integer()
97105 elif ast.value in ('==', '!=', '>', '>=', '<', '<='):
98106 type1 = self.type_of(ast.children[0])
99107 type2 = self.type_of(ast.children[1])
100 self.assert_eq(type1, type2)
101 if ast.value in ('>', '>=', '<', '<=') and isinstance(type1, Struct):
102 raise CastileTypeError("structs cannot be compared for order")
108 self.assert_eq(ast, type1, type2)
109 if isinstance(type1, Struct):
110 raise CastileTypeError(ast, "structs cannot be compared")
111 if isinstance(type1, Union) and type1.contains_instance_of(Struct):
112 raise CastileTypeError(ast, "unions containing structs cannot be compared")
103113 ast.type = Boolean()
104114 elif ast.tag == 'Not':
105115 type1 = self.type_of(ast.children[0])
106 self.assert_eq(type1, Boolean())
116 self.assert_eq(ast, type1, Boolean())
107117 ast.type = Boolean()
108118 elif ast.tag == 'IntLit':
109119 ast.type = Integer()
113123 ast.type = Boolean()
114124 elif ast.tag == 'FunLit':
115125 save_context = self.context
116 self.context = ScopedContext({}, self.toplevel_context,
117 level='argument')
126 self.context = ScopedContext(
127 {}, self.toplevel_context, level='argument'
128 )
118129 self.return_type = None
119130 arg_types = self.type_of(ast.children[0]) # args
120131 t = self.type_of(ast.children[1]) # body
121 self.assert_eq(t, Void())
132 self.assert_eq(ast, t, Void())
122133 self.context = save_context
123134 return_type = self.return_type
124135 self.return_type = None
143154 elif ast.tag == 'Body':
144155 self.context = ScopedContext({}, self.context,
145156 level='local')
146 self.assert_eq(self.type_of(ast.children[1]), Void())
157 self.assert_eq(ast, self.type_of(ast.children[1]), Void())
147158 self.context = self.context.parent
148159 ast.type = Void()
149160 elif ast.tag == 'FunType':
156167 for c in ast.children:
157168 type_ = self.type_of(c)
158169 if type_ in types:
159 raise CastileTypeError("bad union type")
170 raise CastileTypeError(c, "bad union type")
160171 types.append(type_)
161172 ast.type = Union(types)
162173 elif ast.tag == 'StructType':
171182 assert isinstance(t1, Function), \
172183 '%r is not a function' % t1
173184 if len(t1.arg_types) != len(ast.children) - 1:
174 raise CastileTypeError("argument mismatch")
185 raise CastileTypeError(ast, "argument mismatch")
175186 i = 0
176187 for child in ast.children[1:]:
177 self.assert_eq(self.type_of(child), t1.arg_types[i])
188 self.assert_eq(ast, self.type_of(child), t1.arg_types[i])
178189 i += 1
179190 ast.type = t1.return_type
180191 elif ast.tag == 'Return':
182193 if self.return_type is None:
183194 self.return_type = t1
184195 else:
185 self.assert_eq(t1, self.return_type)
196 self.assert_eq(ast, t1, self.return_type)
186197 ast.type = Void()
187198 elif ast.tag == 'Break':
188199 ast.type = Void()
195206 if len(ast.children) == 3:
196207 # TODO useless! is void.
197208 t3 = self.type_of(ast.children[2])
198 self.assert_eq(t2, t3)
209 self.assert_eq(ast, t2, t3)
199210 ast.type = t2
200211 else:
201212 ast.type = Void()
204215 within_control = self.within_control
205216 self.within_control = True
206217 t1 = self.type_of(ast.children[0])
207 self.assert_eq(t1, Boolean())
218 self.assert_eq(ast, t1, Boolean())
208219 t2 = self.type_of(ast.children[1])
209220 ast.type = Void()
210221 self.within_control = within_control
211222 elif ast.tag == 'Block':
212223 for child in ast.children:
213 self.assert_eq(self.type_of(child), Void())
224 self.assert_eq(ast, self.type_of(child), Void())
214225 ast.type = Void()
215226 elif ast.tag == 'Assignment':
216227 t2 = self.type_of(ast.children[1])
218229 name = ast.children[0].value
219230 if ast.aux == 'defining instance':
220231 if self.within_control:
221 raise CastileTypeError('definition of %s within control block' % name)
232 raise CastileTypeError(ast, 'definition of %s within control block' % name)
222233 if name in self.context:
223 raise CastileTypeError('definition of %s shadows previous' % name)
234 raise CastileTypeError(ast, 'definition of %s shadows previous' % name)
224235 self.set(name, t2)
225236 t1 = t2
226237 else:
227238 if name not in self.context:
228 raise CastileTypeError('variable %s used before definition' % name)
239 raise CastileTypeError(ast, 'variable %s used before definition' % name)
229240 t1 = self.type_of(ast.children[0])
230 self.assert_eq(t1, t2)
241 self.assert_eq(ast, t1, t2)
231242 # not quite useless now (typecase still likes this)
232243 if self.context.level(ast.children[0].value) != 'local':
233 raise CastileTypeError('cannot assign to non-local')
244 raise CastileTypeError(ast, 'cannot assign to non-local')
234245 ast.type = Void()
235246 elif ast.tag == 'Make':
236247 t = self.type_of(ast.children[0])
237248 if t.name not in self.structs:
238 raise CastileTypeError("undefined struct %s" % t.name)
249 raise CastileTypeError(ast, "undefined struct %s" % t.name)
239250 struct_defn = self.structs[t.name]
251 if struct_defn.scope_idents is not None:
252 if self.current_defn not in struct_defn.scope_idents:
253 raise CastileTypeError(ast, "inaccessible struct %s for make: %s not in %s" %
254 (t.name, self.current_defn, struct_defn.scope_idents)
255 )
240256 if len(struct_defn.content_types) != len(ast.children) - 1:
241 raise CastileTypeError("argument mismatch")
257 raise CastileTypeError(ast, "argument mismatch; expected {}, got {} in {}".format(
258 len(struct_defn.content_types), len(ast.children) - 1, ast
259 ))
242260 i = 0
243261 for defn in ast.children[1:]:
244262 name = defn.value
245263 t1 = self.type_of(defn)
246264 pos = struct_defn.field_names[name]
247265 defn.aux = pos
248 self.assert_eq(t1, struct_defn.content_types[pos])
266 self.assert_eq(ast, t1, struct_defn.content_types[pos])
249267 i += 1
250268 ast.type = t
251269 elif ast.tag == 'FieldInit':
252270 ast.type = self.type_of(ast.children[0])
253271 elif ast.tag == 'Index':
254272 t = self.type_of(ast.children[0])
273 struct_defn = self.structs[t.name]
274 if struct_defn.scope_idents is not None:
275 if self.current_defn not in struct_defn.scope_idents:
276 raise CastileTypeError(ast, "inaccessible struct %s for access: %s not in %s" %
277 (t.name, self.current_defn, struct_defn.scope_idents)
278 )
255279 field_name = ast.value
256 struct_fields = self.structs[t.name].field_names
280 struct_fields = struct_defn.field_names
257281 if field_name not in struct_fields:
258 raise CastileTypeError("undefined field")
282 raise CastileTypeError(ast, "undefined field")
259283 index = struct_fields[field_name]
260284 # we make this value available to compiler backends
261285 ast.aux = index
262286 # we look up the type from the StructDefinition
263 ast.type = self.structs[t.name].content_types[index]
287 ast.type = struct_defn.content_types[index]
264288 elif ast.tag == 'TypeCase':
265289 t1 = self.type_of(ast.children[0])
266290 t2 = self.type_of(ast.children[1])
267291 if not isinstance(t1, Union):
268 raise CastileTypeError('bad typecase, %s not a union' % t1)
292 raise CastileTypeError(ast, 'bad typecase, %s not a union' % t1)
269293 if not t1.contains(t2):
270 raise CastileTypeError('bad typecase, %s not in %s' % (t2, t1))
294 raise CastileTypeError(ast, 'bad typecase, %s not in %s' % (t2, t1))
271295 # typecheck t3 with variable in children[0] having type t2
272296 assert ast.children[0].tag == 'VarRef'
273297 within_control = self.within_control
279303 self.within_control = within_control
280304 elif ast.tag == 'Program':
281305 for defn in ast.children:
282 self.assert_eq(self.type_of(defn), Void())
306 self.assert_eq(ast, self.type_of(defn), Void())
283307 ast.type = Void()
284308 self.resolve_structs(ast)
285309 elif ast.tag == 'Defn':
310 self.current_defn = ast.value
286311 t = self.type_of(ast.children[0])
312 self.current_defn = None
287313 if ast.value in self.forwards:
288 self.assert_eq(self.forwards[ast.value], t)
314 self.assert_eq(ast, self.forwards[ast.value], t)
289315 del self.forwards[ast.value]
290316 else:
291317 self.set(ast.value, t)
293319 # any return type is fine, for now, so,
294320 # we compare it against itself
295321 rt = t.return_type
296 self.assert_eq(t, Function([], rt))
322 self.assert_eq(ast, t, Function([], rt))
297323 ast.type = Void()
298324 elif ast.tag == 'Forward':
299325 t = self.type_of(ast.children[0])
306332 value_t = self.type_of(ast.children[0])
307333 union_t = self.type_of(ast.children[1])
308334 if not isinstance(union_t, Union):
309 raise CastileTypeError('bad cast, not a union: %s' % union_t)
335 raise CastileTypeError(ast, 'bad cast, not a union: %s' % union_t)
310336 if not union_t.contains(value_t):
311337 raise CastileTypeError(
312 'bad cast, %s does not include %s' % (union_t, value_t)
338 ast, 'bad cast, %s does not include %s' % (union_t, value_t)
313339 )
314340 ast.type = union_t
315341 else:
22
33
44 class ScopedContext(object):
5 """
6 >>> d = ScopedContext({ 'a': 2, 'b': 3 })
7 >>> e = ScopedContext({ 'c': 4 }, parent=d)
8 >>> e['c']
9 4
10 >>> e['b']
11 3
12 >>> 'a' in e
13 True
14 >>> 'e' in e
15 False
16
17 """
185 def __init__(self, dict, parent=None, level=None):
196 self._dict = dict
207 self.parent = parent
4633
4734 def __repr__(self):
4835 return 'ScopedContext(%r,parent=%r)' % (self._dict, self.parent)
49
50
51 if __name__ == "__main__":
52 import sys
53 import doctest
54 (fails, something) = doctest.testmod()
55 if fails == 0:
56 print("All tests passed.")
57 sys.exit(0)
58 else:
59 sys.exit(1)
141141 return v
142142 elif ast.tag == 'TypeCast':
143143 v = self.eval(ast.children[0])
144 return TaggedValue(typeof(v), v)
144 if not isinstance(v, TaggedValue):
145 v = TaggedValue(typeof(v), v)
146 return v
145147 elif ast.tag == 'TypeCase':
146148 r = self.eval(ast.children[0])
147149 assert isinstance(r, TaggedValue)
166168 return "Type(string:)"
167169 elif isinstance(x, StructDict):
168170 return x.name
171 elif isinstance(x, TaggedValue):
172 return x.tag
169173 else:
170 return "wtf"
174 raise NotImplementedError(x)
171175
172176
173177 class Program(object):
00 """castile {options} program-file.castile
11
2 Interpreter/compiler for Castile, an unremarkable programming language.
2 Interpreter/compiler for Castile, a programming language with union types.
33
44 """
55
66 import sys
77
8 from optparse import OptionParser
8 from argparse import ArgumentParser
99
1010 from castile.parser import Parser
1111 from castile.eval import Program
1515
1616
1717 def main(argv):
18 optparser = OptionParser(__doc__.strip())
19 optparser.add_option("-a", "--show-ast",
20 action="store_true", dest="show_ast", default=False,
21 help="show parsed AST instead of evaluating")
22 optparser.add_option("-c", "--compile-to", metavar='BACKEND',
23 dest="compile_to", default=None,
24 help="compile to given backend code instead "
25 "of evaluating directly (available backends: "
26 "javascript, ruby, stackmac)")
27 optparser.add_option("-p", "--parse-only",
28 action="store_true", dest="parse_only",
29 default=False,
30 help="parse the input program only and exit")
31 optparser.add_option("-t", "--test",
32 action="store_true", dest="test", default=False,
33 help="run test cases and exit")
34 optparser.add_option("-Y", "--no-typecheck",
35 action="store_false", dest="typecheck", default=True,
36 help="do not typecheck the program")
37 (options, args) = optparser.parse_args(argv[1:])
38 if options.test:
39 import doctest
40 (fails, something) = doctest.testmod()
41 if fails == 0:
42 print("All tests passed.")
43 sys.exit(0)
44 else:
45 sys.exit(1)
46 with open(args[0], 'r') as f:
18 argparser = ArgumentParser()
19
20 argparser.add_argument('input_files', nargs='+', metavar='FILENAME', type=str,
21 help='Source files containing the Castile program'
22 )
23 argparser.add_argument("-a", "--show-ast",
24 action="store_true", dest="show_ast", default=False,
25 help="show parsed AST instead of evaluating"
26 )
27 argparser.add_argument("-c", "--compile-to", metavar='BACKEND',
28 dest="compile_to", default=None,
29 help="compile to given backend code instead "
30 "of evaluating directly (available backends: "
31 "c, javascript, ruby, stackmac)"
32 )
33 argparser.add_argument("-p", "--parse-only",
34 action="store_true", dest="parse_only",
35 default=False,
36 help="parse the input program only and exit"
37 )
38 argparser.add_argument("-Y", "--no-typecheck",
39 action="store_false", dest="typecheck", default=True,
40 help="do not typecheck the program"
41 )
42 argparser.add_argument('--version', action='version', version="%(prog)s 0.5")
43
44 options = argparser.parse_args(argv[1:])
45
46 with open(options.input_files[0], 'r') as f:
4747 p = Parser(f.read())
4848 ast = p.program()
4949 if options.show_ast:
5454 if options.typecheck:
5555 t = TypeChecker()
5656 t.collect_structs(ast)
57 t.type_of(ast)
57 try:
58 t.type_of(ast)
59 except Exception:
60 if options.show_ast:
61 print(ast.pprint(0))
62 print("-----")
63 raise
5864 if options.compile_to is not None:
5965 x = FunctionLifter()
6066 ast = x.lift_functions(ast)
0 import re
1
20 from castile.ast import AST
3
4
5 class CastileSyntaxError(ValueError):
6 pass
1 from castile.scanner import Scanner, CastileSyntaxError
72
83
94 class Parser(object):
105 """Parse a Castile program into an AST.
11
12 The parser includes the scanner as part of it. (Delegating to an external
13 scanner is rather verbose ("self.scanner.expect(...)"; inheriting from a
14 Scanner class, even if it's just a mixin, seems rather weird.)
156
167 The parser mainly just constructs the AST. It does few other analyses
178 or transformations itself. However, there are a few:
2314
2415 """
2516 def __init__(self, text):
26 self.text = text
27 self.token = None
28 self.type = None
29 self.scan()
30 # for parser...
17 self.scanner = Scanner(text)
3118 self.locals = None
3219
33 # ### SCANNER ### #
34
35 def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
36 pattern = r'^(' + pattern + r')(.*?)$'
37 match = re.match(pattern, self.text, re.DOTALL)
38 if not match:
39 return False
40 else:
41 self.type = type
42 self.token = match.group(token_group)
43 self.text = match.group(rest_group)
44 # print(self.type, self.token)
45 return True
46
47 def scan(self):
48 self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
49 while self.text.startswith('/*'):
50 self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment')
51 if not self.text:
52 self.token = None
53 self.type = 'EOF'
54 return
55 if self.scan_pattern(r'->', 'arrow'):
56 return
57 if self.scan_pattern(r'>=|>|<=|<|==|!=', 'relational operator'):
58 return
59 if self.scan_pattern(r'\+|\-', 'additive operator'):
60 return
61 if self.scan_pattern(r'\*|\/|\|', 'multiplicative operator'):
62 return
63 if self.scan_pattern(r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation'):
64 return
65 if self.scan_pattern(r'string|integer|boolean|function|void|union',
66 'type name'):
67 return
68 if self.scan_pattern(r'and|or', 'boolean operator'):
69 return
70 if self.scan_pattern(r'(if|else|while|make|struct|'
71 r'typecase|is|as|return|break|'
72 r'true|false|null)(?!\w)',
73 'keyword', token_group=2, rest_group=3):
74 return
75 if self.scan_pattern(r'\d+', 'integer literal'):
76 return
77 if self.scan_pattern(r'\"(.*?)\"', 'string literal',
78 token_group=2, rest_group=3):
79 return
80 if self.scan_pattern(r'[a-zA-Z_][a-zA-Z0-9_]*', 'identifier'):
81 return
82 if self.scan_pattern(r'.', 'unknown character'):
83 return
84 else:
85 raise ValueError("this should never happen, "
86 "self.text=(%s)" % self.text)
87
88 def expect(self, token):
89 if self.token == token:
90 self.scan()
91 else:
92 raise CastileSyntaxError(
93 "Expected '%s', but found '%s'" % (token, self.token)
94 )
95
96 def expect_type(self, type):
97 self.check_type(type)
98 token = self.token
99 self.scan()
100 return token
101
102 def on(self, token):
103 return self.token == token
104
105 def on_any(self, tokens):
106 return self.token in tokens
107
108 def on_type(self, type):
109 return self.type == type
110
111 def check_type(self, type):
112 if not self.type == type:
113 raise CastileSyntaxError(
114 "Expected %s, but found %s ('%s')" % (type, self.type, self.token)
115 )
116
117 def consume(self, token):
118 if self.token == token:
119 self.scan()
120 return True
121 else:
122 return False
123
124 def consume_type(self, type):
125 if self.on_type(type):
126 token = self.token
127 self.scan()
128 return token
129 else:
130 return None
131
132 # ### PARSER ### #
20 ### Delegate to scanner
21
22 def consume(self, *args, **kwargs):
23 return self.scanner.consume(*args, **kwargs)
24
25 def consume_type(self, *args, **kwargs):
26 return self.scanner.consume_type(*args, **kwargs)
27
28 def expect(self, *args, **kwargs):
29 return self.scanner.expect(*args, **kwargs)
30
31 def expect_type(self, *args, **kwargs):
32 return self.scanner.expect_type(*args, **kwargs)
33
34 def on(self, *args, **kwargs):
35 return self.scanner.on(*args, **kwargs)
36
37 def on_any(self, *args, **kwargs):
38 return self.scanner.on_any(*args, **kwargs)
39
40 def on_type(self, *args, **kwargs):
41 return self.scanner.on_type(*args, **kwargs)
42
43 ### Delegate to AST
44
45 def ast(self, *args, **kwargs):
46 kwargs['line'] = self.scanner.line
47 return AST(*args, **kwargs)
48
49 ### Parser proper
13350
13451 def program(self):
13552 defns = []
13653 while not self.on_type('EOF'):
13754 defns.append(self.defn())
13855 self.consume(';')
139 return AST('Program', defns)
56 return self.ast('Program', defns)
14057
14158 def defn(self):
14259 if self.consume('fun'):
14966 args.append(self.arg())
15067 self.expect(")")
15168 body = self.body()
152 funlit = AST('FunLit', [AST('Args', args), body])
153 return AST('Defn', [funlit], value=id)
69 funlit = self.ast('FunLit', [self.ast('Args', args), body])
70 return self.ast('Defn', [funlit], value=id)
15471 elif self.consume('struct'):
15572 id = self.expect_type('identifier')
15673 self.expect("{")
15976 name = self.expect_type('identifier')
16077 self.expect(':')
16178 texpr = self.texpr0()
162 components.append(AST('FieldDefn', [texpr], value=name))
79 components.append(self.ast('FieldDefn', [texpr], value=name))
16380 self.consume(';')
16481 self.expect("}")
165 return AST('StructDefn', components, value=id)
82 scope_children = []
83 if self.consume("for"):
84 self.expect("(")
85 idents = []
86 if not self.on(")"):
87 idents.append(self.ast('Ident', value=self.expect_type('identifier')))
88 while self.consume(","):
89 idents.append(self.ast('Ident', value=self.expect_type('identifier')))
90 self.expect(")")
91 scope_children.append(self.ast('Idents', idents))
92 return self.ast('StructDefn', [self.ast('FieldDefns', components)] + scope_children, value=id)
16693 else:
16794 id = self.expect_type('identifier')
16895 if self.consume('='):
16996 e = self.literal()
170 return AST('Defn', [e], value=id)
97 return self.ast('Defn', [e], value=id)
17198 else:
17299 self.expect(':')
173100 e = self.texpr0()
174 return AST('Forward', [e], value=id)
101 return self.ast('Forward', [e], value=id)
175102
176103 def arg(self):
177104 id = self.expect_type('identifier')
178 te = AST('Type', value='integer')
105 te = self.ast('Type', value='integer')
179106 if self.consume(':'):
180107 te = self.texpr1()
181 return AST('Arg', [te], value=id)
108 return self.ast('Arg', [te], value=id)
182109
183110 def texpr0(self):
184111 ast = self.texpr1()
185112 if self.consume('->'):
186113 r = self.texpr1()
187 return AST('FunType', [r, ast])
114 return self.ast('FunType', [r, ast])
188115 if self.on(','):
189116 args = [ast]
190117 while self.consume(','):
191118 args.append(self.texpr1())
192119 self.expect('->')
193120 r = self.texpr1()
194 return AST('FunType', [r] + args)
121 return self.ast('FunType', [r] + args)
195122 return ast
196123
197124 def texpr1(self):
200127 args = [ast]
201128 while self.consume('|'):
202129 args.append(self.texpr2())
203 ast = AST('UnionType', args)
130 ast = self.ast('UnionType', args)
204131 return ast
205132
206133 def texpr2(self):
210137 return ast
211138 elif self.on_type('identifier'):
212139 id = self.consume_type('identifier')
213 return AST('StructType', [], value=id)
140 return self.ast('StructType', [], value=id)
214141 tname = self.expect_type('type name')
215 return AST('Type', value=tname)
142 return self.ast('Type', value=tname)
216143
217144 def block(self):
218145 self.expect('{')
221148 stmts.append(self.stmt())
222149 self.consume(';')
223150 self.expect('}')
224 return AST('Block', stmts)
151 return self.ast('Block', stmts)
225152
226153 STMT_TAGS = ('If', 'While', 'TypeCase', 'Return', 'Break')
227154
239166 stmts.append(last)
240167 self.consume(';')
241168 if len(stmts) == 0:
242 stmts = [AST('Return', [AST('None')])]
169 stmts = [self.ast('Return', [self.ast('None')])]
243170 elif last is not None and last.tag not in self.STMT_TAGS:
244 stmts[-1] = AST('Return', [stmts[-1]])
171 stmts[-1] = self.ast('Return', [stmts[-1]])
245172 self.expect('}')
246 vardecls = AST(
173 vardecls = self.ast(
247174 'VarDecls',
248 [AST('VarDecl', value=name) for name in self.locals]
175 [self.ast('VarDecl', value=name) for name in self.locals]
249176 )
250 stmts = AST('Block', stmts)
177 stmts = self.ast('Block', stmts)
251178 self.locals = save_locals
252 return AST('Body', [vardecls, stmts])
179 return self.ast('Body', [vardecls, stmts])
253180
254181 def stmt(self):
255182 if self.on('if'):
257184 elif self.consume('while'):
258185 t = self.expr0()
259186 b = self.block()
260 return AST('While', [t, b])
187 return self.ast('While', [t, b])
261188 elif self.consume('typecase'):
262189 id = self.expect_type('identifier')
263 e = AST('VarRef', value=id)
190 e = self.ast('VarRef', value=id)
264191 self.expect('is')
265192 te = self.texpr0()
266193 b = self.block()
267 return AST('TypeCase', [e, te, b], value=te.minirepr())
194 return self.ast('TypeCase', [e, te, b], value=te.minirepr())
268195 elif self.consume('return'):
269 return AST('Return', [self.expr0()])
196 return self.ast('Return', [self.expr0()])
270197 elif self.consume('break'):
271 return AST('Break')
198 return self.ast('Break')
272199 else:
273200 return self.expr0()
274201
282209 b2 = self.ifstmt()
283210 else:
284211 b2 = self.block()
285 return AST('If', [t, b1, b2])
286 return AST('If', [t, b1])
212 return self.ast('If', [t, b1, b2])
213 return self.ast('If', [t, b1])
287214
288215 def expr0(self):
289216 e = self.expr1()
290217 while self.on_type('boolean operator'):
291218 op = self.expect_type('boolean operator')
292219 e2 = self.expr1()
293 e = AST('Op', [e, e2], value=op)
220 e = self.ast('Op', [e, e2], value=op)
294221 if self.consume('as'):
295222 union_te = self.texpr0()
296 e = AST('TypeCast', [e, union_te])
223 e = self.ast('TypeCast', [e, union_te])
297224 return e
298225
299226 def expr1(self):
301228 while self.on_type('relational operator'):
302229 op = self.expect_type('relational operator')
303230 e2 = self.expr2()
304 e = AST('Op', [e, e2], value=op)
231 e = self.ast('Op', [e, e2], value=op)
305232 return e
306233
307234 def expr2(self):
309236 while self.on_type('additive operator'):
310237 op = self.expect_type('additive operator')
311238 e2 = self.expr3()
312 e = AST('Op', [e, e2], value=op)
239 e = self.ast('Op', [e, e2], value=op)
313240 return e
314241
315242 def expr3(self):
317244 while self.on_type('multiplicative operator'):
318245 op = self.expect_type('multiplicative operator')
319246 e2 = self.expr4()
320 e = AST('Op', [e, e2], value=op)
247 e = self.ast('Op', [e, e2], value=op)
321248 return e
322249
323250 def expr4(self):
331258 while self.consume(","):
332259 args.append(self.expr0())
333260 self.expect(")")
334 e = AST('FunCall', [e] + args)
261 e = self.ast('FunCall', [e] + args)
335262 elif self.consume('.'):
336263 id = self.expect_type('identifier')
337 e = AST('Index', [e], value=id)
264 e = self.ast('Index', [e], value=id)
338265 else:
339266 done = True
340267 return e
345272 elif self.on_any(('-', 'fun', 'true', 'false', 'null')):
346273 return self.literal()
347274 elif self.consume('not'):
348 return AST('Not', [self.expr1()])
275 return self.ast('Not', [self.expr1()])
349276 elif self.consume('make'):
350277 # TODO I just accidentally allowed any type. Is that bad?
351278 texpr = self.texpr0()
355282 id = self.expect_type('identifier')
356283 self.expect(':')
357284 e = self.expr0()
358 args.append(AST('FieldInit', [e], value=id))
285 args.append(self.ast('FieldInit', [e], value=id))
359286 while self.consume(","):
360287 id = self.expect_type('identifier')
361288 self.expect(':')
362289 e = self.expr0()
363 args.append(AST('FieldInit', [e], value=id))
290 args.append(self.ast('FieldInit', [e], value=id))
364291 self.expect(")")
365 return AST('Make', [texpr] + args, value=texpr.minirepr())
292 return self.ast('Make', [texpr] + args, value=texpr.minirepr())
366293
367294 elif self.consume('('):
368295 e = self.expr0()
370297 return e
371298 else:
372299 id = self.expect_type('identifier')
373 ast = AST('VarRef', value=id)
300 ast = self.ast('VarRef', value=id)
374301 if self.consume('='):
375302 e = self.expr0()
376303 aux = None
377304 if id not in self.locals:
378305 self.locals.add(id)
379306 aux = 'defining instance'
380 ast = AST('Assignment', [ast, e], aux=aux)
307 ast = self.ast('Assignment', [ast, e], aux=aux)
381308 return ast
382309
383310 def literal(self):
384311 if self.on_type('string literal'):
385312 v = self.consume_type('string literal')
386 return AST('StrLit', value=v)
313 return self.ast('StrLit', value=v)
387314 elif self.on_type('integer literal'):
388315 v = int(self.consume_type('integer literal'))
389 return AST('IntLit', value=v)
316 return self.ast('IntLit', value=v)
390317 elif self.consume('-'):
391318 v = 0 - int(self.expect_type('integer literal'))
392 return AST('IntLit', value=v)
319 return self.ast('IntLit', value=v)
393320 elif self.consume('true'):
394 return AST('BoolLit', value=True)
321 return self.ast('BoolLit', value=True)
395322 elif self.consume('false'):
396 return AST('BoolLit', value=False)
323 return self.ast('BoolLit', value=False)
397324 elif self.consume('null'):
398 return AST('None')
325 return self.ast('None')
399326 else:
400327 self.expect('fun')
401328 self.expect("(")
406333 args.append(self.arg())
407334 self.expect(")")
408335 body = self.body()
409 return AST('FunLit', [AST('Args', args), body])
336 return self.ast('FunLit', [self.ast('Args', args), body])
0 import re
1
2
3 class CastileSyntaxError(ValueError):
4 pass
5
6
7 class Scanner(object):
8
9 def __init__(self, text):
10 self.text = text
11 self.token = None
12 self.type = None
13 self.pos = 0
14 self.line = 1
15 self.scan()
16
17 def near_text(self, length=10):
18 return self.text[self.pos:self.pos + length]
19
20 def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
21 pattern = r'(' + pattern + r')'
22 regexp = re.compile(pattern, flags=re.DOTALL)
23 match = regexp.match(self.text, pos=self.pos)
24 if not match:
25 return False
26 else:
27 self.type = type
28 self.token = match.group(token_group)
29 self.pos += len(match.group(0))
30 self.line += self.token.count('\n')
31 return True
32
33 def scan(self):
34 self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
35 while self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
36 self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
37 if self.pos >= len(self.text):
38 self.token = None
39 self.type = 'EOF'
40 return
41 if self.scan_pattern(r'->', 'arrow'):
42 return
43 if self.scan_pattern(r'>=|>|<=|<|==|!=', 'relational operator'):
44 return
45 if self.scan_pattern(r'\+|\-', 'additive operator'):
46 return
47 if self.scan_pattern(r'\*|\/|\|', 'multiplicative operator'):
48 return
49 if self.scan_pattern(r'\.|\;|\,|\(|\)|\{|\}|\=', 'punctuation'):
50 return
51 if self.scan_pattern(r'string|integer|boolean|function|void|union',
52 'type name'):
53 return
54 if self.scan_pattern(r'and|or', 'boolean operator'):
55 return
56 if self.scan_pattern(r'(if|else|while|make|struct|'
57 r'typecase|is|as|return|break|'
58 r'true|false|null)(?!\w)',
59 'keyword', token_group=2, rest_group=3):
60 return
61 if self.scan_pattern(r'\d+', 'integer literal'):
62 return
63 if self.scan_pattern(r'\"(.*?)\"', 'string literal',
64 token_group=2, rest_group=3):
65 return
66 if self.scan_pattern(r'[a-zA-Z_][a-zA-Z0-9_]*', 'identifier'):
67 return
68 if self.scan_pattern(r'.', 'unknown character'):
69 return
70 else:
71 raise ValueError("this should never happen, "
72 "self.text=(%s)" % self.text)
73
74 def expect(self, token):
75 if self.token == token:
76 self.scan()
77 else:
78 raise CastileSyntaxError(
79 "Expected '%s', but found '%s' (line %s, near '%s')" % (
80 token, self.token, self.line, self.near_text()
81 )
82 )
83
84 def expect_type(self, type):
85 self.check_type(type)
86 token = self.token
87 self.scan()
88 return token
89
90 def on(self, token):
91 return self.token == token
92
93 def on_any(self, tokens):
94 return self.token in tokens
95
96 def on_type(self, type):
97 return self.type == type
98
99 def check_type(self, type):
100 if not self.type == type:
101 raise CastileSyntaxError(
102 "Expected %s, but found %s ('%s') (line %s, near '%s')" % (
103 type, self.type, self.token, self.line, self.near_text()
104 )
105 )
106
107 def consume(self, token):
108 if self.token == token:
109 self.scan()
110 return True
111 else:
112 return False
113
114 def consume_type(self, type):
115 if self.on_type(type):
116 token = self.token
117 self.scan()
118 return token
119 else:
120 return None
6060 return True
6161 return False
6262
63 def contains_instance_of(self, cls):
64 for member in self.content_types:
65 if isinstance(member, cls):
66 return True
67 return False
68
6369 def __str__(self):
6470 h = "union("
6571 h += ', '.join(sorted([str(t) for t in self.content_types]))
2323 APPLIANCES="$APPLIANCES tests/appliances/castile-c-c.md"
2424 fi
2525
26 #APPLIANCES="tests/appliances/castile.py3.md"
27 #APPLIANCES="tests/appliances/castile-c-c.md"
28 #APPLIANCES="tests/appliances/castile-c-javascript.md"
29
2630 falderal $APPLIANCES tests/Castile.md
2731 RESULT=$?
2832 rm -f foo.* a.out
361361
362362 ### Non-local Values ###
363363
364 Literals may appear at the toplevel. Semicolons are optional at toplevel.
364 Literals may appear at the toplevel. Semicolons are optional at toplevel.
365365
366366 | factor = 5;
367367 | fun main() {
369369 | }
370370 = 30
371371
372 Toplevel literals may not be updated. (And thus
372 Toplevel literals may not be updated. Thus, the following looks like it
373 is defining a local with the same name as a toplevel, which is not permitted.
373374
374375 | factor = 5
375376 | fun main() {
600601 | }
601602 ? mismatch
602603
603 Equality can be checked between unions. (TODO)
604
605 /| fun main() {
606 /| a = 40 as string|integer
607 /| b = 40 as string|integer
608 /| if a == b {
609 /| print("it is")
610 /| }
611 /| }
612 /= ok
604 Equality can be checked between unions, as long as they are
605 unions entirely of simple (non-struct) types.
606
607 | fun main() {
608 | a = 40 as string|integer
609 | b = 40 as string|integer
610 | if a == b {
611 | print("it is")
612 | }
613 | }
614 = it is
613615
614616 | fun main() {
615617 | a = 40 as string|integer
630632 | }
631633 | }
632634 ? mismatch
635
636 Equality cannot be tested between values of a union type
637 that contains a struct type as one of its members.
638
639 | struct person { name: string; age: integer }
640 | fun main() {
641 | a = 40 as person|integer
642 | b = 40 as person|integer
643 | if a == b {
644 | print("it is")
645 | }
646 | }
647 ? struct
633648
634649 ### Builtins ###
635650
767782 | }
768783 = 23
769784
770 Structs can be tested for equality. (Since structs are immutable, it
771 doesn't matter if this is structural equality or identity.)
785 Structs cannot be tested for equality with the `==` or `!=`
786 operators.
772787
773788 | struct person { name: string; age: integer }
774789 | main = fun() {
776791 | k = make person(name:"Jake", age: 23);
777792 | j == k
778793 | }
779 = True
780
781 | struct person { age: integer; name: string }
782 | main = fun() {
783 | j = make person(age: 23, name:"Jake");
784 | k = make person(age: 23, name:"John");
785 | j == k
786 | }
787 = False
794 ? structs cannot be compared
788795
789796 | struct person { age: integer; name: string }
790797 | main = fun() {
792799 | k = make person(age: 21, name:"Jake");
793800 | j != k
794801 | }
795 = True
802 ? structs cannot be compared
796803
797804 Structs of two different types cannot be tested for equality.
798805
805812 | }
806813 ? mismatch
807814
815 If you really want to compare two structs for equality, you'll
816 have to write the equality predicate function yourself.
817
818 | struct person { name: string; age: integer }
819 | equ_person = fun(a: person, b: person) {
820 | a.age == b.age and a.name == b.name
821 | }
822 | main = fun() {
823 | j = make person(age: 23, name:"Jake");
824 | k = make person(name:"Jake", age: 23);
825 | equ_person(j, k)
826 | }
827 = True
828
808829 Structs cannot be compared for ordering.
809830
810831 | struct person { age: integer; name: string }
813834 | k = make person(age: 21, name:"Jake");
814835 | j > k
815836 | }
816 ? structs cannot be compared for order
837 ? structs cannot be compared
817838
818839 Structs can be passed to functions.
819840
12281249 | }
12291250 = red
12301251 = blue
1252
1253 ### Scoped Structs ###
1254
1255 When a `struct` is declared, it may be associated with a set of identifiers.
1256 Functions with these global names are the only function definitions which
1257 can `make` such a struct, or see that it has fields; to all other functions,
1258 these operations will not be available. It is in this way that encapsulation
1259 is accomplished.
1260
1261 | struct list {
1262 | value: string;
1263 | next: list|void;
1264 | } for (cons, singleton, length)
1265 |
1266 | fun cons(v: string, l: list) {
1267 | make list(value:v, next:l as list|void)
1268 | }
1269 |
1270 | fun singleton(v: string) {
1271 | make list(value:v, next:null as list|void)
1272 | }
1273 |
1274 | length : list|void -> integer
1275 | fun length(l: list|void) {
1276 | typecase l is void { return 0 }
1277 | typecase l is list { return 1 + length(l.next) }
1278 | }
1279 |
1280 | fun main() {
1281 | l = cons("first", cons("second", singleton("third")));
1282 | print(str(length(l as list|void)));
1283 | }
1284 = 3
1285
1286 | struct list {
1287 | value: string;
1288 | next: list|void;
1289 | } for (cons, singleton, length)
1290 |
1291 | fun cons(v: string, l: list) {
1292 | make list(value:v, next:l as list|void)
1293 | }
1294 |
1295 | fun singleton(v: string) {
1296 | make list(value:v, next:null as list|void)
1297 | }
1298 |
1299 | length : list|void -> integer
1300 | fun length(l: list|void) {
1301 | typecase l is void { return 0 }
1302 | typecase l is list { return 1 + length(l.next) }
1303 | }
1304 |
1305 | fun main() {
1306 | l = make list(value:"first", next:null as list|void);
1307 | print(str(length(l as list|void)));
1308 | }
1309 ? make
1310
1311 | struct list {
1312 | value: string;
1313 | next: list|void;
1314 | } for (cons, singleton, length)
1315 |
1316 | fun cons(v: string, l: list) {
1317 | make list(value:v, next:l as list|void)
1318 | }
1319 |
1320 | fun singleton(v: string) {
1321 | make list(value:v, next:null as list|void)
1322 | }
1323 |
1324 | fun main() {
1325 | l = cons("first", cons("second", singleton("third")));
1326 | print(l.value);
1327 | }
1328 ? struct
1329
1330 One can use this facility to implement abstract data types.
1331
1332 | struct assoc {
1333 | key: string;
1334 | value: string;
1335 | next: assoc|void;
1336 | } for (singleton, update, lookup, remove)
1337 |
1338 | fun singleton(k: string, v: string) {
1339 | make assoc(key:k, value:v, next:null as assoc|void)
1340 | }
1341 |
1342 | fun update(k: string, v: string, a: assoc) {
1343 | make assoc(key:k, value:v, next:a as assoc|void)
1344 | }
1345 |
1346 | lookup : assoc, string -> string|void
1347 | fun lookup(a: assoc, k: string) {
1348 | if a.key == k {
1349 | return a.value as string|void
1350 | }
1351 | n = a.next
1352 | typecase n is void {
1353 | return null as string|void
1354 | }
1355 | typecase n is assoc {
1356 | return lookup(n, k)
1357 | }
1358 | }
1359 |
1360 | fun main() {
1361 | a = update("1", "first", update("2", "second", singleton("3", "third")));
1362 | r = lookup(a, "2");
1363 | print("um");
1364 | typecase r is void { print("NOT FOUND"); }
1365 | typecase r is string { print(r); }
1366 | print("ya");
1367 | }
1368 = um
1369 = second
1370 = ya
1371
1372 This program should work even with a redundant upcast in it.
1373
1374 | struct assoc {
1375 | key: string;
1376 | value: string;
1377 | next: assoc|void;
1378 | } for (singleton, update, lookup, remove)
1379 |
1380 | fun singleton(k: string, v: string) {
1381 | make assoc(key:k, value:v, next:null as assoc|void)
1382 | }
1383 |
1384 | fun update(k: string, v: string, a: assoc) {
1385 | make assoc(key:k, value:v, next:a as assoc|void)
1386 | }
1387 |
1388 | lookup : assoc, string -> string|void
1389 | fun lookup(a: assoc, k: string) {
1390 | if a.key == k {
1391 | return a.value as string|void
1392 | }
1393 | n = a.next
1394 | typecase n is void {
1395 | return null as string|void
1396 | }
1397 | typecase n is assoc {
1398 | return lookup(n, k) as string|void
1399 | }
1400 | }
1401 |
1402 | fun main() {
1403 | a = update("1", "first", update("2", "second", singleton("3", "third")));
1404 | r = lookup(a, "2");
1405 | print("um");
1406 | typecase r is void { print("NOT FOUND"); }
1407 | typecase r is string { print(r); }
1408 | print("ya");
1409 | }
1410 = um
1411 = second
1412 = ya