git @ Cat's Eye Technologies Eightebed / rel_1_1_2011_0510
Initial import of Eightebed version 1.1 revision 2011.0510. catseye 9 years ago
9 changed file(s) with 2021 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
1 <!-- encoding: UTF-8 -->
2 <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
5 <title>The Eightebed Programming Language</title>
6 <!-- begin html doc dynamic markup -->
7 <script type="text/javascript" src="/contrib/jquery-1.6.4.min.js"></script>
8 <script type="text/javascript" src="/scripts/documentation.js"></script>
9 <!-- end html doc dynamic markup -->
10 </head>
11 <body>
12
13 <h1>The Eightebed Programming Language</h1>
14
15 <p>Language version 1.1</p>
16
17 <h2>Abstract</h2>
18
19 <p>While discussing <a class="external" href="http://cyclone.thelanguage.org/">Cyclone</a>,
20 Gregor Richards stated that in order for a language
21 to support explicit <code>malloc()</code>ing and <code>free()</code>ing of
22 allocated memory, while also being safe (in the sense of not being able to execute
23 or dereference incorrectly-populated memory) would require that language to
24 either support garbage collection, or to not implement <code>free()</code>.
25 In his words:</p>
26
27 <blockquote><p>A C-like language which provides a true explicit free() cannot be safe.
28 (By "true" I mean that you can get that memory back in a later malloc().)
29 To be safe a language must either never free (which is bad) or be GC'd.
30 [C-like languages being] imperative languages with pointers at arbitrary data, where safety is
31 defined as not seeing that data as a different type.</p></blockquote>
32
33 <p><dfn>Eightebed</dfn> was designed as a counterexample to that claim.
34 Eightebed is a small, C-like language with explicit <code>malloc()</code>
35 and <code>free()</code>. Memory is actually freed by <code>free()</code>
36 and might be re-allocated by a future <code>malloc()</code>. Yet Eightebed
37 is a safe language, requiring only a modicum of static analysis and runtime support,
38 and in particular, it neither specifies nor requires garbage collection:</p>
39
40 <ul>
41 <li><dfn>Garbage</dfn>, reasonably defined as "any unreachable block of memory",
42 is disregarded and considered a memory leak, as is good and proper (or at least
43 accepted) in a language with explicit memory management; and</li>
44 <li>Nothing is collected in any way.</li>
45 </ul>
46
47 <h2>Without Loss of Generality</h2>
48
49 <p>We place some restrictions on Eightebed in order that our implementation of
50 a compiler and analyzer for it may be simplified. These restrictions do not, we assert,
51 prevent the language from being "C-like", as it would be possible to extend the
52 language to include them; the only thing we would be adding if we were to do so
53 would be additional complexity in implementation. These restrictions are:</p>
54
55 <ul>
56 <li>There are no functions in Eightebed. Common functionality can be repeated
57 verbatim inline, and recursion can be replaced with <code>while</code> loops.</li>
58
59 <li>Pointers may only point to named types, not integers or other pointers, and
60 only structures may be named. The effect of a pointer to an integer or pointer
61 may be easily achieved by pointing to a named structure which consists of only
62 an integer or pointer itself.</li>
63
64 <li>Structures may not contain structures. Again, this can be easily simulated
65 by "flattening" the structure into a single structure with perhaps differentiated names.</li>
66 </ul>
67
68 <h2>Syntax</h2>
69
70 <h3>EBNF Grammar</h3>
71
72 <p>Note that where this grammar is a little weird, it is only to support
73 being fully LL(1) to ease parser construction. Notably, the syntax to access
74 a member of a structure uses both square brackets around the structure
75 and a dot between structure and member. Unlike C, there is no syntax like
76 <code>-&gt;</code> to dereference and access a member in one go; you need
77 to dereference with <code>@</code>, then access the member with <code>[].</code>.</p>
78
79 <pre>
80 Eightebed ::= {TypeDecl} {VarDecl} Block.
81 Block ::= "{" {Stmt} "}".
82 TypeDecl ::= "type" Name<sub>Type</sub> Type ";"
83 Type ::= "int"
84 | "struct" "{" {Decl} "}"
85 | "ptr" "to" Type
86 | Name<sub>Type</sub>.
87 Decl ::= Type Name ";".
88 VarDecl ::= "var" Decl.
89 Stmt ::= "while" Expr Block
90 | "if" Expr Block ["else" Block]
91 | "free" Ref ";"
92 | "print" Expr ";"
93 | Ref "=" Expr ";".
94 Ref ::= "[" Ref "]" "." Name
95 | "@" Ref
96 | Name.
97 Expr ::= "(" Expr ("+"|"-"|"*"|"/"|"="|"&gt;"|"&amp;"|"|") Expr ")"
98 | "malloc" Name<sub>Type</sub>
99 | "valid" Expr
100 | IntLit
101 | Ref.
102 </pre>
103
104 <h3>Example Program</h3>
105
106 <pre>
107 type node struct {
108 int value;
109 ptr to node next;
110 };
111 var ptr to node jim;
112 var ptr to node george;
113 {
114 jim = malloc node;
115 if valid jim {
116 [@jim].value = (1 + 4);
117 george = jim;
118 }
119 if valid george {
120 print [@george].value;
121 }
122 free george;
123 free jim;
124 }
125 </pre>
126
127 <h2>How it Works</h2>
128
129 <h3>Static Analysis</h3>
130
131 <p>Dereferencing a pointer <var>x</var> must only occur at the
132 safe start of the "then" part of an <code>if</code> statement whose
133 test condition consists only of the expression <code>valid <var>x</var></code>.
134 The <dfn>safe start</dfn> of a block is the set of statements preceding and
135 including the first assignment statement or <code>free</code>. (This is on the
136 [admittedly somewhat pessimistic]
137 assumption that any assignment could invalidate <var>x</var>.)
138 (<em>New in 1.1</em>: the safe start must precede the first <code>free</code>
139 statement, to prevent creation of dangling aliased pointers. Thanks Gregor!)
140 To simplify implementation, we limit <var>x</var> to a simple variable name
141 rather than a full expression. (This too is without loss of generality, as it is a
142 simple matter to use a temporary variable to store the result of a pointer expression.)
143 Any attempt to dereference a pointer which
144 does not follow these rules is caught by the static checker and disallowed.</p>
145
146 <h3>Runtime Support</h3>
147
148 <p>Every pointer in the Eightebed language is implemented internally
149 as a structure of a <dfn>machine pointer</dfn> (obtained, for instance, by C's <code>malloc()</code>)
150 coupled with a boolean flag called <code>valid</code>.
151 When a chunk of memory is initially successfully allocated, <code>valid</code> is
152 set to true. Freeing a pointer first checks this flag; freeing the
153 machine pointer is only attempted if <code>valid</code> is true.
154 In addition, just before freeing the machine pointer, we invalidate
155 all aliases to that pointer. (Starting with the "root set" of the
156 program's global variables, we traverse all memory blocks reachable
157 by following valid pointers from them, looking for pointers which
158 match the pointer about to be freed; any we find, we set their <code>valid</code>
159 flags to false.) After freeing a pointer, we set its <code>valid</code>
160 to false.</p>
161
162 <h3>Why this Works</h3>
163
164 <p>Because of the static analysis, it is not possible to dereference
165 a pointer at a point in the program where we do not know for certain
166 that it is valid (i.e., it is not possible to dereference an invalid
167 pointer.) Because of the runtime support, as soon as a pointer becomes
168 invalid, all aliases of it become invalid as well. (All reachable
169 aliases, that is – but if an alias isn't reachable, it can't be dereferenced
170 anyway.) Add both of these together, and you get memory that can
171 leak without any risk of being reused.</p>
172
173 <p>And no, this isn't garbage collection, because (as stated already)
174 we don't care about garbage and we don't collect anything. Yes, the
175 runtime support looks a bit like the mark phase of a mark-and-sweep
176 garbage collector, but even it has a different job: not marking everything
177 that is reachable, rather invalidating all aliases of a given pointer.</p>
178
179 <p>And finally, yes, I realize how little this proves. Long live loopholes.</p>
180
181 <pre>
182 16:19:38 &lt;Gregor&gt; We implement this without a GC by stuffing most of a GC into the free function, thereby making it just as slow as a GC'd language with none of the advantages!
183 16:25:29 &lt;Gregor&gt; So yes, although you have managed to fit my requirements, I am wildly underwhelmed :P
184 </pre>
185
186 <h2>Reference Implementation</h2>
187
188 <p>Cat's Eye Technologies provides a cockamamie reference implementation of
189 Eightebed called <code>8ebed2c.py</code>. Written in Python 2.6,
190 it compiles Eightebed code to C, and for convenience will optionally
191 compile that C with the C compiler of your choice and run the resulting
192 executable.</p>
193
194 <p><code>8ebed2c.py</code> ships with a fairly extensive (for a language
195 like this!) suite of test programs, which can of course double as example
196 sources; these can be found in the <code>eightebed.tests</code> module.</p>
197
198 <p>For an appreciation of just how cockamamie <code>8ebed2c.py</code> is,
199 run <code>8ebed2c.py --help</code> and read through the command-line options
200 it provides.</p>
201
202 <h2>Legal Issues</h2>
203
204 <p>The name Eightebed started life as a typo for the word "enlightened" made on an
205 iPhone by a mysterious individual known only as Alise. (Well, perhaps not <em>only</em>.)
206 Alise has aggressively asserted her intellectual property rights by copyrighting
207 [<i>sic</i>] the name Eightebed. Cat's Eye Technologies has pursued permission to use
208 the name for this language, only to be told that the procedure for obtaining such
209 permission "involves five yaks, a Golden toad that hasn't eaten for five days,
210 five boxes of antique confetti (not stripped of uranium), dye number 90 (blood green),
211 a very confused weasel, and three pieces of A4.15 paper."</p>
212
213 <p>Cat's Eye Technologies' legal-and-yak-husbandry team is currently investigating
214 the feasibility of this arrangement, and as of this writing, official permission is
215 still pending. If complications persist, another, less contentious name
216 (such as "Microsoft Windows 7") may need to be chosen for this language.</p>
217
218 <pre>
219 17:52:08 &lt;alise&gt; cpressey: I request that all harm is done to animals in the making of this production.
220 </pre>
221
222 <h2>Future Work</h2>
223
224 <p><i>In which we reveal the outline of a grand plan for a blockbuster sequel to Eightebed which will never materialize</i></p>
225
226 <ul>
227 <li>To be titled <em>Eightebed: Ascension</em> or <em>Eightebed: Generations</em>. At least, title should have
228 one of those bad-ass colons in it. Possibly <em>Eightebed: Eightebed</em>.</li>
229 <li>To support functions, analysis of arbitrary expressions as the condition in an <code>if valid</code>,
230 pointers to unnamed types, structures which contain other structures, and all that other boring stuff that
231 we just said doesn't matter.</li>
232 <li>To have a literate specification written in SUPER ITALIAN, thus giving all programs the power of
233 UNMATCHED PROPHETIC SNEEZING.</li>
234 <li>To be co-authored with Frank Zappa (note: turns out Mr. Zappa is dead. Maybe Tipper Gore instead? Yes,
235 that should work.)</li>
236 <li><del>To include a garbage collector.</del></li>
237 <li>Puppets???</li>
238 </ul>
239
240 <p>Happy leaking!
241 <br />Chris Pressey
242 <br />September 1, 2010
243 <br />Evanston, IL
244 </p>
245
246 </body>
247 </html>
0 type node struct {
1 int value;
2 ptr to node next;
3 };
4 var ptr to node jim;
5 var ptr to node harry;
6 var ptr to node bertie;
7 var ptr to node albert;
8 var int i;
9 {
10 albert = malloc node;
11 jim = albert;
12 harry = jim;
13 i = 100;
14 while i {
15 harry = malloc node;
16 if valid jim {
17 [@jim].value = i;
18 }
19 if (i = 87) {
20 bertie = jim;
21 }
22 if valid jim {
23 [@jim].next = harry;
24 if valid harry {
25 jim = harry;
26 }
27 }
28 i = (i - 1);
29 }
30 free bertie;
31 jim = albert;
32 while valid jim {
33 if valid jim {
34 print [@jim].value;
35 jim = [@jim].next;
36 }
37 }
38 }
0 #!/usr/bin/python
1 # -*- coding: utf-8 -*-
2
3 """\
4 [python] 8ebed2c.py [-achimrtuv] [-e compiler] [-f format] [-p pedigree]
5 (in.8ebed|@testprog) (out.c|-)
6
7 8ebed2c.py: A compiler (to C) for the Eightebed programming language.
8 Language version 1.1. Implementation version 2011.0510.
9
10 The @testprog syntax can be used to acquire input from the
11 specified attribute of the Tests class of the tests module.
12
13 Using a single hyphen for the output filename will send
14 the generated C source to stdout.\
15 """
16
17 import logging
18 import os
19 import sys
20
21 from optparse import OptionParser
22
23 from eightebed import tests, context, rooibos
24 from eightebed.drivers import parse_and_gen, compile_and_run, cmdline
25
26
27 logger = logging.getLogger("main")
28
29
30 def main(argv):
31 optparser = OptionParser(__doc__)
32 optparser.add_option("-a", "--dump-ast",
33 action="store_true", dest="dump_ast", default=False,
34 help="dump AST after source is parsed")
35 optparser.add_option("-c", "--compile",
36 action="store_true", dest="compile", default=False,
37 help="compile generated C code")
38 optparser.add_option("-e", "--c-compiler", metavar='EXECUTABLE',
39 dest="compiler", default="gcc",
40 help="specify program to use for compiling C "
41 "(default: %default)")
42 optparser.add_option("-f", "--pointer-format", metavar='FORMAT',
43 dest="pointer_format", default="$%08lx",
44 help="printf format to use for pointers in "
45 "--trace-marking (default: %default)")
46 optparser.add_option("-i", "--interactive",
47 action="store_true", dest="interactive", default=False,
48 help="enter interactive mode")
49 optparser.add_option("-m", "--trace-marking",
50 action="store_true", dest="trace_marking", default=False,
51 help="trace marking actions in generated C source")
52 optparser.add_option("-p", "--pedigree",
53 dest="pedigree", default=__file__,
54 help="entity to list as creator of generated C "
55 "source (default: %default)")
56 optparser.add_option("-r", "--run",
57 action="store_true", dest="run", default=False,
58 help="run compiled program (implies --compile)")
59 optparser.add_option("-t", "--test",
60 action="store_true", dest="test", default=False,
61 help="run test cases and exit")
62 optparser.add_option("-u", "--clean",
63 action="store_true", dest="clean", default=False,
64 help="delete generated C source and executable")
65 optparser.add_option("-v", "--verbose",
66 action="store_true", dest="verbose", default=False,
67 help="produce extra status output")
68 (options, args) = optparser.parse_args(argv[1:])
69 if options.verbose:
70 logging.basicConfig(level=logging.INFO)
71 if options.run:
72 options.compile = True
73 if options.test:
74 import doctest
75 doctest.testmod(rooibos)
76 doctest.testmod(context)
77 doctest.testmod(tests)
78 sys.exit(0)
79 if options.interactive:
80 cmdline(options)
81 sys.exit(0)
82 try:
83 infilename = args[0]
84 outfilename = args[1]
85 except IndexError:
86 print "Usage:", __doc__, "\n"
87 print "Run with the -h option to see a list of all options."
88 sys.exit(1)
89 parse_and_gen(options, infilename, outfilename, tests=tests.Tests)
90 if options.compile:
91 result = compile_and_run(outfilename, options)
92 sys.stdout.write(result)
93
94
95 if __name__ == "__main__":
96 main(sys.argv)
(New empty file)
0 # -*- coding: utf-8 -*-
1
2 """
3 Contexts. Can be used for type checking and other static analysis.
4 """
5
6 notset = object()
7 isset = object()
8 class Context(dict):
9 """
10 >>> d = Context({ 'a': 2, 'b': 3 })
11 >>> e = Context({ 'c': 4 }, parent=d)
12 >>> print e.lookup('c')
13 4
14 >>> print e.lookup('b')
15 3
16 >>> print e.lookup('e', None)
17 None
18 >>> print e.lookup('e')
19 Traceback (most recent call last):
20 ...
21 KeyError: 'e'
22 >>> d.declare('d', 7)
23 >>> print e.lookup('d')
24 7
25 >>> d.declare('b', 4)
26 Traceback (most recent call last):
27 ...
28 KeyError: 'b already declared'
29 >>> e.declare('b', 4)
30 Traceback (most recent call last):
31 ...
32 KeyError: 'b already declared'
33 >>> e.empty()
34 >>> print e.lookup('c', None)
35 None
36 >>> print d.lookup('a', None)
37 None
38
39 """
40
41 def __init__(self, initial={}, parent=None):
42 dict.__init__(self, initial)
43 self.parent = parent
44
45 def lookup(self, name, default=notset):
46 if name in self:
47 return self[name]
48 if self.parent is None:
49 if default is notset:
50 raise KeyError, name
51 return default
52 return self.parent.lookup(name, default=default)
53
54 def declare(self, name, value):
55 if self.lookup(name, default=isset) is not isset:
56 raise KeyError, "%s already declared" % name
57 self[name] = value
58
59 def empty(self):
60 self.clear()
61 if self.parent is not None:
62 self.parent.empty()
63
64 def __repr__(self):
65 return "Context(%s, parent=%s)" % (
66 dict.__repr__(self), repr(self.parent)
67 )
68
69
70 if __name__ == "__main__":
71 import doctest
72 doctest.testmod()
0 #!/usr/bin/python
1 # -*- coding: utf-8 -*-
2
3 """
4 drivers.py: Drive the Eightebed compiling/C compiling/running/etc processes.
5 """
6
7 import logging
8 import os
9 import sys
10
11 from pprint import pprint
12 from subprocess import Popen, PIPE
13
14 from eightebed.parser import parse
15 from eightebed.context import Context
16
17
18 logger = logging.getLogger("main")
19
20
21 def parse_and_check(program_text, options=None):
22 ast = parse(program_text)
23 if options is not None and options.dump_ast:
24 pprint(ast)
25 ast.typecheck(Context(), Context())
26 ast.vanalyze(Context())
27 return ast
28
29
30 def parse_and_gen(options, infilename, outfilename, tests=None):
31 if infilename.startswith('@') and tests is not None:
32 program_text = getattr(tests, infilename[1:])
33 else:
34 infile = open(infilename, "r")
35 program_text = infile.read()
36 infile.close()
37 logger.info("Parsing...")
38 ast = parse_and_check(program_text, options=options)
39 if outfilename == '-':
40 outfile = sys.stdout
41 else:
42 outfile = open(outfilename, "w")
43 logger.info("Generating...")
44 ast.emit(outfile, options)
45 outfile.close()
46
47
48 def compile_and_run(filename, options):
49 logger.info("Compiling...")
50 output = Popen([options.compiler, filename], stdout=PIPE).communicate()[0]
51 if options.verbose:
52 sys.stdout.write(output)
53 if output != '':
54 raise RuntimeError, "Compilation failed!"
55 if options.run:
56 logger.info("Running...")
57 output = Popen(["./a.out"], stdout=PIPE).communicate()[0]
58 if options.clean:
59 os.remove(filename)
60 os.remove("./a.out")
61 return output
62
63
64 def load_and_go(ast, options=None):
65 class LoadAndGoOptions(object):
66 verbose=False
67 run=True
68 clean=True
69 compiler="gcc"
70 pedigree=__file__ + ":load_and_go"
71 trace_marking=False
72 pointer_format="$%08lx"
73 options = options or LoadAndGoOptions()
74 file = open("tmp.c", "w")
75 ast.emit(file, options)
76 file.close()
77 sys.stdout.write(compile_and_run("tmp.c", options))
78
79
80 def cmdline(options):
81 cmd = ""
82 print "Eightebed interactive! Type 'quit' to quit."
83 options.run = True
84 options.clean = True
85 while True:
86 sys.stdout.write("> ");
87 cmd = sys.stdin.readline().strip()
88 if cmd == "quit":
89 break
90 try:
91 ast = parse_and_check(cmd, options=options)
92 load_and_go(ast, options)
93 except Exception, e:
94 print "Exception!", repr(e)
0 # -*- coding: utf-8 -*-
1
2 """
3 Parser for the Eightebed programming language.
4 """
5
6 import logging
7 import re
8
9 from .rooibos import (Stream, RegLexer,
10 Terminal, NonTerminal,
11 Alternation, Sequence, Asteration, Optional,
12 Grammar)
13 from .context import Context
14
15
16 logger = logging.getLogger("parser")
17
18
19 class TypeError(RuntimeError):
20 pass
21
22
23 class Eightebed(object):
24 def __init__(self, data):
25 self.typedecls = data[0]
26 self.vardecls = data[1]
27 self.block = data[2]
28 def __repr__(self):
29 return "%s(%s, %s, %s)" % (self.__class__.__name__, repr(self.typedecls), repr(self.vardecls), repr(self.block))
30 def typecheck(self, types, vars):
31 for typedecl in self.typedecls:
32 typedecl.typecheck(types, vars)
33 for vardecl in self.vardecls:
34 vardecl.typecheck(types, vars)
35 self.block.typecheck(types, vars)
36 def vanalyze(self, context):
37 self.block.vanalyze(context)
38 def emit(self, stream, options):
39 stream.write("""\
40 /* Achtung! This Source was Automatically Generated by %s! */
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <assert.h>
45
46 """ % options.pedigree)
47 if options.trace_marking:
48 stream.write("#define TRACE_MARKING 1\n")
49 stream.write("""\
50 typedef struct _ptr {
51 void *p;
52 int valid;
53 } _ptr;
54
55 static void _8ebed_invalidate(_ptr *ptr) {
56 ptr->valid = 0;
57 }
58
59 static int _8ebed_valid(_ptr ptr) {
60 return ptr.valid;
61 }
62
63 static int _8ebed_is_alias(_ptr a, _ptr b) {
64 return a.p == b.p;
65 }
66
67 static _ptr _8ebed_malloc(size_t size) {
68 _ptr ptr;
69 ptr.p = malloc(size);
70 ptr.valid = (ptr.p != NULL);
71 if (ptr.p != NULL) {
72 memset(ptr.p, 0, size);
73 }
74 return ptr;
75 }
76
77 static void _mark__root(_ptr);
78 static void _8ebed_free(_ptr *ptr) {
79 if (!_8ebed_valid(*ptr)) return;
80 _mark__root(*ptr);
81 free(ptr->p);
82 _8ebed_invalidate(ptr);
83 }
84
85 """)
86 for typedecl in self.typedecls:
87 typedecl.emit(stream, options)
88 for vardecl in self.vardecls:
89 vardecl.emit(stream)
90 stream.write(r"""\
91 static void _mark__root(_ptr outcast) {
92 #ifdef TRACE_MARKING
93 fprintf(stderr, "-> BEGIN marking %s @root\n", (long)outcast.p);
94 #endif
95 """ % options.pointer_format)
96 for vardecl in self.vardecls:
97 vardecl.emit_marker(stream)
98 stream.write(r"""
99 #ifdef TRACE_MARKING
100 fprintf(stderr, "-> END marking %s @root\n", (long)outcast.p);
101 #endif
102 }
103
104 int main(int argc, char **argv) {
105 """ % options.pointer_format)
106 self.block.emit(stream)
107 stream.write("}\n")
108
109
110 class Block(object):
111 def __init__(self, data):
112 self.stmts = data[1]
113 def __repr__(self):
114 return "%s(%s)" % (self.__class__.__name__, repr(self.stmts))
115 def typecheck(self, types, vars):
116 block_types = Context(parent=types)
117 block_vars = Context(parent=vars)
118 for stmt in self.stmts:
119 stmt.typecheck(block_types, block_vars)
120 def vanalyze(self, context):
121 for stmt in self.stmts:
122 stmt.vanalyze(context)
123 def emit(self, stream):
124 for stmt in self.stmts:
125 stmt.emit(stream)
126
127 class TypeDecl(object):
128 def __init__(self, data):
129 self.name = data[1]
130 self.type = data[2]
131 def __repr__(self):
132 return "TypeDecl(%s, %s)" % (repr(self.name), repr(self.type))
133 def typecheck(self, types, vars):
134 types.declare(self.name, self.type)
135 self.type.typecheck(types, vars)
136 if not isinstance(self.type, TypeStruct):
137 raise TypeError, "Only structs may be named"
138 return self.type
139 def emit(self, stream, options):
140 if isinstance(self.type, TypeStruct):
141 stream.write("typedef \n")
142 self.type.emit_forward(stream)
143 stream.write(" %s;\n" % self.name)
144 self.type.emit(stream)
145 stream.write(";\n")
146 stream.write("static void mark_%s(_ptr outcast, %s* p) {" % (self.name, self.name))
147 marking_text = options.pointer_format + (" @%s " % self.name) + options.pointer_format
148 stream.write(r"""
149 #ifdef TRACE_MARKING
150 fprintf(stderr, "-> BEGIN marking %s\n", (long)outcast.p, (long)p);
151 #endif
152 """ % marking_text)
153 for member in self.type.members:
154 if isinstance(member.type, TypePtr):
155 stream.write("""
156 if (_8ebed_is_alias(outcast, p->%s)) {
157 _8ebed_invalidate(&p->%s);
158 } else if (_8ebed_valid(p->%s)) {
159 mark_""" % (member.name, member.name, member.name))
160 member.type.points_to().emit(stream)
161 stream.write("(outcast, (")
162 member.type.points_to().emit(stream)
163 stream.write(" *)(p->%s.p));\n }\n" % member.name)
164 stream.write(r"""
165 #ifdef TRACE_MARKING
166 fprintf(stderr, "-> END marking %s\n", (long)outcast.p, (long)p);
167 #endif
168 }
169 """ % marking_text)
170 else:
171 stream.write("typedef \n")
172 self.type.emit(stream)
173 stream.write(" %s;\n" % self.name)
174 stream.write("\n")
175
176 # These classes double as AST components and as type expressions.
177
178 class Type(object):
179 def equiv(self, other):
180 raise NotImplementedError
181 def points_to(self):
182 return None
183 def resolve(self, types):
184 return self
185 def get_member_type(self, name):
186 return None
187
188 class TypeVoid(Type):
189 def __init__(self, data=None):
190 pass
191 def equiv(self, other):
192 return isinstance(other, TypeVoid)
193 def emit(self, stream):
194 stream.write("void")
195
196 class TypeInt(Type):
197 def __init__(self, data=None):
198 pass
199 def __repr__(self):
200 return "%s()" % (self.__class__.__name__)
201 def typecheck(self, types, vars):
202 return self
203 def equiv(self, other):
204 return isinstance(other, TypeInt)
205 def emit(self, stream):
206 stream.write("int")
207
208 struct_id = 0
209 class TypeStruct(Type):
210 def __init__(self, data):
211 global struct_id
212 self.members = data[2]
213 self.id = struct_id
214 struct_id += 1
215 def __repr__(self):
216 return "%s(%s)" % (self.__class__.__name__, repr(self.members))
217 def typecheck(self, types, vars):
218 for member in self.members:
219 type_ = member.typecheck(types, vars)
220 if isinstance(type_, TypeStruct):
221 raise TypeError, "Structs may not contain other structs"
222 return self
223 def equiv(self, other):
224 return False
225 def emit(self, stream):
226 stream.write("struct s_%s {\n" % self.id)
227 for member in self.members:
228 member.emit(stream)
229 stream.write("}")
230 def emit_forward(self, stream):
231 stream.write("struct s_%s" % self.id)
232 def get_member_type(self, name):
233 for decl in self.members:
234 if decl.name == name:
235 return decl.type
236 return None
237
238 class TypePtr(Type):
239 def __init__(self, data):
240 self.target = data[2]
241 def __repr__(self):
242 return "%s(%s)" % (self.__class__.__name__, repr(self.target))
243 def typecheck(self, types, vars):
244 self.target.typecheck(types, vars)
245 if isinstance(self.target, TypeNamed):
246 return self
247 else:
248 raise TypeError, "Pointer type must point to named type"
249 def equiv(self, other):
250 return isinstance(other, TypePtr) and self.target.equiv(other.target)
251 def points_to(self):
252 return self.target
253 def emit(self, stream):
254 stream.write("/* ")
255 self.target.emit(stream)
256 stream.write("*")
257 stream.write(" */ ")
258 stream.write("_ptr")
259
260 class TypeNamed(Type):
261 def __init__(self, data):
262 self.name = data
263 def __repr__(self):
264 return "%s(%s)" % (self.__class__.__name__, repr(self.name))
265 def typecheck(self, types, vars):
266 return True # can't look self up yet, might not exist yet
267 def equiv(self, other):
268 return isinstance(other, TypeNamed) and self.name == other.name
269 def emit(self, stream):
270 stream.write(self.name)
271 def resolve(self, types):
272 return types.lookup(self.name)
273
274 class Decl(object):
275 def __init__(self, data):
276 self.type = data[0]
277 self.name = data[1]
278 def __repr__(self):
279 return "%s(%s, %s)" % (self.__class__.__name__, repr(self.name), repr(self.type))
280 def typecheck(self, types, vars):
281 self.type.typecheck(types, vars)
282 return self.type
283 def emit(self, stream):
284 self.type.emit(stream)
285 stream.write(" %s;\n" % self.name)
286
287 class VarDecl(object):
288 def __init__(self, data):
289 decl = data[1]
290 self.type = decl.type
291 self.name = decl.name
292 def __repr__(self):
293 return "%s(%s, %s)" % (self.__class__.__name__, repr(self.name), repr(self.type))
294 def typecheck(self, types, vars):
295 self.type.typecheck(types, vars)
296 vars.declare(self.name, self.type)
297 return self.type
298 def emit(self, stream):
299 self.type.emit(stream)
300 stream.write(" %s;\n" % self.name)
301 def emit_marker(self, stream):
302 if isinstance(self.type, TypePtr):
303 stream.write("""
304 if (_8ebed_is_alias(outcast, %s)) {
305 _8ebed_invalidate(&%s);
306 } else if (_8ebed_valid(%s)) {
307 mark_""" % (self.name, self.name, self.name))
308 self.type.points_to().emit(stream)
309 stream.write("(outcast, (")
310 self.type.points_to().emit(stream)
311 stream.write(" *)%s.p);\n }\n" % self.name)
312
313 class WhileStmt(object):
314 def __init__(self, data):
315 self.expr = data[1]
316 self.block = data[2]
317 def __repr__(self):
318 return "%s(%s, %s)" % (self.__class__.__name__, repr(self.expr), repr(self.block))
319 def typecheck(self, types, vars):
320 self.expr.typecheck(types, vars)
321 self.block.typecheck(types, vars)
322 return TypeVoid()
323 def vanalyze(self, context):
324 self.expr.vanalyze(context)
325 self.block.vanalyze(context)
326 def emit(self, stream):
327 stream.write("while(")
328 self.expr.emit(stream)
329 stream.write(") {\n")
330 self.block.emit(stream)
331 stream.write("}\n")
332
333 class IfStmt(object):
334 def __init__(self, data):
335 self.expr = data[1]
336 self.then = data[2]
337 elsepart = data[3]
338 if elsepart:
339 self.else_ = elsepart[0][1]
340 else:
341 self.else_ = Block(['{', [], '}'])
342 def __repr__(self):
343 return "%s(%s, %s, %s)" % (self.__class__.__name__, repr(self.expr), repr(self.then), repr(self.else_))
344 def typecheck(self, types, vars):
345 self.expr.typecheck(types, vars)
346 self.then.typecheck(types, vars)
347 self.else_.typecheck(types, vars)
348 return TypeVoid()
349 def vanalyze(self, context):
350 self.expr.vanalyze(context)
351 # If the test expr is exactly "valid x", put x into context,
352 # asserting that it is valid hereafter in this block
353 subcontext = Context(parent=context)
354 if isinstance(self.expr, ValidExpr):
355 if isinstance(self.expr.expr, VarRef):
356 subcontext[self.expr.expr.name] = True
357 self.then.vanalyze(subcontext)
358 self.else_.vanalyze(context)
359 def emit(self, stream):
360 stream.write("if(")
361 self.expr.emit(stream)
362 stream.write(") {\n")
363 self.then.emit(stream)
364 stream.write("} else {\n")
365 self.else_.emit(stream)
366 stream.write("}\n")
367
368 class FreeStmt(object):
369 def __init__(self, data):
370 self.ref = data[1]
371 def __repr__(self):
372 return "%s(%s)" % (self.__class__.__name__, repr(self.ref))
373 def typecheck(self, types, vars):
374 ref_type = self.ref.typecheck(types, vars)
375 if ref_type.points_to() is None:
376 raise TypeError, "%s is not a pointer type" % repr(ref_type)
377 return TypeVoid()
378 def vanalyze(self, context):
379 self.ref.vanalyze(context)
380 # End safe area -- remove all assertions of validity hereafter.
381 context.empty()
382 def emit(self, stream):
383 stream.write("_8ebed_free(&")
384 self.ref.emit(stream)
385 stream.write(");\n")
386
387 class PrintStmt(object):
388 def __init__(self, data):
389 self.expr = data[1]
390 def __repr__(self):
391 return "%s(%s)" % (self.__class__.__name__, repr(self.expr))
392 def typecheck(self, types, vars):
393 expr_type = self.expr.typecheck(types, vars)
394 if not expr_type.equiv(TypeInt()):
395 raise TypeError, "%s is not an int" % repr(expr_type)
396 return TypeVoid()
397 def vanalyze(self, context):
398 self.expr.vanalyze(context)
399 def emit(self, stream):
400 stream.write("printf(\"%d \", ")
401 self.expr.emit(stream)
402 stream.write(");\n")
403
404 class AssignStmt(object):
405 def __init__(self, data):
406 self.ref = data[0]
407 self.expr = data[2]
408 def __repr__(self):
409 return "%s(%s, %s)" % (self.__class__.__name__, repr(self.ref), repr(self.expr))
410 def typecheck(self, types, vars):
411 tlhs = self.ref.typecheck(types, vars)
412 trhs = self.expr.typecheck(types, vars)
413 if trhs.equiv(tlhs):
414 return TypeVoid()
415 else:
416 raise TypeError, "%s (%s) not equivalent to %s (%s) for vars %s" % (repr(tlhs), repr(self.ref), repr(trhs), repr(self.expr), vars)
417 def vanalyze(self, context):
418 self.ref.vanalyze(context)
419 self.expr.vanalyze(context)
420 # End safe area -- remove all assertions of validity hereafter.
421 context.empty()
422 def emit(self, stream):
423 self.ref.emit(stream)
424 stream.write(" = ")
425 self.expr.emit(stream)
426 stream.write(";\n")
427
428 class BinOpExpr(object):
429 map = {
430 '+' : '+',
431 '-' : '-',
432 '*' : '*',
433 '/' : '/',
434 '=' : '==',
435 '>' : '>',
436 '&' : '&&',
437 '|' : '||',
438 }
439 def __init__(self, data):
440 self.lhs = data[1]
441 self.op = data[2]
442 self.rhs = data[3]
443 def __repr__(self):
444 return "%s(%s, %s, %s)" % (self.__class__.__name__, repr(self.lhs), repr(self.op), repr(self.rhs))
445 def typecheck(self, types, vars):
446 trhs = self.lhs.typecheck(types, vars)
447 tlhs = self.rhs.typecheck(types, vars)
448 if not tlhs.equiv(TypeInt()):
449 raise TypeError, "lhs %s is not an int" % repr(tlhs)
450 if not trhs.equiv(TypeInt()):
451 raise TypeError, "rhs %s is not an int" % repr(trhs)
452 return TypeInt()
453 def vanalyze(self, context):
454 self.lhs.vanalyze(context)
455 self.rhs.vanalyze(context)
456 def emit(self, stream):
457 stream.write("(")
458 self.lhs.emit(stream)
459 stream.write(" %s " % self.map[self.op])
460 self.rhs.emit(stream)
461 stream.write(")")
462
463 class MallocExpr(object):
464 def __init__(self, data):
465 self.type = data[1]
466 def __repr__(self):
467 return "%s(%s)" % (self.__class__.__name__, repr(self.type))
468 def typecheck(self, types, vars):
469 return TypePtr(['', '', self.type])
470 def vanalyze(self, context):
471 pass
472 def emit(self, stream):
473 stream.write("_8ebed_malloc(sizeof(")
474 self.type.emit(stream)
475 stream.write("))")
476
477 class ValidExpr(object):
478 def __init__(self, data):
479 self.expr = data[1]
480 def __repr__(self):
481 return "%s(%s)" % (self.__class__.__name__, repr(self.expr))
482 def typecheck(self, types, vars):
483 expr_type = self.expr.typecheck(types, vars)
484 if expr_type.points_to() is None:
485 raise TypeError, "%s is not a pointer type" % repr(expr_type)
486 return TypeInt()
487 def vanalyze(self, context):
488 self.expr.vanalyze(context)
489 def emit(self, stream):
490 stream.write("_8ebed_valid(")
491 self.expr.emit(stream)
492 stream.write(")")
493
494 class DottedRef(object):
495 def __init__(self, data):
496 self.source = data[1]
497 self.member_name = data[4]
498 def __repr__(self):
499 return "%s(%s, %s)" % (self.__class__.__name__, repr(self.source), repr(self.member_name))
500 def typecheck(self, types, vars):
501 source_type = self.source.typecheck(types, vars)
502 source_type = source_type.resolve(types)
503 member_type = source_type.get_member_type(self.member_name)
504 if member_type is None:
505 raise TypeError, "%s does not have member %s" % (repr(source_type), self.member_name)
506 return member_type
507 def vanalyze(self, context, deref=False):
508 self.source.vanalyze(context, deref=deref)
509 def emit(self, stream):
510 self.source.emit(stream)
511 stream.write(".%s" % self.member_name)
512
513 class DeRef(object):
514 def __init__(self, data):
515 self.source = data[1]
516 self._dest_type = None
517 def __repr__(self):
518 return "%s(%s)" % (self.__class__.__name__, repr(self.source))
519 def typecheck(self, types, vars):
520 source_type = self.source.typecheck(types, vars)
521 dest_type = source_type.points_to()
522 if dest_type is None:
523 raise TypeError, "%s is not a pointer type" % repr(source_type)
524 self._dest_type = dest_type
525 return dest_type
526 def vanalyze(self, context, deref=False):
527 self.source.vanalyze(context, deref=True)
528 def emit(self, stream):
529 stream.write("(*(")
530 self._dest_type.emit(stream)
531 stream.write(" *)")
532 self.source.emit(stream)
533 stream.write(".p)")
534
535 class VarRef(object):
536 def __init__(self, data):
537 self.name = data
538 def __repr__(self):
539 return "%s(%s)" % (self.__class__.__name__, repr(self.name))
540 def typecheck(self, types, vars):
541 #if self.name == 'i':
542 # raise NotImplementedError, vars.lookup(self.name)
543 return vars.lookup(self.name)
544 def vanalyze(self, context, deref=False):
545 if deref:
546 if not context.lookup(self.name, default=False):
547 raise TypeError, "Attempt to dereference %s in non-safe context" % self.name
548 def emit(self, stream):
549 stream.write(self.name)
550
551 class IntConst(object):
552 def __init__(self, data):
553 self.value = int(data)
554 def __repr__(self):
555 return "%s(%s)" % (self.__class__.__name__, repr(self.value))
556 def typecheck(self, types, vars):
557 return TypeInt()
558 def vanalyze(self, context, deref=False):
559 pass
560 def emit(self, stream):
561 stream.write(str(self.value))
562
563
564 g = Grammar()
565 g['Eightebed'] = Sequence(Asteration(NonTerminal('TypeDecl')),
566 Asteration(NonTerminal('VarDecl')),
567 NonTerminal('Block')).construct(Eightebed)
568 g['Block'] = Sequence(Terminal('{'),
569 Asteration(NonTerminal('Stmt')),
570 Terminal('}')).construct(Block)
571 g['TypeDecl'] = Sequence(Terminal('type'),
572 NonTerminal('TypeName'),
573 NonTerminal('Type'),
574 Terminal(';')).construct(TypeDecl)
575 g['Type'] = Alternation(Terminal('int').construct(TypeInt),
576 Sequence(Terminal('struct'), Terminal('{'), Asteration(NonTerminal('Decl')), Terminal('}')).construct(TypeStruct),
577 Sequence(Terminal('ptr'), Terminal('to'), NonTerminal('Type')).construct(TypePtr),
578 NonTerminal('TypeName').construct(TypeNamed))
579 g['Decl'] = Sequence(NonTerminal('Type'),
580 NonTerminal('VarName'),
581 Terminal(';')).construct(Decl)
582 g['VarDecl'] = Sequence(Terminal('var'), NonTerminal('Decl')).construct(VarDecl)
583 g['Stmt'] = Alternation(Sequence(Terminal('while'), NonTerminal('Expr'), NonTerminal('Block')).construct(WhileStmt),
584 Sequence(Terminal('if'), NonTerminal('Expr'), NonTerminal('Block'),
585 Optional(Sequence(Terminal('else'), NonTerminal('Block')))).construct(IfStmt),
586 Sequence(Terminal('free'), NonTerminal('Ref'), Terminal(';')).construct(FreeStmt),
587 Sequence(Terminal('print'), NonTerminal('Expr'), Terminal(';')).construct(PrintStmt),
588 Sequence(NonTerminal('Ref'), Terminal('='), NonTerminal('Expr'), Terminal(';')).construct(AssignStmt))
589 g['Ref'] = Alternation(Sequence(Terminal('['), NonTerminal('Ref'), Terminal(']'), Terminal('.'), NonTerminal('VarName')).construct(DottedRef),
590 Sequence(Terminal('@'), NonTerminal('Ref')).construct(DeRef),
591 NonTerminal('VarName').construct(VarRef))
592 g['Expr'] = Alternation(Sequence(Terminal('('), NonTerminal('Expr'), NonTerminal('BinOp'), NonTerminal('Expr'), Terminal(')')).construct(BinOpExpr),
593 Sequence(Terminal('malloc'), NonTerminal('Type')).construct(MallocExpr),
594 Sequence(Terminal('valid'), NonTerminal('Expr')).construct(ValidExpr),
595 NonTerminal('IntLit').construct(IntConst),
596 NonTerminal('Ref'))
597 g['BinOp'] = Alternation(Terminal('+'),Terminal('-'),Terminal('*'),Terminal('/'),Terminal('='),Terminal('>'),Terminal('&'), Terminal('|'))
598
599 g['TypeName'] = Terminal(lambda x: re.match('^[a-zA-Z]\w*$', x))
600 g['VarName'] = Terminal(lambda x: re.match('^[a-zA-Z]\w*$', x))
601 g['IntLit'] = Terminal(lambda x: re.match('^\d+$', x))
602
603
604 def parse(text):
605 r = RegLexer()
606 r.ignore(r'\s+')
607 r.register(r'(\d+)')
608 r.register(r'(\(|\)|\[|\]|\;|\{|\}|\=|\+|\-|\*|\/|\,|\@|\.|\>|\&|\|)')
609 r.register(r'([a-zA-Z]\w*)')
610 s = Stream(r(text))
611 return g.parse('Eightebed', s)
612
613 def parse_file(filename):
614 f = open(filename, "r")
615 contents = f.read()
616 f.close()
617 return parse(contents)
618
619
620 if __name__ == "__main__":
621 import doctest
622 doctest.testmod()
0 # -*- coding: utf-8 -*-
1
2 """
3 Rooibos, a parser combinator module for Python.
4 """
5
6 import re
7 import types
8
9 class Stream(object):
10 """
11 A Stream is a kind of wrapper around an iterator which allows
12 a limited form of look-ahead into the iterator's results.
13
14 >>> def i():
15 ... for x in [6,1,7]:
16 ... yield x
17 >>> s = Stream(i())
18 >>> s.peek()
19 6
20 >>> s.peek()
21 6
22 >>> s.advance()
23 >>> s.peek()
24 1
25 >>> s.advance()
26 >>> s.peek()
27 7
28 >>> s.advance()
29 >>> s.peek() is None
30 True
31 >>> s = Stream([1,2,3])
32 >>> s.peek()
33 1
34
35 """
36
37 def __init__(self, generator):
38 self.buffer = []
39 if isinstance(generator, types.GeneratorType):
40 self.generator = generator
41 else:
42 def g():
43 for x in generator:
44 yield x
45 self.generator = g()
46
47 def peek(self):
48 if not self.buffer:
49 try:
50 self.buffer.append(self.generator.next())
51 except StopIteration:
52 return None
53 return self.buffer[0]
54
55 def advance(self):
56 if not self.buffer:
57 self.buffer.extend(self.generator.next())
58 self.buffer.pop()
59
60
61 class RegLexer(object):
62 """
63 An iterator which, given a string, returns a generator which returns sucessive
64 prefixes of the string based on supplied regexes.
65
66 >>> t = RegLexer()
67 >>> t.register(r'(\d+)', meta='integer')
68 >>> t.register(r'(\(|\))')
69 >>> v = []
70 >>> for n in t("12(34)"): v.append(n)
71 >>> v
72 [('integer', '12'), '(', ('integer', '34'), ')']
73 >>> v = []
74 >>> for n in t("12 ( 34 )"): v.append(n)
75 >>> v
76 [('integer', '12')]
77 >>> t.ignore(r'\s+')
78 >>> v = []
79 >>> for n in t("12 ( 34 )"): v.append(n)
80 >>> v
81 [('integer', '12'), '(', ('integer', '34'), ')']
82
83 """
84
85 def __init__(self):
86 self.text = ""
87 self.patterns = []
88 self.ignoring = []
89
90 def __call__(self, text):
91 self.text = text
92 has_match = True
93 while has_match:
94 has_match = False
95
96 has_ignore = True
97 while has_ignore:
98 has_ignore = False
99 for pattern in self.ignoring:
100 result = re.match(pattern, self.text)
101 if result:
102 self.text = self.text[result.end():]
103 has_ignore = True
104 break
105
106 for (pattern, meta) in self.patterns:
107 result = re.match(pattern, self.text)
108 if result:
109 self.text = self.text[result.end():]
110 has_match = True
111 break
112 if has_match:
113 if meta is not None:
114 yield meta, result.group()
115 else:
116 yield result.group()
117
118 def register(self, pattern, meta=None):
119 self.patterns.append((pattern, meta))
120
121 def ignore(self, pattern):
122 self.ignoring.append(pattern)
123
124
125 class PredicateSet(object):
126 """
127 >>> s = PredicateSet('a','b','c')
128 >>> 'a' in s
129 True
130 >>> 'z' in s
131 False
132 >>> s.add('z')
133 >>> 'z' in s
134 True
135 >>> s.add(lambda x: x > 10)
136 >>> 9 in s
137 False
138 >>> 12 in s
139 True
140 >>> 291 in s
141 True
142 >>> s.add(lambda x: x < 3)
143 >>> 9 in s
144 False
145 >>> 12 in s
146 True
147 >>> 1 in s
148 True
149
150 """
151 def __init__(self, *contents):
152 self._set = set()
153 self.update(contents)
154
155 def add(self, entity):
156 if callable(entity):
157 self._set.add(entity)
158 else:
159 self._set.add(lambda x: x == entity)
160
161 def update(self, iterable):
162 for x in iterable:
163 self.add(x)
164
165 def __contains__(self, other):
166 for x in self._set:
167 if x(other):
168 return True
169 return False
170
171 def __iter__(self):
172 for x in self._set:
173 yield x
174
175 def __repr__(self):
176 return "PredicateSet(%s)" % repr(self._set)
177
178
179 ### Productions ###
180
181
182 class Production(object):
183 constructor = None
184
185 def parse(self, stream, grammar=None):
186 """Subclasses should override this."""
187 raise NotImplementedError
188
189 def construct(self, constructor):
190 self.constructor = constructor
191 return self
192
193 def capture(self, obj, grammar=None):
194 if self.constructor is None:
195 return obj
196 else:
197 return self.constructor(obj)
198
199 def firsts(self, grammar=None):
200 """Subclasses should override this."""
201 return PredicateSet()
202
203 def is_nullable(self, grammar=None):
204 """Subclasses should override this."""
205 return False
206
207
208 class Terminal(Production):
209 """
210 >>> t = Terminal('cat')
211 >>> 'cat' in t.firsts()
212 True
213 >>> s = Stream(['cat'])
214 >>> t.parse(s)
215 'cat'
216 >>> s = Stream(['cat','a','log'])
217 >>> t.parse(s)
218 'cat'
219 >>> s.peek()
220 'a'
221 >>> s = Stream(['dog'])
222 >>> t.parse(s) is None
223 True
224 """
225
226 def __init__(self, entity):
227 self.entity = entity
228
229 def check_entity(self, against):
230 if callable(self.entity):
231 return self.entity(against)
232 else:
233 return against == self.entity
234
235 def parse(self, stream, grammar=None):
236 if self.check_entity(stream.peek()):
237 result = self.capture(stream.peek(), grammar=grammar)
238 stream.advance()
239 return result
240 return None
241
242 def firsts(self, grammar=None):
243 return PredicateSet(self.entity)
244
245
246 class Alternation(Production):
247 """
248 >>> a = Alternation(Terminal('cat'), Terminal('dog'))
249 >>> 'cat' in a.firsts()
250 True
251 >>> 'dog' in a.firsts()
252 True
253 >>> s = Stream(['cat'])
254 >>> a.parse(s)
255 'cat'
256 >>> s = Stream(['dog'])
257 >>> a.parse(s)
258 'dog'
259 >>> s = Stream(['horse'])
260 >>> a.parse(s) is None
261 True
262 """
263
264 def __init__(self, *alternatives):
265 self.alternatives = alternatives
266
267 def parse(self, stream, grammar=None):
268 for alternative in self.alternatives:
269 if stream.peek() in alternative.firsts(grammar=grammar):
270 result = alternative.parse(stream, grammar=grammar)
271 return self.capture(result, grammar=grammar)
272 return None
273
274 def firsts(self, grammar=None):
275 f = PredicateSet()
276 for alternative in self.alternatives:
277 f.update(alternative.firsts(grammar=grammar))
278 return f
279
280 def is_nullable(self, grammar=None):
281 for alternative in self.alternatives:
282 if alternative.is_nullable(grammar=grammar):
283 return True
284 return False
285
286
287 class Sequence(Production):
288 """
289 >>> p = Sequence(Terminal('cat'), Terminal('dog'))
290 >>> 'cat' in p.firsts()
291 True
292 >>> 'dog' in p.firsts()
293 False
294 >>> p.parse(Stream(['cat','food'])) is None
295 True
296 >>> p.parse(Stream(['dog','food'])) is None
297 True
298 >>> p.parse(Stream(['cat','dog']))
299 ['cat', 'dog']
300 """
301
302 def __init__(self, *sequence):
303 self.sequence = sequence
304
305 def parse(self, stream, grammar=None):
306 results = []
307 for component in self.sequence:
308 result = component.parse(stream, grammar=grammar)
309 if result is None:
310 results = None
311 break
312 results.append(result)
313 if results is not None:
314 return self.capture(results, grammar=grammar)
315 return None
316
317 def firsts(self, grammar=None):
318 f = PredicateSet()
319 for component in self.sequence:
320 f.update(component.firsts(grammar=grammar))
321 if not component.is_nullable(grammar=grammar):
322 break
323 return f
324
325 def is_nullable(self, grammar=None):
326 for component in self.sequence:
327 if not component.is_nullable(grammar=grammar):
328 return False
329 return True
330
331
332 class Asteration(Production):
333 """
334 >>> p = Asteration(Terminal('cat'))
335 >>> 'cat' in p.firsts()
336 True
337 >>> s = Stream(['cat'])
338 >>> p.parse(s)
339 ['cat']
340 >>> s.peek() is None
341 True
342 >>> s = Stream(['cat','cat','cat'])
343 >>> p.parse(s)
344 ['cat', 'cat', 'cat']
345 >>> s.peek() is None
346 True
347 >>> s = Stream(['dog'])
348 >>> p.parse(s)
349 []
350 >>> s.peek()
351 'dog'
352
353 >>> p = Sequence(Asteration(Terminal('cat')), Terminal('dog'))
354 >>> p.parse(Stream(['cat'])) is None
355 True
356 >>> p.parse(Stream(['cat','dog']))
357 [['cat'], 'dog']
358 >>> p.parse(Stream(['cat','cat','cat','dog']))
359 [['cat', 'cat', 'cat'], 'dog']
360 >>> p.parse(Stream(['dog']))
361 [[], 'dog']
362 """
363
364 def __init__(self, production):
365 self.production = production
366
367 def parse(self, stream, grammar=None):
368 results = []
369 while stream.peek() in self.production.firsts(grammar=grammar):
370 result = self.production.parse(stream, grammar=grammar)
371 if result is None:
372 break
373 results.append(result)
374 return self.capture(results, grammar=grammar)
375
376 def firsts(self, grammar=None):
377 return self.production.firsts(grammar=grammar)
378
379 def is_nullable(self, grammar=None):
380 return True
381
382
383 class Optional(Production):
384 """
385 >>> p = Optional(Terminal('cat'))
386 >>> 'cat' in p.firsts()
387 True
388 >>> s = Stream(['cat'])
389 >>> p.parse(s)
390 ['cat']
391 >>> s.peek() is None
392 True
393 """
394
395 def __init__(self, production):
396 self.production = production
397
398 def parse(self, stream, grammar=None):
399 results = []
400 if stream.peek() in self.production.firsts(grammar=grammar):
401 result = self.production.parse(stream, grammar=grammar)
402 if result is not None:
403 results.append(result)
404 return self.capture(results, grammar=grammar)
405
406 def firsts(self, grammar=None):
407 return self.production.firsts(grammar=grammar)
408
409 def is_nullable(self, grammar=None):
410 return True
411
412
413 class NonTerminal(Production):
414 def __init__(self, name):
415 self.name = name
416
417 def _production(self, grammar):
418 if not grammar:
419 raise TypeError, "need grammar to use NonTerminal"
420 return grammar[self.name]
421
422 def parse(self, stream, grammar=None):
423 result = self._production(grammar).parse(stream, grammar=grammar)
424 return self.capture(result, grammar=grammar)
425
426 def firsts(self, grammar=None):
427 return self._production(grammar).firsts(grammar=grammar)
428
429 def is_nullable(self, grammar=None):
430 return self._production(grammar).is_nullable(grammar=grammar)
431
432
433 class Grammar(object):
434 """Container for a set of named productions.
435
436 >>> g = Grammar()
437 >>> g['Expr'] = Sequence(Terminal('('),Asteration(Terminal('*')),Terminal(')'))
438 >>> g.parse('Expr', Stream(['(','*','*',')']))
439 ['(', ['*', '*'], ')']
440 >>> g['Expr'] = Sequence(Terminal('('),Asteration(NonTerminal('Expr')),Terminal(')'))
441 >>> g.parse('Expr', Stream(['(',')']))
442 ['(', [], ')']
443 >>> s = Stream(['(','(',')',')'])
444 >>> g.parse('Expr', s)
445 ['(', [['(', [], ')']], ')']
446 >>> s.peek() is None
447 True
448 >>> g.parse('Expr', Stream(['(','(',')'])) is None
449 True
450 """
451
452 trace = False
453
454 def __init__(self, parent=None):
455 self.productions = {}
456 self.parent = parent
457
458 def __getitem__(self, key):
459 if self.trace:
460 print "Reading production ", key
461 if key in self.productions:
462 return self.productions[key]
463 elif self.parent:
464 return self.parent[key]
465 else:
466 raise KeyError, "No production '%s' in grammar, and no parent grammar" % key
467
468 def __setitem__(self, key, value):
469 self.productions[key] = value
470
471 def parse(self, name, stream):
472 return self[name].parse(stream, grammar=self)
473
474
475 if __name__ == "__main__":
476 import doctest
477 doctest.testmod()
0 #!/usr/bin/python
1 # -*- coding: utf-8 -*-
2
3 """
4 Test suite for (Python implementations of) the Eightebed programming language.
5 """
6
7 import doctest
8
9 from .parser import parse, TypeError
10 from .context import Context
11 from .drivers import parse_and_check, load_and_go
12
13
14 class Tests(object):
15 """Class containing test cases for Eightebed.
16
17 >>> p = parse(Tests.simple_ok)
18 >>> print p
19 Eightebed([], [VarDecl('jim', TypeInt())], Block([AssignStmt(VarRef('jim'), IntConst(4))]))
20 >>> p = parse_and_check(Tests.simple_ok)
21
22 >>> parse_and_check(Tests.double_declaration)
23 Traceback (most recent call last):
24 ...
25 KeyError: 'jim already declared'
26
27 >>> parse_and_check(Tests.ptr_to_ptr)
28 Traceback (most recent call last):
29 ...
30 TypeError: Pointer type must point to named type
31
32 >>> parse_and_check(Tests.ptr_to_int)
33 Traceback (most recent call last):
34 ...
35 TypeError: Pointer type must point to named type
36
37 >>> parse_and_check(Tests.struct_within_struct)
38 Traceback (most recent call last):
39 ...
40 TypeError: Structs may not contain other structs
41
42 >>> parse_and_check(Tests.named_int)
43 Traceback (most recent call last):
44 ...
45 TypeError: Only structs may be named
46
47 >>> parse_and_check(Tests.dereference_outside_conditional)
48 Traceback (most recent call last):
49 ...
50 TypeError: Attempt to dereference jim in non-safe context
51
52 >>> parse_and_check(Tests.dereference_outside_safe_area)
53 Traceback (most recent call last):
54 ...
55 TypeError: Attempt to dereference jim in non-safe context
56
57 >>> p = parse_and_check(Tests.dereference_within_nested_safe_area)
58 >>> p is None
59 False
60
61 >>> parse_and_check(Tests.dereference_after_free)
62 Traceback (most recent call last):
63 ...
64 TypeError: Attempt to dereference jim in non-safe context
65
66 Hey! Enough of the static tests. Let's run some actual Eightebed
67 programs and check their output.
68
69 >>> p = parse_and_check(Tests.allocated_values_initialized)
70 >>> load_and_go(p)
71 0
72
73 >>> p = parse_and_check(Tests.simple_arith)
74 >>> load_and_go(p)
75 4
76
77 >>> p = parse_and_check(Tests.loop_1)
78 >>> load_and_go(p)
79 5 4 3 2 1
80
81 >>> p = parse_and_check(Tests.allocating_loop)
82 >>> load_and_go(p)
83
84 >>> p = parse_and_check(Tests.free_invalidates)
85 >>> load_and_go(p)
86 53
87
88 >>> p = parse_and_check(Tests.alias_is_invalidated)
89 >>> load_and_go(p)
90 100 99 98 97 96 95 94 93 92 91 90 89 88
91
92 In principle, this test demonstrates that the memory freed by the
93 free command can be re-used by a subsequent malloc expression. Of
94 course, since nothing is actually forcing the backend C compiler's
95 implementation of malloc() to re-use memory that was previously
96 free()'d, we don't actually test it here.
97
98 >>> p = parse_and_check(Tests.allocate_and_free_loop)
99 >>> load_and_go(p)
100 50
101
102 """
103
104 simple_ok = """\
105 var int jim;
106 {
107 jim = 4;
108 }
109 """
110 simple_arith = """\
111 {
112 if (((3 * 3) = (10 - 1)) & (4 > 3)) {
113 print ((4 + 8) / 3);
114 }
115 }
116 """
117 double_declaration = """\
118 var int jim;
119 var ptr to node jim;
120 {
121 print 3;
122 }
123 """
124 ptr_to_ptr = """\
125 type node struct {
126 int value;
127 ptr to ptr to node next;
128 };
129 var node jim;
130 {
131 print [jim].value;
132 }
133 """
134 ptr_to_int = """\
135 var ptr to int kelly;
136 {
137 if valid kelly { print @kelly; }
138 }
139 """
140 struct_within_struct = """\
141 type kooba struct {
142 int value;
143 struct {
144 int whirlygig;
145 } barnard;
146 };
147 var kooba jim;
148 {
149 print [jim].value;
150 }
151 """
152 named_int = """\
153 type kooba int;
154 var kooba jim;
155 {
156 print jim;
157 }
158 """
159 dereference_outside_conditional = """\
160 type node struct {
161 int value;
162 ptr to node next;
163 };
164 var ptr to node jim;
165 {
166 jim = malloc node;
167 print [@jim].value;
168 free jim;
169 }
170 """
171 dereference_outside_safe_area = """\
172 type node struct {
173 int value;
174 ptr to node next;
175 };
176 var ptr to node jim;
177 var ptr to node murray;
178 {
179 jim = malloc node;
180 if valid jim {
181 jim = murray;
182 print [@jim].value;
183 }
184 free jim;
185 }
186 """
187 dereference_after_free = """\
188 type node struct {
189 int value;
190 ptr to node next;
191 };
192 var ptr to node jim;
193 var ptr to node donald;
194 {
195 jim = malloc node;
196 donald = jim;
197 if valid jim {
198 free donald;
199 print [@jim].value;
200 }
201 }
202 """
203 dereference_within_nested_safe_area = """\
204 type node struct {
205 int value;
206 ptr to node next;
207 };
208 var ptr to node jim;
209 {
210 jim = malloc node;
211 if valid jim {
212 [@jim].next = malloc node;
213 }
214 if valid jim {
215 if valid [@jim].next {
216 print [@jim].value;
217 }
218 }
219 free jim;
220 }
221 """
222 allocated_values_initialized = """\
223 type node struct {
224 int value;
225 ptr to node next;
226 };
227 var ptr to node jim;
228 var ptr to node nestor;
229 {
230 jim = malloc node;
231 if valid jim {
232 print [@jim].value;
233 nestor = [@jim].next;
234 if valid nestor {
235 print 99;
236 }
237 }
238 free jim;
239 }
240 """
241 loop_1 = """\
242 var int i;
243 {
244 i = 5;
245 while i {
246 print i;
247 i = (i - 1);
248 }
249 }
250 """
251 allocating_loop = """\
252 type node struct {
253 int value;
254 ptr to node next;
255 };
256 var ptr to node jim;
257 var ptr to node harry;
258 var int i;
259 {
260 jim = malloc node;
261 harry = jim;
262 i = 100;
263 while i {
264 harry = malloc node;
265 if valid jim {
266 [@jim].value = i;
267 }
268 if valid jim {
269 [@jim].next = harry;
270 if valid harry {
271 jim = harry;
272 }
273 }
274 i = (i - 1);
275 }
276 }
277 """
278 free_invalidates = """\
279 type node struct {
280 int value;
281 ptr to node next;
282 };
283 var ptr to node jim;
284 {
285 jim = malloc node;
286 if valid jim {
287 free jim;
288 }
289 if valid jim {
290 print 42;
291 }
292 print 53;
293 }
294 """
295 alias_is_invalidated = """\
296 type node struct {
297 int value;
298 ptr to node next;
299 };
300 var ptr to node jim;
301 var ptr to node harry;
302 var ptr to node bertie;
303 var ptr to node albert;
304 var int i;
305 {
306 albert = malloc node;
307 jim = albert;
308 harry = jim;
309 i = 100;
310 while i {
311 harry = malloc node;
312 if valid jim {
313 [@jim].value = i;
314 }
315 if (i = 87) {
316 bertie = jim;
317 }
318 if valid jim {
319 [@jim].next = harry;
320 if valid harry {
321 jim = harry;
322 }
323 }
324 i = (i - 1);
325 }
326 free bertie;
327 jim = albert;
328 while valid jim {
329 if valid jim {
330 print [@jim].value;
331 jim = [@jim].next;
332 }
333 }
334 }
335 """
336 allocate_and_free_loop = """\
337 type node struct {
338 int value;
339 ptr to node next;
340 };
341 var ptr to node fred;
342 var ptr to node george;
343 var int i;
344 {
345 i = 100;
346 while i {
347 fred = malloc node;
348 if valid fred {
349 [@fred].value = i;
350 }
351 if (i = 50) {
352 george = fred;
353 } else {
354 free fred;
355 }
356 i = (i - 1);
357 }
358 if valid george {
359 print [@george].value;
360 }
361 }
362 """
363
364
365 if __name__ == "__main__":
366 import doctest
367 doctest.testmod()