diff --git a/doc/TODO.markdown b/doc/TODO.markdown index b11bc4b..7c50f3d 100644 --- a/doc/TODO.markdown +++ b/doc/TODO.markdown @@ -1,7 +1,6 @@ TODO ---- -* analyzer needs to resolve module '' → current module * including files, library files should be **handled by the implementation** * document, too, the implementation-dependent nature of input and output * define a stringify-repr operation on terms @@ -10,14 +9,14 @@ * `emit` must be 8-bit clean, i.e. can emit `\x00` * tests for `emit` * option for ref interp to not output result (or by default, don't) -* "fakie" interpreter +* "mini" interpreter that handles variables (ouch) -### lower-priority/experimental ### +### lower-priority ### +* $:reverse as a builtin +* $:equal should be proper equality of terms * error reporting: line number * error handling: skip to next sentinel and report more errors -* regex-like shortcuts: `\w` for "word", `\s` for "whitespace", etc. -* EOF and nil are the same? it would make sense... call it `end`? * module-level updatable variables. * tests for failing when utf8 scanner hits badly-encoded utf8 * numeric values... somehow. number('65') = #65. decode(ascii, 'A') = #65. @@ -25,35 +24,41 @@ * `using` production x: x's scanner defaults to utf8, not x * figure out good way to do aliases with the Tamsin-parser-in-Tamsin (dynamic grammar is really more of a Zz thing...) +* should be able to import ("open") other modules into your own namespace. +* meta-circular implementation of compiler! +* pattern match in send: + * `fields → F@fields(H,T) & H` +* maps, implemented as hash tables. + * `Table ← {} & fields → F@fields(H,T) & Table[H] ← T` +* on that topic — production values and/or lambda productions... +* pretty-print AST for error messages +* `$.alpha` +* `$.digit` +* don't consume stdin until asked to scan. +* full term expressions -- maybe +* non-backtracking versions of `|` and `{}`? 
`|!` and `{}!` + +### wild ideas ### + +* regex-like shortcuts: `\w` for "word", `\s` for "whitespace", etc. +* EOF and nil are the same? it would make sense... call it `end`? * productions with names with arbitrary characters in them. * something like «foo» but foo is the name of a *non*terminal — symbolic production references (like Perl's horrible globs as a cheap substitute for actual function references or lambdas.) * turn system library back into built-in keywords (esp. if : can be used) -* should be able to import ("open") other modules into your own namespace. -* meta-circular implementation of compiler! * Tamsin scanner: more liberal (every non-alphanum+_ symbol scans as itself, incl. ones that have no meaning currently like `*` and `?`) * use `←` instead of `@`, why not? -* pattern match in send: - * `fields → F@fields(H,T) & H` -* maps, implemented as hash tables. - * `Table ← {} & fields → F@fields(H,T) & Table[H] ← T` -* on that topic — production values and/or lambda productions... * auto-generate terms from productions, like Rooibos does * `;` = `&`? -* pretty-print AST for error messages -* `$.alpha` -* `$.digit` -* don't consume stdin until asked to scan. * token classes... somehow. (then numeric is just a special token class?) -* term expressions -- harder than it sounds + a token class is just the "call stack" of productions at the time it + was scanned * be generous and allow "xyz" in term context position? -* non-backtracking versions of `|` and `{}`? (very advanced) * «» could be an alias w/right sym (`,,`, `„`) (still need to scan it specially though) -* special form that consumes rest of input from the Tamsin source -- gimmick +* special form that consumes rest of input from the Tamsin source -- + maybe not such a gimmick since micro-tamsin does this * feature-testing: `$.exists('$.blargh') | do_without_blargh` * ternary: `foo ? bar : baz` -- if foo succeeded, do bar, else do baz. 
-* a second implementation, in C -- with compiler to C and meta-circular - implementation, this can be generated! diff --git a/doc/Tamsin.markdown b/doc/Tamsin.markdown index 61b4605..0f8e1fb 100644 --- a/doc/Tamsin.markdown +++ b/doc/Tamsin.markdown @@ -558,7 +558,7 @@ | main = S ← blerf & "x" & frelb. + x - ? no 'frelb' production defined + ? no 'main:frelb' production defined ### Aside: ← vs. → ### @@ -1047,26 +1047,60 @@ + yy@ = @ -`:foo` (and indeed `foo`) should refer to the production `foo` in the -same module as the production where it's called from, but this doesn't work yet. - - | blah { - | expr = :goo. - | goo = "y". - | } - | main = blah:expr. - | goo = "x". - + y - = y - - | blah { - | expr = goo. - | goo = "y". - | } - | main = blah:expr. - | goo = "x". - + y - = y +`:foo` (and indeed `foo`) refers to the production `foo` in the +same module as the production where it's called from. + + | blah { + | expr = :goo. + | goo = "y". + | } + | main = blah:expr. + | goo = "x". + + y + = y + + | foo { + | expr = goo. + | goo = "6". + | } + | bar { + | expr = goo. + | goo = "4". + | } + | main = foo:goo & bar:goo. + + 64 + = 4 + +Can't call a production or a module that doesn't exist. + + | foo { + | expr = goo. + | goo = "6". + | } + | main = foo:zoo. + ? no 'foo:zoo' production defined + + | foo { + | expr = goo. + | goo = "6". + | } + | main = zoo. + ? no 'main:zoo' production defined + + | foo { + | expr = goo. + | goo = "6". + | } + | main = boo:zoo. + ? no 'boo' module defined + +You can have a Tamsin program that is all modules and no productions, but +you can't run it. + + | foo { + | main = "6". + | } + ? no 'main:main' production defined Evaluation ---------- diff --git a/src/tamsin/analyzer.py b/src/tamsin/analyzer.py index af9b146..94df7bc 100644 --- a/src/tamsin/analyzer.py +++ b/src/tamsin/analyzer.py @@ -15,12 +15,17 @@ """The Analyzer takes a desugared AST, walks it, and returns a new AST. 
It is responsible for: - * Looking for undefined nonterminals and raising an error if such found. - (this includes 'main') * Finding the set of local variable names used in each production and sticking that in the locals_ field of the new Production node. + * Creating a map from module name -> Module and + sticking that in the modmap field of the Program node. * Creating a map from production name -> list of productions and - sticking that in the prodmap field of the new Program node. + sticking that in the prodmap field of each Module node. + * Resolving any '' modules in Prodrefs to the name of the current + module. + + * Looking for undefined nonterminals and raising an error if such found. + (this includes 'main') (this is done at the end by analyze_prodrefs) TODO: it should also find any locals that are accessed before being set """ @@ -29,27 +34,38 @@ self.program = program self.prodnames = set() self.modnames = set() + self.current_module = None def analyze(self, ast): if isinstance(ast, Program): for mod in ast.modlist: self.modnames.add(mod.name) modmap = {} + modlist = [] for mod in ast.modlist: mod = self.analyze(mod) + modlist.append(mod) modmap[mod.name] = mod + if 'main' not in modmap: + raise ValueError("no 'main' module defined") if 'main' not in modmap['main'].prodmap: - raise ValueError("no 'main' production defined") - return Program(modmap, ast.modlist) + raise ValueError("no 'main:main' production defined") + self.program = Program(modmap, modlist) + self.analyze_prodrefs(self.program) + return self.program elif isinstance(ast, Module): + self.current_module = ast for prod in ast.prodlist: self.prodnames.add(prod.name) prodmap = {} + prodlist = [] for prod in ast.prodlist: prod = self.analyze(prod) prod.rank = len(prodmap.setdefault(prod.name, [])) prodmap[prod.name].append(prod) - return Module(ast.name, prodmap, ast.prodlist) + prodlist.append(prod) + self.current_module = None + return Module(ast.name, prodmap, prodlist) elif 
isinstance(ast, Production): locals_ = set() body = self.analyze(ast.body) @@ -60,13 +76,9 @@ elif isinstance(ast, And): return And(self.analyze(ast.lhs), self.analyze(ast.rhs)) elif isinstance(ast, Using): - return Using(self.analyze(ast.rule), ast.prodref) + return Using(self.analyze(ast.rule), self.analyze(ast.prodref)) elif isinstance(ast, Call): - prodref = ast.prodref - if prodref.module == '' and prodref.name not in self.prodnames: - raise ValueError("no '%s' production defined" % prodref.name) - # TODO: also check builtins? - return ast + return Call(self.analyze(ast.prodref), ast.args, ast.ibuf) elif isinstance(ast, Send): assert isinstance(ast.variable, Variable), ast return Send(self.analyze(ast.rule), ast.variable) @@ -81,6 +93,12 @@ return Concat(self.analyze(ast.lhs), self.analyze(ast.rhs)) elif isinstance(ast, Term): return ast + elif isinstance(ast, Prodref): + module = ast.module + if module == '': + module = self.current_module.name + new = Prodref(module, ast.name) + return new else: raise NotImplementedError(repr(ast)) @@ -103,3 +121,47 @@ locals_.add(ast.variable.name) elif isinstance(ast, Not) or isinstance(ast, While): self.collect_locals(ast.rule, locals_) + + def analyze_prodrefs(self, ast): + """does not return anything""" + if isinstance(ast, Program): + for mod in ast.modlist: + self.analyze_prodrefs(mod) + elif isinstance(ast, Module): + for prod in ast.prodlist: + self.analyze_prodrefs(prod) + elif isinstance(ast, Production): + self.analyze_prodrefs(ast.body) + elif isinstance(ast, Or) or isinstance(ast, And): + self.analyze_prodrefs(ast.lhs) + self.analyze_prodrefs(ast.rhs) + elif isinstance(ast, Using): + self.analyze_prodrefs(ast.rule) + self.analyze_prodrefs(ast.prodref) + elif isinstance(ast, Call): + self.analyze_prodrefs(ast.prodref) + elif isinstance(ast, Send): + self.analyze_prodrefs(ast.rule) + elif isinstance(ast, Set): + pass + elif isinstance(ast, Not): + self.analyze_prodrefs(ast.rule) + elif isinstance(ast, While): + 
self.analyze_prodrefs(ast.rule) + elif isinstance(ast, Concat): + pass + elif isinstance(ast, Term): + pass + elif isinstance(ast, Prodref): + assert ast.module != '', repr(ast) + if ast.module == '$': + return # TODO: also check builtins? + if ast.module not in self.program.modmap: + raise KeyError("no '%s' module defined" % ast.module) + module = self.program.modmap[ast.module] + if ast.name not in module.prodmap: + raise KeyError("no '%s:%s' production defined" % + (ast.module, ast.name) + ) + else: + raise NotImplementedError(repr(ast)) diff --git a/src/tamsin/ast.py b/src/tamsin/ast.py index 928ebfb..a6b7d8e 100644 --- a/src/tamsin/ast.py +++ b/src/tamsin/ast.py @@ -30,8 +30,7 @@ def find_productions(self, prodref): mod = prodref.module name = prodref.name - if mod == '': - mod = 'main' + assert mod != '' if mod == '$': formals = { 'equal': [Variable('L'), Variable('R')], @@ -45,11 +44,16 @@ }.get(name, []) return [Production('$.%s' % name, 0, formals, [], None)] else: - return self.modmap[mod].prodmap[name] + if mod not in self.modmap: + raise KeyError("no '%s' module defined" % mod) + prodmap = self.modmap[mod].prodmap + if name not in prodmap: + raise KeyError("no '%s:%s' production defined" % (mod, name)) + return prodmap[name] def __repr__(self): - return "Program(%r, %r, %r, %r)" % ( + return "Program(%r, %r)" % ( self.modmap, self.modlist ) diff --git a/src/tamsin/interpreter.py b/src/tamsin/interpreter.py index 276dc9d..bc200fc 100644 --- a/src/tamsin/interpreter.py +++ b/src/tamsin/interpreter.py @@ -234,7 +234,7 @@ return self.interpret(ast.rhs) elif isinstance(ast, Call): prodref = ast.prodref - #prodmod = prodref[1] + module = prodref.module name = prodref.name args = ast.args ibuf = ast.ibuf diff --git a/test.sh b/test.sh index 179980c..4a5a42f 100755 --- a/test.sh +++ b/test.sh @@ -7,11 +7,12 @@ if [ x$1 = x ]; then $0 interpreter && $0 compiler && - $0 scanner && - $0 parser && - $0 ast && + #$0 scanner && + #$0 parser && + #$0 ast && $0 
compiledast && $0 compileddesugarer && + $0 micro && echo "All tests passed!" exit $? fi @@ -90,8 +91,7 @@ echo "Testing Micro-Tamsin interpreter..." FILES="doc/Micro-Tamsin.markdown" falderal $VERBOSE --substring-error fixture/micro-tamsin.markdown $FILES -elif [ x$1 = xinterpreter ]; then +elif [ x$1 = xinterpreter -o x$1 = xi ]; then echo "Testing Python interpreter..." falderal $VERBOSE --substring-error fixture/tamsin.py.markdown $FILES fi - \ No newline at end of file