Implement `@` (change implicit buffer) in Python. All tests pass.
Cat's Eye Technologies
11 years ago
88 | 88 | | main = $.return(ok). |
89 | 89 | = ok |
90 | 90 | |
91 | ### Implicit Buffer ### | |
92 | ||
93 | Object-oriented languages sometimes have an "implicit self". That means | |
94 | when you say just `foo`, it's assumed (at least, to begin with,) to be a | |
95 | method or field on the current object that is in context. | |
96 | ||
97 | Tamsin, clearly, has an _implicit buffer_. This is the buffer on which | |
98 | scanning/parsing operations like terminals operate. When you call another | |
99 | production from a production, that production you call gets the same | |
100 | implicit buffer you were working on. And `main` gets standard input as | |
101 | its implicit buffer. | |
102 | ||
103 | So, also clearly, there should be some way to alter the implicit buffer | |
104 | when you call another production. And there is. | |
105 | ||
106 | The syntax for this is postfix `@`, because you're pointing the production | |
107 | "at" some other text... | |
108 | ||
109 | | main = set T = 't(a,t(b,c))' & tree @ T. | |
110 | | tree = "t" & "(" & tree → L & "," & tree → R & ")" & return fwee(L, R) | |
111 | | | "a" | "b" | "c". | |
112 | + doesn't matter | |
113 | = fwee(a, fwee(b, c)) | |
114 | ||
115 | 91 | ### Rule Formals ### |
116 | 92 | |
117 | 93 | Then we no longer pattern-match terms. They're just strings. So we... we |
1330 | 1330 | Advanced Scanning |
1331 | 1331 | ----------------- |
1332 | 1332 | |
1333 | ### Changing the scanner in use ### | |
1334 | ||
1335 | There is an implicit scanner in effect at any given point in the program. | |
1333 | ### Implicit Buffer ### | |
1334 | ||
1335 | Object-oriented languages sometimes have an "implicit self". That means | |
1336 | when you say just `foo`, it's (generally) assumed to refer to a method or | |
1337 | field on the current object that is in context. | |
1338 | ||
1339 | Tamsin, clearly, has an _implicit buffer_. This is the buffer on which | |
1340 | scanning/parsing operations like terminals operate. When you call another | |
1341 | production from a production, that production you call gets the same | |
1342 | implicit buffer you were working on. And `main` gets its implicit buffer | |
1343 | from some implementation-defined place (in the reference interpreter, it | |
1344 | gets it from Python's idea of "standard input" to the program.) | |
1345 | ||
1346 | So, also clearly, there should be some way to alter the implicit buffer | |
1347 | when you call another production. And there is. | |
1348 | ||
1349 | The syntax for this is postfix `@`, because you're pointing the production | |
1350 | "at" some other text... | |
1351 | ||
1352 | | main = set T = 't(a,t(b,c))' & tree @ T. | |
1353 | | tree = "t" & "(" & tree → L & "," & tree → R & ")" & return fwee(L, R) | |
1354 | | | "a" | "b" | "c". | |
1355 | + doesn't matter | |
1356 | = fwee(a, fwee(b, c)) | |
1357 | ||
1358 | This is a good way to process atoms in Tamsin. | |
1359 | ||
1360 | | main = print_each_char @ 'Hello'. | |
1361 | | print_each_char = any → C & print C & print_each_char | return 'ok'. | |
1362 | + doesn't matter | |
1363 | = H | |
1364 | = e | |
1365 | = l | |
1366 | = l | |
1367 | = o | |
1368 | = ok | |
1369 | ||
1370 | The term doesn't have to be an atom. The term expression will be flattened. | |
1371 | ||
1372 | | main = print_each_char @ f(b). | |
1373 | | print_each_char = any → C & print C & print_each_char | return 'ok'. | |
1374 | + doesn't matter | |
1375 | = f | |
1376 | = ( | |
1377 | = b | |
1378 | = ) | |
1379 | = ok | |
1380 | ||
1381 | This can be wrapped up to make the term an argument to a production call: | |
1382 | ||
1383 | | main = print_each_char(fo+ob+ar). | |
1384 | | print_each_char(X) = print_each_char_r @ X. | |
1385 | | print_each_char_r = any → C & print C & print_each_char_r | return 'ok'. | |
1386 | + doesn't matter | |
1387 | = f | |
1388 | = o | |
1389 | = o | |
1390 | = b | |
1391 | = a | |
1392 | = r | |
1393 | = ok | |
1394 | ||
1395 | The rule being applied to the specified buffer doesn't have to be a | |
1396 | non-terminal, either. It can be any rule (but watch the precedence.) | |
1397 | ||
1398 | | main = $:alnum @ 'Hi!'. | |
1399 | = H | |
1400 | ||
1401 | | main = {$:alnum} @ 'Hi!'. | |
1402 | = i | |
1403 | ||
1404 | Uses of `@` nest. | |
1405 | ||
1406 | | main = one @ 'I process this string until ! where I digress a bit' & ''. | |
1407 | | one = {"!" & two @ 'Here I digress' | any → C & $:emit(C)}. | |
1408 | | two = {any → C & $:emit(C)}. | |
1409 | = I process this string until Here I digress where I digress a bit | |
1410 | ||
1411 | ### Implicit Scanner ### | |
1412 | ||
1413 | Actually, the implicit buffer is just one component of the _implicit scanner_ | |
1414 | that is in effect at any given point in a Tamsin program. Not only may its | |
1415 | buffer be changed, but its scanning rules, and thus the set of tokens it | |
1416 | returns, may be changed as well. | |
1417 | ||
1336 | 1418 | As you have seen, the default scanner returns single characters. |
1337 | 1419 | |
1338 | 1420 | | main = "a" & "b" & "c". |
1725 | 1807 | | subprogram = "stu" & "uuu". |
1726 | 1808 | + abc stu uuu bac |
1727 | 1809 | = bac |
1810 | ||
1811 | ### Combining `using` and `@` ### | |
1812 | ||
1813 | It is entirely possible to combine `using` and `@`. | |
1814 | ||
1815 | | main = {any → T using scanner & print T} & 'ok'. | |
1816 | | scanner = scan using $:utf8. | |
1817 | | scan = S ← '' & {$:alnum → C & S ← S + C} & {" " | "," | "."} & return S. | |
1818 | + This, this is my string. | |
1819 | = This | |
1820 | = this | |
1821 | = is | |
1822 | = my | |
1823 | = string | |
1824 | = ok | |
1825 | ||
1826 | | main = {any → T using scanner & print T} @ 'This, this is my string.' & | |
1827 | | 'ok'. | |
1828 | | scanner = scan using $:utf8. | |
1829 | | scan = S ← '' & {$:alnum → C & S ← S + C} & {" " | "," | "."} & return S. | |
1830 | = This | |
1831 | = this | |
1832 | = is | |
1833 | = my | |
1834 | = string | |
1835 | = ok | |
1728 | 1836 | |
1729 | 1837 | Implementation-Defined Matters |
1730 | 1838 | ------------------------------ |
5 | 5 | from tamsin.ast import ( |
6 | 6 | Program, Module, Production, ProdBranch, |
7 | 7 | And, Or, Not, While, Call, Send, Set, |
8 | Variable, Using, Concat, Prodref, | |
8 | Variable, Using, On, Concat, Prodref, | |
9 | 9 | TermNode, VariableNode, AtomNode, ConstructorNode |
10 | 10 | ) |
11 | 11 | from tamsin.term import Term, Constructor, Atom |
64 | 64 | return And(self.analyze(ast.lhs), self.analyze(ast.rhs)) |
65 | 65 | elif isinstance(ast, Using): |
66 | 66 | return Using(self.analyze(ast.rule), self.analyze(ast.prodref)) |
67 | elif isinstance(ast, On): | |
68 | return On(self.analyze(ast.rule), self.analyze(ast.texpr)) | |
67 | 69 | elif isinstance(ast, Call): |
68 | return Call(self.analyze(ast.prodref), ast.args, ast.ibuf) | |
70 | return Call(self.analyze(ast.prodref), ast.args) | |
69 | 71 | elif isinstance(ast, Send): |
70 | 72 | assert isinstance(ast.variable, VariableNode), ast |
71 | 73 | return Send(self.analyze(ast.rule), ast.variable) |
100 | 102 | self.collect_locals(ast.rhs, locals_) |
101 | 103 | elif isinstance(ast, Using): |
102 | 104 | self.collect_locals(ast.rule, locals_) |
105 | elif isinstance(ast, On): | |
106 | self.collect_locals(ast.rule, locals_) | |
107 | self.collect_locals(ast.texpr, locals_) | |
103 | 108 | elif isinstance(ast, Call): |
104 | 109 | pass |
105 | 110 | elif isinstance(ast, Send): |
141 | 146 | elif isinstance(ast, Using): |
142 | 147 | self.analyze_prodrefs(ast.rule) |
143 | 148 | self.analyze_prodrefs(ast.prodref) |
149 | elif isinstance(ast, On): | |
150 | self.analyze_prodrefs(ast.rule) | |
144 | 151 | elif isinstance(ast, Call): |
145 | 152 | self.analyze_prodrefs(ast.prodref) |
146 | 153 | elif isinstance(ast, Send): |
234 | 234 | |
235 | 235 | |
236 | 236 | class Call(AST): |
237 | def __init__(self, prodref, args, ibuf): | |
237 | def __init__(self, prodref, args): | |
238 | 238 | self.prodref = prodref |
239 | 239 | for a in args: |
240 | 240 | assert isinstance(a, AST) |
241 | 241 | self.args = args |
242 | self.ibuf = ibuf | |
243 | ||
244 | def __repr__(self): | |
245 | return u"Call(%r, %r, %r)" % ( | |
242 | ||
243 | def __repr__(self): | |
244 | return u"Call(%r, %r)" % ( | |
246 | 245 | self.prodref, |
247 | 246 | self.args, |
248 | self.ibuf | |
249 | 247 | ) |
250 | 248 | |
251 | 249 | def __str__(self): |
308 | 306 | |
309 | 307 | def __str__(self): |
310 | 308 | return "using(%s, %s)" % (self.rule, self.prodref) |
309 | ||
310 | ||
311 | class On(AST): | |
312 | def __init__(self, rule, texpr): | |
313 | self.rule = rule | |
314 | self.texpr = texpr | |
315 | ||
316 | def __repr__(self): | |
317 | return u"On(%r, %r)" % (self.rule, self.texpr) | |
318 | ||
319 | def __str__(self): | |
320 | return "on(%s, %s)" % (self.rule, self.texpr) | |
311 | 321 | |
312 | 322 | |
313 | 323 | class Fold(AST): |
8 | 8 | |
9 | 9 | from tamsin.ast import ( |
10 | 10 | Production, ProdBranch, |
11 | And, Or, Not, While, Call, Send, Set, Concat, Using, Prodref, | |
11 | And, Or, Not, While, Call, Send, Set, Concat, Using, On, Prodref, | |
12 | 12 | TermNode, VariableNode |
13 | 13 | ) |
14 | 14 | from tamsin.term import Atom, Constructor, Variable |
345 | 345 | self.emit("}") |
346 | 346 | elif isinstance(ast, Using): |
347 | 347 | prodref = ast.prodref |
348 | scanner_mod = prodref.module or 'main' | |
348 | scanner_mod = prodref.module | |
349 | 349 | scanner_name = prodref.name |
350 | 350 | if scanner_mod == '$': |
351 | 351 | if scanner_name == 'utf8': |
358 | 358 | )) |
359 | 359 | self.compile_r(ast.rule) |
360 | 360 | self.emit("scanner_pop_engine(scanner);") |
361 | elif isinstance(ast, On): | |
362 | self.emit("{") | |
363 | self.indent() | |
364 | name = self.compile_r(ast.texpr) | |
365 | flat_name = self.new_name() | |
366 | self.emit("struct term *%s = term_flatten(%s);" % (flat_name, name)) | |
367 | self.emit_decl_state() | |
368 | self.emit_save_state() | |
369 | self.emit("scanner->buffer = %s->atom;" % flat_name); | |
370 | self.emit("scanner->size = %s->size;" % flat_name); | |
371 | self.emit("scanner->position = 0;"); | |
372 | self.emit("scanner->reset_position = 0;"); | |
373 | self.compile_r(ast.rule); | |
374 | self.emit_restore_state() | |
375 | self.outdent() | |
376 | self.emit("}") | |
361 | 377 | elif isinstance(ast, Concat): |
362 | 378 | name_lhs = self.compile_r(ast.lhs); |
363 | 379 | name_rhs = self.compile_r(ast.rhs); |
387 | 403 | self.emit("struct term *save_%s;" % local) |
388 | 404 | self.emit("int position;") |
389 | 405 | self.emit("int reset_position;") |
406 | self.emit("const char *buffer;") | |
407 | self.emit("int buffer_size;") | |
390 | 408 | |
391 | 409 | def emit_save_state(self): |
392 | 410 | for local in self.current_branch.locals_: |
393 | 411 | self.emit("save_%s = %s;" % (local, local)) |
394 | 412 | self.emit("position = scanner->position;") |
395 | 413 | self.emit("reset_position = scanner->reset_position;") |
414 | self.emit("buffer = scanner->buffer;") | |
415 | self.emit("buffer_size = scanner->size;") | |
396 | 416 | |
397 | 417 | def emit_restore_state(self): |
398 | 418 | self.emit("scanner->position = position;") |
399 | 419 | self.emit("scanner->reset_position = reset_position;") |
420 | self.emit("scanner->buffer = buffer;") | |
421 | self.emit("scanner->size = buffer_size;") | |
400 | 422 | for local in self.current_branch.locals_: |
401 | 423 | self.emit("%s = save_%s;" % (local, local)) |
402 | 424 |
5 | 5 | from tamsin.ast import ( |
6 | 6 | Program, Module, Production, ProdBranch, |
7 | 7 | And, Or, Not, While, Call, Send, Set, |
8 | Variable, Using, Concat, Fold, Prodref, | |
8 | Variable, Using, On, Concat, Fold, Prodref, | |
9 | 9 | TermNode, VariableNode, AtomNode, ConstructorNode |
10 | 10 | ) |
11 | 11 | from tamsin.term import Term, Atom, Constructor |
59 | 59 | return And(self.desugar(ast.lhs), self.desugar(ast.rhs)) |
60 | 60 | elif isinstance(ast, Using): |
61 | 61 | return Using(self.desugar(ast.rule), ast.prodref) |
62 | elif isinstance(ast, On): | |
63 | return On(self.desugar(ast.rule), self.desugar(ast.texpr)) | |
62 | 64 | elif isinstance(ast, Call): |
63 | 65 | return ast |
64 | 66 | elif isinstance(ast, Send): |
84 | 86 | acc_ = Set(under1, |
85 | 87 | ConstructorNode(ast.constratom.text, |
86 | 88 | [under2, under1])) |
87 | return_ = Call(Prodref('$', 'return'), [under1], None) | |
89 | return_ = Call(Prodref('$', 'return'), [under1]) | |
88 | 90 | return And(And(set_, While(And(send_, acc_))), return_) |
89 | 91 | else: |
90 | 92 | raise NotImplementedError(repr(ast)) |
5 | 5 | import sys |
6 | 6 | |
7 | 7 | from tamsin.ast import ( |
8 | Production, ProdBranch, And, Or, Not, While, Call, Send, Set, Using, | |
8 | Production, ProdBranch, And, Or, Not, While, Call, Send, Set, Using, On, | |
9 | 9 | Prodref, Concat, TermNode |
10 | 10 | ) |
11 | 11 | from tamsin.term import Term, EOF, Atom, Constructor |
183 | 183 | if bindings != False: |
184 | 184 | branch = b |
185 | 185 | break |
186 | # if ibuf is not None: | |
187 | # return self.interpret_on_buffer( | |
188 | # prod, unicode(ibuf.expand(self.context)), | |
189 | # bindings=bindings | |
190 | # ) | |
191 | # else: | |
192 | 186 | # else: |
193 | 187 | # self.event('call_newfangled_parsing_args', prod) |
194 | 188 | # # start a new scope. arg bindings will appear here. |
245 | 239 | prodref = ast.prodref |
246 | 240 | name = prodref.name |
247 | 241 | args = ast.args |
248 | ibuf = ast.ibuf | |
249 | 242 | prod = self.program.find_production(prodref) |
250 | 243 | if prod is None: |
251 | 244 | raise ValueError("internal error: unresolved: " + repr(prodref)) |
279 | 272 | self.event('leave_with', succeeded, result) |
280 | 273 | self.scanner.pop_engine() |
281 | 274 | return (succeeded, result) |
275 | elif isinstance(ast, On): | |
276 | (success, result) = self.interpret(ast.texpr) | |
277 | buffer = str(result.expand(self.context)) | |
278 | self.event('interpret_on_buffer', buffer) | |
279 | saved_scanner_state = self.scanner.get_state() | |
280 | self.scanner.buffer = buffer | |
281 | self.scanner.position = 0 | |
282 | self.scanner.reset_position = 0 | |
283 | (success, result) = self.interpret(ast.rule) | |
284 | self.scanner.install_state(saved_scanner_state) | |
285 | return (success, result) | |
282 | 286 | elif isinstance(ast, Set): |
283 | 287 | (success, variable) = self.interpret(ast.variable) |
284 | 288 | (success, term) = self.interpret(ast.texpr) |
326 | 330 | return (True, ast.to_term()) |
327 | 331 | else: |
328 | 332 | raise NotImplementedError(repr(ast)) |
329 | ||
330 | def interpret_on_buffer(self, ast, buffer, bindings=None): | |
331 | self.event('interpret_on_buffer', buffer) | |
332 | saved_scanner_state = self.scanner.get_state() | |
333 | self.scanner.buffer = buffer | |
334 | self.scanner.position = 0 | |
335 | self.scanner.reset_position = 0 | |
336 | result = self.interpret(ast, bindings=bindings) | |
337 | self.scanner.install_state(saved_scanner_state) | |
338 | return result |
5 | 5 | from tamsin.ast import ( |
6 | 6 | AST, Module, Program, Production, ProdBranch, |
7 | 7 | And, Or, Not, While, Call, Prodref, |
8 | Send, Set, Concat, Using, Fold, | |
8 | Send, Set, Concat, Using, On, Fold, | |
9 | 9 | AtomNode, VariableNode, ConstructorNode, |
10 | 10 | ) |
11 | 11 | from tamsin.term import ( |
130 | 130 | if self.consume('using'): |
131 | 131 | prodref = self.prodref() |
132 | 132 | lhs = Using(lhs, prodref) |
133 | elif self.consume('@'): | |
134 | texpr = self.texpr() | |
135 | lhs = On(lhs, texpr) | |
133 | 136 | return lhs |
134 | 137 | |
135 | 138 | def expr3(self): |
158 | 161 | e = self.expr0() |
159 | 162 | self.expect(']') |
160 | 163 | return Or(e, |
161 | Call(Prodref('$', 'return'), [AtomNode('nil')], None) | |
164 | Call(Prodref('$', 'return'), [AtomNode('nil')]) | |
162 | 165 | ) |
163 | 166 | elif self.consume('{'): |
164 | 167 | e = self.expr0() |
166 | 169 | return While(e) |
167 | 170 | elif self.peek()[0] == '"': |
168 | 171 | s = self.consume_any()[1:-1] |
169 | return Call(Prodref('$', 'expect'), [AtomNode(s)], None) | |
172 | return Call(Prodref('$', 'expect'), [AtomNode(s)]) | |
170 | 173 | elif self.consume(u'«') or self.consume('<<'): |
171 | 174 | t = self.texpr() |
172 | 175 | if self.consume(u'»') or self.consume('>>'): |
173 | return Call(Prodref('$', 'expect'), [t], None) | |
176 | return Call(Prodref('$', 'expect'), [t]) | |
174 | 177 | else: |
175 | 178 | self.error("'>>'") |
176 | 179 | elif self.consume('!'): |
187 | 190 | if self.consume(u'←') or self.consume('<-'): |
188 | 191 | t = self.texpr() |
189 | 192 | else: |
190 | return Call(Prodref('$', 'return'), [v], None) | |
193 | return Call(Prodref('$', 'return'), [v]) | |
191 | 194 | return Set(v, t) |
192 | 195 | else: |
193 | 196 | # implied return of term |
194 | 197 | if self.peek()[0].isupper() or self.peek()[0] == "'": |
195 | 198 | t = self.texpr() |
196 | return Call(Prodref('$', 'return'), [t], None) | |
199 | return Call(Prodref('$', 'return'), [t]) | |
197 | 200 | prohibit_aliases = False |
198 | 201 | if self.peek() == ':': |
199 | 202 | # bleah |
216 | 219 | while self.consume(','): |
217 | 220 | args.append(self.texpr()) |
218 | 221 | self.expect(')') |
219 | ibuf = None | |
220 | if self.consume('@'): | |
221 | ibuf = self.texpr() | |
222 | return Call(prodref, args, ibuf) | |
222 | return Call(prodref, args) | |
223 | 223 | |
224 | 224 | def prodref(self): |
225 | 225 | if self.consume('$'): |