Merge pull request #16 from catseye/goto-at-end-of-block
goto only at end of block, not only in tail position
Chris Pressey authored 6 years ago
GitHub committed 6 years ago
3 | 3 | 0.18 |
4 | 4 | ---- |
5 | 5 | |
6 | * Syntactically, `goto` may only appear at the end of a block. | |
7 | It need no longer be the final instruction in a routine, | |
8 | as long as the type context is consistent at every exit. | |
6 | 9 | * `cmp` instruction can now perform a 16-bit unsigned comparison |
7 | 10 | of `word` memory locations (at the cost of trashing `a`.) |
8 | 11 | * Fixed pathological memory use in the lexical scanner - should |
54 | 54 | |
55 | 55 | ### Tail-call optimization |
56 | 56 | |
57 | More generally, define a block as having zero or one `goto`s at the end. (and `goto`s cannot | |
58 | appear elsewhere.) | |
59 | ||
60 | 57 | If a block ends in a `call`, can that be converted to end in a `goto`? Why not? I think it can, |
61 | 58 | if the block is in tail position. The constraints should iron out the same both ways. |
62 | 59 | |
63 | And - once we have this - why do we need `goto` to be in tail position, strictly? | |
64 | 60 | As long as the routine has consistent type context every place it exits, that should be fine. |
65 | 61 | |
66 | 62 | ### "Include" directives |
33 | 33 | pass |
34 | 34 | |
35 | 35 | |
36 | class InconsistentExitError(StaticAnalysisError): | |
37 | """The type context differs at two different exit points of the routine.""" | |
38 | pass | |
39 | ||
40 | ||
36 | 41 | class ForbiddenWriteError(StaticAnalysisError): |
37 | 42 | pass |
38 | 43 | |
42 | 47 | |
43 | 48 | |
44 | 49 | class IllegalJumpError(StaticAnalysisError): |
50 | pass | |
51 | ||
52 | ||
53 | class TerminatedContextError(StaticAnalysisError): | |
54 | """What the program is doing here is not valid, due to preceding `goto`s, | |
55 | which make this dead code.""" | |
45 | 56 | pass |
46 | 57 | |
47 | 58 | |
100 | 111 | self._touched = set() |
101 | 112 | self._range = dict() |
102 | 113 | self._writeable = set() |
114 | self._terminated = False | |
103 | 115 | self._gotos_encountered = set() |
104 | 116 | |
105 | 117 | for ref in inputs: |
131 | 143 | c._writeable = set(self._writeable) |
132 | 144 | return c |
133 | 145 | |
146 | def update_from(self, other): | |
147 | self.routines = other.routines | |
148 | self.routine = other.routine | |
149 | self._touched = set(other._touched) | |
150 | self._range = dict(other._range) | |
151 | self._writeable = set(other._writeable) | |
152 | self._terminated = other._terminated | |
153 | self._gotos_encounters = set(other._gotos_encountered) | |
154 | ||
134 | 155 | def each_meaningful(self): |
135 | 156 | for ref in self._range.keys(): |
136 | 157 | yield ref |
137 | 158 | |
138 | 159 | def each_touched(self): |
139 | 160 | for ref in self._touched: |
161 | yield ref | |
162 | ||
163 | def each_writeable(self): | |
164 | for ref in self._writeable: | |
140 | 165 | yield ref |
141 | 166 | |
142 | 167 | def assert_meaningful(self, *refs, **kwargs): |
278 | 303 | def encountered_gotos(self): |
279 | 304 | return self._gotos_encountered |
280 | 305 | |
306 | def set_terminated(self): | |
307 | # Having a terminated context and having encountered gotos is not the same thing. | |
308 | self._terminated = True | |
309 | ||
310 | def has_terminated(self): | |
311 | return self._terminated | |
312 | ||
281 | 313 | def assert_types_for_read_table(self, instr, src, dest, type_): |
282 | 314 | if (not TableType.is_a_table_type(src.ref.type, type_)) or (not dest.type == type_): |
283 | 315 | raise TypeMismatchError(instr, '{} and {}'.format(src.ref.name, dest.name)) |
363 | 395 | |
364 | 396 | def analyze_routine(self, routine): |
365 | 397 | assert isinstance(routine, Routine) |
366 | self.current_routine = routine | |
367 | 398 | if routine.block is None: |
368 | 399 | # it's an extern, that's fine |
369 | 400 | return |
401 | ||
402 | self.current_routine = routine | |
370 | 403 | type_ = routine.location.type |
371 | 404 | context = Context(self.routines, routine, type_.inputs, type_.outputs, type_.trashes) |
405 | self.exit_contexts = [] | |
372 | 406 | |
373 | 407 | if self.debug: |
374 | 408 | print("at start of routine `{}`:".format(routine.name)) |
375 | 409 | print(context) |
376 | 410 | |
377 | 411 | self.analyze_block(routine.block, context) |
378 | trashed = set(context.each_touched()) - set(context.each_meaningful()) | |
379 | 412 | |
380 | 413 | if self.debug: |
381 | 414 | print("at end of routine `{}`:".format(routine.name)) |
389 | 422 | print('-' * 79) |
390 | 423 | print('') |
391 | 424 | |
392 | # even if we goto another routine, we can't trash an output. | |
425 | if self.exit_contexts: | |
426 | # check that they are all consistent | |
427 | exit_context = self.exit_contexts[0] | |
428 | exit_meaningful = set(exit_context.each_meaningful()) | |
429 | exit_touched = set(exit_context.each_touched()) | |
430 | exit_writeable = set(exit_context.each_writeable()) | |
431 | for ex in self.exit_contexts[1:]: | |
432 | if set(ex.each_meaningful()) != exit_meaningful: | |
433 | raise InconsistentExitError("Exit contexts are not consistent") | |
434 | if set(ex.each_touched()) != exit_touched: | |
435 | raise InconsistentExitError("Exit contexts are not consistent") | |
436 | if set(ex.each_writeable()) != exit_writeable: | |
437 | raise InconsistentExitError("Exit contexts are not consistent") | |
438 | context.update_from(exit_context) | |
439 | ||
440 | trashed = set(context.each_touched()) - set(context.each_meaningful()) | |
441 | ||
442 | # these all apply whether we encountered goto(s) in this routine, or not...: | |
443 | ||
444 | # can't trash an output. | |
393 | 445 | for ref in trashed: |
394 | 446 | if ref in type_.outputs: |
395 | 447 | raise UnmeaningfulOutputError(routine, ref.name) |
396 | 448 | |
397 | if not context.encountered_gotos(): | |
398 | for ref in type_.outputs: | |
399 | context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError) | |
400 | for ref in context.each_touched(): | |
401 | if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref): | |
402 | raise ForbiddenWriteError(routine, ref.name) | |
449 | # all outputs are meaningful. | |
450 | for ref in type_.outputs: | |
451 | context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError) | |
452 | ||
453 | # if something was touched, then it should have been declared to be writable. | |
454 | for ref in context.each_touched(): | |
455 | if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref): | |
456 | raise ForbiddenWriteError(routine, ref.name) | |
457 | ||
458 | self.exit_contexts = None | |
403 | 459 | self.current_routine = None |
404 | 460 | return context |
405 | 461 | |
432 | 488 | dest = instr.dest |
433 | 489 | src = instr.src |
434 | 490 | |
435 | if context.encountered_gotos(): | |
436 | raise IllegalJumpError(instr, instr) | |
491 | if context.has_terminated(): | |
492 | raise TerminatedContextError(instr, instr) | |
437 | 493 | |
438 | 494 | if opcode == 'ld': |
439 | 495 | if isinstance(src, IndexedRef): |
677 | 733 | self.assert_affected_within('trashes', type_, current_type) |
678 | 734 | |
679 | 735 | context.encounter_gotos(set([instr.location])) |
736 | ||
737 | # Now that we have encountered a goto, we update the | |
738 | # context here to match what someone calling the goto'ed | |
739 | # function directly, would expect. (which makes sense | |
740 | # when you think about it; if this goto's F, then calling | |
741 | # this is like calling F, from the perspective of what is | |
742 | # returned.) | |
743 | # | |
744 | # However, this isn't the current context anymore. This | |
745 | # is an exit context of this routine. | |
746 | ||
747 | exit_context = context.clone() | |
748 | ||
749 | for ref in type_.outputs: | |
750 | exit_context.set_touched(ref) # ? | |
751 | exit_context.set_written(ref) | |
752 | ||
753 | for ref in type_.trashes: | |
754 | exit_context.assert_writeable(ref) | |
755 | exit_context.set_touched(ref) | |
756 | exit_context.set_unmeaningful(ref) | |
757 | ||
758 | self.exit_contexts.append(exit_context) | |
759 | ||
760 | # When we get to the end, we'll check that all the | |
761 | # exit contexts are consistent with each other. | |
762 | ||
763 | # We set the current context as having terminated. | |
764 | # If we are in a branch, the merge will deal with | |
765 | # having terminated. If we are at the end of the | |
766 | # routine, the routine end will deal with that. | |
767 | ||
768 | context.set_terminated() | |
769 | ||
680 | 770 | elif opcode == 'trash': |
681 | 771 | context.set_touched(instr.dest) |
682 | 772 | context.set_unmeaningful(instr.dest) |
715 | 805 | message='initialized in block 2 but not in block 1 of `if {}`'.format(instr.src) |
716 | 806 | ) |
717 | 807 | |
718 | # merge the contexts. this used to be a method called `set_from` | |
719 | context._touched = set(context1._touched) | set(context2._touched) | |
720 | context.set_meaningful(*list(outgoing_meaningful)) | |
721 | context._writeable = set(context1._writeable) | set(context2._writeable) | |
808 | # merge the contexts. | |
809 | ||
810 | # first, the easy case: if one of the contexts has terminated, just use the other one. | |
811 | # if both have terminated, we return a terminated context, and that's OK. | |
812 | ||
813 | if context1.has_terminated(): | |
814 | context.update_from(context2) | |
815 | elif context2.has_terminated(): | |
816 | context.update_from(context1) | |
817 | else: | |
818 | # the more complicated case: merge the contents of the contexts. | |
819 | context._touched = set(context1._touched) | set(context2._touched) | |
820 | context.set_meaningful(*list(outgoing_meaningful)) | |
821 | context._writeable = set(context1._writeable) | set(context2._writeable) | |
822 | ||
823 | # in both cases, we need to merge the encountered gotos, in order that | |
824 | # fallthru optimization continues to work correctly. | |
722 | 825 | context.encounter_gotos(context1.encountered_gotos() | context2.encountered_gotos()) |
723 | 826 | |
724 | 827 | for ref in outgoing_trashes: |
731 | 834 | self.analyze_block(instr.block, context) |
732 | 835 | if instr.src is not None: # None indicates 'repeat forever' |
733 | 836 | context.assert_meaningful(instr.src) |
837 | ||
838 | if context.encountered_gotos(): | |
839 | raise IllegalJumpError(instr, instr) | |
734 | 840 | |
735 | 841 | # now analyze it having been executed a second time, with the context |
736 | 842 | # of it having already been executed. |
379 | 379 | self.scanner.expect('{') |
380 | 380 | while not self.scanner.on('}'): |
381 | 381 | instrs.append(self.instr()) |
382 | if isinstance(instrs[-1], SingleOp) and instrs[-1].opcode == 'goto': | |
383 | break | |
382 | 384 | self.scanner.expect('}') |
383 | 385 | return Block(self.scanner.line_number, instrs=instrs) |
384 | 386 |
2325 | 2325 | | } |
2326 | 2326 | ? TypeMismatchError |
2327 | 2327 | |
2328 | A `goto` cannot appear within a `save` block, even if it is otherwise in tail position. | |
2328 | A `goto` cannot appear within a `save` block. | |
2329 | 2329 | |
2330 | 2330 | | define other routine |
2331 | 2331 | | trashes a, z, n |
2370 | 2370 | | } |
2371 | 2371 | = ok |
2372 | 2372 | |
2373 | A `goto` cannot appear within a `with interrupts` block, even if it is | |
2374 | otherwise in tail position. | |
2373 | A `goto` cannot appear within a `with interrupts` block. | |
2375 | 2374 | |
2376 | 2375 | | vector routine |
2377 | 2376 | | inputs x |
3018 | 3017 | | } |
3019 | 3018 | ? UnmeaningfulOutputError: x |
3020 | 3019 | |
3021 | `goto`, if present, must be in tail position (the final instruction in a routine.) | |
3020 | For now at least, you cannot have a `goto` inside a `repeat` loop. | |
3022 | 3021 | |
3023 | 3022 | | define bar routine trashes x, z, n { |
3024 | 3023 | | ld x, 200 |
3026 | 3025 | | |
3027 | 3026 | | define main routine trashes x, z, n { |
3028 | 3027 | | ld x, 0 |
3029 | | goto bar | |
3030 | | } | |
3031 | = ok | |
3028 | | repeat { | |
3029 | | inc x | |
3030 | | goto bar | |
3031 | | } until z | |
3032 | | } | |
3033 | ? IllegalJumpError | |
3034 | ||
3035 | `goto`, as a matter of syntax, can only appear at the end | |
3036 | of a block; but it need not be the final instruction in a | |
3037 | routine. | |
3032 | 3038 | |
3033 | 3039 | | define bar routine trashes x, z, n { |
3034 | 3040 | | ld x, 200 |
3035 | 3041 | | } |
3036 | 3042 | | |
3037 | 3043 | | define main routine trashes x, z, n { |
3044 | | ld x, 0 | |
3038 | 3045 | | goto bar |
3039 | | ld x, 0 | |
3040 | | } | |
3041 | ? IllegalJumpError | |
3046 | | } | |
3047 | = ok | |
3042 | 3048 | |
3043 | 3049 | | define bar routine trashes x, z, n { |
3044 | 3050 | | ld x, 200 |
3063 | 3069 | | ld x, 1 |
3064 | 3070 | | goto bar |
3065 | 3071 | | } |
3066 | | ld x, 0 | |
3067 | | } | |
3068 | ? IllegalJumpError | |
3072 | | goto bar | |
3073 | | } | |
3074 | = ok | |
3075 | ||
3076 | | define bar routine trashes x, z, n { | |
3077 | | ld x, 200 | |
3078 | | } | |
3079 | | | |
3080 | | define main routine trashes x, z, n { | |
3081 | | ld x, 0 | |
3082 | | if z { | |
3083 | | ld x, 1 | |
3084 | | goto bar | |
3085 | | } | |
3086 | | ld x, 0 | |
3087 | | } | |
3088 | = ok | |
3069 | 3089 | |
3070 | 3090 | | define bar routine trashes x, z, n { |
3071 | 3091 | | ld x, 200 |
3098 | 3118 | | } |
3099 | 3119 | = ok |
3100 | 3120 | |
3101 | For the purposes of `goto`, the end of a loop is never tail position. | |
3102 | ||
3103 | 3121 | | define bar routine trashes x, z, n { |
3104 | 3122 | | ld x, 200 |
3105 | 3123 | | } |
3106 | 3124 | | |
3107 | 3125 | | define main routine trashes x, z, n { |
3108 | 3126 | | ld x, 0 |
3109 | | repeat { | |
3110 | | inc x | |
3127 | | if z { | |
3128 | | ld x, 1 | |
3111 | 3129 | | goto bar |
3112 | | } until z | |
3113 | | } | |
3114 | ? IllegalJumpError | |
3130 | | } else { | |
3131 | | ld x, 0 | |
3132 | | } | |
3133 | | ld x, 0 | |
3134 | | } | |
3135 | = ok | |
3136 | ||
3137 | | define bar routine trashes x, z, n { | |
3138 | | ld x, 200 | |
3139 | | } | |
3140 | | | |
3141 | | define main routine trashes x, z, n { | |
3142 | | ld x, 0 | |
3143 | | if z { | |
3144 | | ld x, 1 | |
3145 | | goto bar | |
3146 | | } else { | |
3147 | | ld x, 0 | |
3148 | | } | |
3149 | | goto bar | |
3150 | | } | |
3151 | = ok | |
3152 | ||
3153 | Even though `goto` can only appear at the end of a block, | |
3154 | you can still wind up with dead code; the analysis detects | |
3155 | this. | |
3156 | ||
3157 | | define bar routine trashes x, z, n { | |
3158 | | ld x, 200 | |
3159 | | } | |
3160 | | | |
3161 | | define main routine trashes x, z, n { | |
3162 | | ld x, 0 | |
3163 | | if z { | |
3164 | | ld x, 1 | |
3165 | | goto bar | |
3166 | | } else { | |
3167 | | ld x, 0 | |
3168 | | goto bar | |
3169 | | } | |
3170 | | ld x, 100 | |
3171 | | } | |
3172 | ? TerminatedContextError | |
3173 | ||
3174 | It is important that the type context at every | |
3175 | `goto` is compatible with the type context at the end of | |
3176 | the routine. | |
3177 | ||
3178 | | define bar routine | |
3179 | | inputs x | |
3180 | | trashes x, z, n | |
3181 | | { | |
3182 | | ld x, 200 | |
3183 | | } | |
3184 | | | |
3185 | | define main routine trashes x, z, n { | |
3186 | | ld x, 0 | |
3187 | | if z { | |
3188 | | ld x, 1 | |
3189 | | goto bar | |
3190 | | } else { | |
3191 | | ld x, 0 | |
3192 | | } | |
3193 | | ld x, 1 | |
3194 | | } | |
3195 | = ok | |
3196 | ||
3197 | Here, we try to trash `x` before `goto`ing a routine that inputs `x`. | |
3198 | ||
3199 | | define bar routine | |
3200 | | inputs x | |
3201 | | trashes x, z, n | |
3202 | | { | |
3203 | | ld x, 200 | |
3204 | | } | |
3205 | | | |
3206 | | define main routine | |
3207 | | outputs a | |
3208 | | trashes x, z, n | |
3209 | | { | |
3210 | | ld x, 0 | |
3211 | | if z { | |
3212 | | trash x | |
3213 | | goto bar | |
3214 | | } else { | |
3215 | | trash x | |
3216 | | } | |
3217 | | ld a, 1 | |
3218 | | } | |
3219 | ? UnmeaningfulReadError: x | |
3220 | ||
3221 | Here, we declare that main outputs `a`, but we `goto` a routine that does not output `a`. | |
3222 | ||
3223 | | define bar routine | |
3224 | | inputs x | |
3225 | | trashes x, z, n | |
3226 | | { | |
3227 | | ld x, 200 | |
3228 | | } | |
3229 | | | |
3230 | | define main routine | |
3231 | | outputs a | |
3232 | | trashes x, z, n | |
3233 | | { | |
3234 | | ld x, 0 | |
3235 | | if z { | |
3236 | | ld x, 1 | |
3237 | | goto bar | |
3238 | | } else { | |
3239 | | ld x, 2 | |
3240 | | } | |
3241 | | ld a, 1 | |
3242 | | } | |
3243 | ? UnmeaningfulOutputError: a | |
3244 | ||
3245 | Here, we declare that main outputs `a`, and we `goto` a routine that outputs `a`, so that's OK. | |
3246 | ||
3247 | | define bar routine | |
3248 | | inputs x | |
3249 | | outputs a | |
3250 | | trashes x, z, n | |
3251 | | { | |
3252 | | ld x, 200 | |
3253 | | ld a, 1 | |
3254 | | } | |
3255 | | | |
3256 | | define main routine | |
3257 | | outputs a | |
3258 | | trashes x, z, n | |
3259 | | { | |
3260 | | ld x, 0 | |
3261 | | if z { | |
3262 | | ld x, 1 | |
3263 | | goto bar | |
3264 | | } else { | |
3265 | | ld x, 2 | |
3266 | | } | |
3267 | | ld a, 1 | |
3268 | | } | |
3269 | = ok | |
3270 | ||
3271 | Here, we declare that main outputs `a`, and we `goto` two routines, and they both output `a`. | |
3272 | ||
3273 | | define bar0 routine | |
3274 | | inputs x | |
3275 | | outputs a | |
3276 | | trashes x, z, n | |
3277 | | { | |
3278 | | ld a, x | |
3279 | | } | |
3280 | | | |
3281 | | define bar1 routine | |
3282 | | inputs x | |
3283 | | outputs a | |
3284 | | trashes x, z, n | |
3285 | | { | |
3286 | | ld a, 200 | |
3287 | | } | |
3288 | | | |
3289 | | define main routine | |
3290 | | outputs a | |
3291 | | trashes x, z, n | |
3292 | | { | |
3293 | | ld x, 0 | |
3294 | | if z { | |
3295 | | ld x, 1 | |
3296 | | goto bar0 | |
3297 | | } else { | |
3298 | | ld x, 2 | |
3299 | | goto bar1 | |
3300 | | } | |
3301 | | } | |
3302 | = ok | |
3303 | ||
3304 | This is like the case just above, but one routine doesn't output `a`. | |
3305 | ||
3306 | | define bar0 routine | |
3307 | | inputs x | |
3308 | | outputs a | |
3309 | | trashes x, z, n | |
3310 | | { | |
3311 | | ld a, x | |
3312 | | } | |
3313 | | | |
3314 | | define bar1 routine | |
3315 | | inputs x | |
3316 | | trashes x, z, n | |
3317 | | { | |
3318 | | ld x, 200 | |
3319 | | } | |
3320 | | | |
3321 | | define main routine | |
3322 | | outputs a | |
3323 | | trashes x, z, n | |
3324 | | { | |
3325 | | ld x, 0 | |
3326 | | if z { | |
3327 | | ld x, 1 | |
3328 | | goto bar0 | |
3329 | | } else { | |
3330 | | ld x, 2 | |
3331 | | goto bar1 | |
3332 | | } | |
3333 | | } | |
3334 | ? InconsistentExitError | |
3335 | ||
3336 | This is like the case above, but the two routines have different inputs, and that's OK. | |
3337 | ||
3338 | | define bar0 routine | |
3339 | | inputs x | |
3340 | | outputs a | |
3341 | | trashes x, z, n | |
3342 | | { | |
3343 | | ld a, x | |
3344 | | } | |
3345 | | | |
3346 | | define bar1 routine | |
3347 | | outputs a | |
3348 | | trashes x, z, n | |
3349 | | { | |
3350 | | ld a, 200 | |
3351 | | } | |
3352 | | | |
3353 | | define main routine | |
3354 | | outputs a | |
3355 | | trashes x, z, n | |
3356 | | { | |
3357 | | ld x, 0 | |
3358 | | if z { | |
3359 | | ld x, 1 | |
3360 | | goto bar0 | |
3361 | | } else { | |
3362 | | ld x, 2 | |
3363 | | goto bar1 | |
3364 | | } | |
3365 | | } | |
3366 | = ok | |
3367 | ||
3368 | TODO: we should have a lot more test cases for the above, here. | |
3115 | 3369 | |
3116 | 3370 | Can't `goto` a routine that outputs or trashes more than the current routine. |
3117 | 3371 |
550 | 550 | | } |
551 | 551 | = ok |
552 | 552 | |
553 | The label doesn't have to be defined yet at the point | |
554 | in the program text where it is `goto`d. | |
555 | ||
553 | 556 | | define main routine { |
554 | 557 | | goto foo |
555 | 558 | | } |
558 | 561 | | } |
559 | 562 | = ok |
560 | 563 | |
564 | Syntactically, you can `goto` a vector. | |
565 | ||
561 | 566 | | vector routine foo |
562 | 567 | | |
563 | 568 | | define main routine { |
565 | 570 | | } |
566 | 571 | = ok |
567 | 572 | |
573 | But you can't `goto` a label that never gets defined. | |
574 | ||
568 | 575 | | define main routine { |
569 | 576 | | goto foo |
570 | 577 | | } |
571 | 578 | ? SyntaxError |
579 | ||
580 | `goto` may only be the final instruction in a block. | |
581 | ||
582 | | define bar routine trashes x, z, n { | |
583 | | ld x, 200 | |
584 | | } | |
585 | | | |
586 | | define main routine trashes x, z, n { | |
587 | | goto bar | |
588 | | ld x, 0 | |
589 | | } | |
590 | ? Expected '}', but found 'ld' | |
572 | 591 | |
573 | 592 | Buffers and pointers. |
574 | 593 |