git @ Cat's Eye Technologies SixtyPical / 70ba40b
Merge pull request #16 from catseye/goto-at-end-of-block goto only at end of block, not only in tail position Chris Pressey authored 6 years ago GitHub committed 6 years ago
6 changed file(s) with 419 addition(s) and 39 deletion(s). Raw diff Collapse all Expand all
33 0.18
44 ----
55
6 * Syntactically, `goto` may only appear at the end of a block.
7 It need no longer be the final instruction in a routine,
8 as long as the type context is consistent at every exit.
69 * `cmp` instruction can now perform a 16-bit unsigned comparison
710 of `word` memory locations (at the cost of trashing `a`.)
811 * Fixed pathological memory use in the lexical scanner - should
5454
5555 ### Tail-call optimization
5656
57 More generally, define a block as having zero or one `goto`s at the end. (and `goto`s cannot
58 appear elsewhere.)
59
6057 If a block ends in a `call`, can that be converted to end in a `goto`? Why not? I think it can,
6158 if the block is in tail position. The constraints should iron out the same both ways.
6259
63 And - once we have this - why do we need `goto` to be in tail position, strictly?
6460 As long as the routine has consistent type context every place it exits, that should be fine.
6561
6662 ### "Include" directives
3333 pass
3434
3535
36 class InconsistentExitError(StaticAnalysisError):
37 """The type context differs at two different exit points of the routine."""
38 pass
39
40
3641 class ForbiddenWriteError(StaticAnalysisError):
3742 pass
3843
4247
4348
4449 class IllegalJumpError(StaticAnalysisError):
50 pass
51
52
53 class TerminatedContextError(StaticAnalysisError):
54 """What the program is doing here is not valid, due to preceding `goto`s,
55 which make this dead code."""
4556 pass
4657
4758
100111 self._touched = set()
101112 self._range = dict()
102113 self._writeable = set()
114 self._terminated = False
103115 self._gotos_encountered = set()
104116
105117 for ref in inputs:
131143 c._writeable = set(self._writeable)
132144 return c
133145
def update_from(self, other):
    """Overwrite this context's state with the state of `other`.

    `routines`, `routine` and the terminated flag are taken over as-is;
    the touched set, range map, writeable set and encountered-goto set
    are shallow-copied, so the two contexts do not share those
    containers afterwards.
    """
    self.routines = other.routines
    self.routine = other.routine
    self._touched = set(other._touched)
    self._range = dict(other._range)
    self._writeable = set(other._writeable)
    self._terminated = other._terminated
    # The attribute is `_gotos_encountered` (it is what `encountered_gotos()`
    # returns); the previous spelling `_gotos_encounters` stored the copy in
    # a stray attribute and left this context's goto set stale.
    self._gotos_encountered = set(other._gotos_encountered)
154
134155 def each_meaningful(self):
135156 for ref in self._range.keys():
136157 yield ref
137158
138159 def each_touched(self):
139160 for ref in self._touched:
161 yield ref
162
163 def each_writeable(self):
164 for ref in self._writeable:
140165 yield ref
141166
142167 def assert_meaningful(self, *refs, **kwargs):
278303 def encountered_gotos(self):
279304 return self._gotos_encountered
280305
306 def set_terminated(self):
307 # Having a terminated context and having encountered gotos are not the same thing.
308 self._terminated = True
309
310 def has_terminated(self):
311 return self._terminated
312
281313 def assert_types_for_read_table(self, instr, src, dest, type_):
282314 if (not TableType.is_a_table_type(src.ref.type, type_)) or (not dest.type == type_):
283315 raise TypeMismatchError(instr, '{} and {}'.format(src.ref.name, dest.name))
363395
364396 def analyze_routine(self, routine):
365397 assert isinstance(routine, Routine)
366 self.current_routine = routine
367398 if routine.block is None:
368399 # it's an extern, that's fine
369400 return
401
402 self.current_routine = routine
370403 type_ = routine.location.type
371404 context = Context(self.routines, routine, type_.inputs, type_.outputs, type_.trashes)
405 self.exit_contexts = []
372406
373407 if self.debug:
374408 print("at start of routine `{}`:".format(routine.name))
375409 print(context)
376410
377411 self.analyze_block(routine.block, context)
378 trashed = set(context.each_touched()) - set(context.each_meaningful())
379412
380413 if self.debug:
381414 print("at end of routine `{}`:".format(routine.name))
389422 print('-' * 79)
390423 print('')
391424
392 # even if we goto another routine, we can't trash an output.
425 if self.exit_contexts:
426 # check that they are all consistent
427 exit_context = self.exit_contexts[0]
428 exit_meaningful = set(exit_context.each_meaningful())
429 exit_touched = set(exit_context.each_touched())
430 exit_writeable = set(exit_context.each_writeable())
431 for ex in self.exit_contexts[1:]:
432 if set(ex.each_meaningful()) != exit_meaningful:
433 raise InconsistentExitError("Exit contexts are not consistent")
434 if set(ex.each_touched()) != exit_touched:
435 raise InconsistentExitError("Exit contexts are not consistent")
436 if set(ex.each_writeable()) != exit_writeable:
437 raise InconsistentExitError("Exit contexts are not consistent")
438 context.update_from(exit_context)
439
440 trashed = set(context.each_touched()) - set(context.each_meaningful())
441
442 # these all apply whether we encountered goto(s) in this routine, or not...:
443
444 # can't trash an output.
393445 for ref in trashed:
394446 if ref in type_.outputs:
395447 raise UnmeaningfulOutputError(routine, ref.name)
396448
397 if not context.encountered_gotos():
398 for ref in type_.outputs:
399 context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError)
400 for ref in context.each_touched():
401 if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref):
402 raise ForbiddenWriteError(routine, ref.name)
449 # all outputs are meaningful.
450 for ref in type_.outputs:
451 context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError)
452
453 # if something was touched, then it should have been declared to be writable.
454 for ref in context.each_touched():
455 if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref):
456 raise ForbiddenWriteError(routine, ref.name)
457
458 self.exit_contexts = None
403459 self.current_routine = None
404460 return context
405461
432488 dest = instr.dest
433489 src = instr.src
434490
435 if context.encountered_gotos():
436 raise IllegalJumpError(instr, instr)
491 if context.has_terminated():
492 raise TerminatedContextError(instr, instr)
437493
438494 if opcode == 'ld':
439495 if isinstance(src, IndexedRef):
677733 self.assert_affected_within('trashes', type_, current_type)
678734
679735 context.encounter_gotos(set([instr.location]))
736
737 # Now that we have encountered a goto, we update the
738 # context here to match what someone calling the goto'ed
739 # function directly, would expect. (which makes sense
740 # when you think about it; if this goto's F, then calling
741 # this is like calling F, from the perspective of what is
742 # returned.)
743 #
744 # However, this isn't the current context anymore. This
745 # is an exit context of this routine.
746
747 exit_context = context.clone()
748
749 for ref in type_.outputs:
750 exit_context.set_touched(ref) # ?
751 exit_context.set_written(ref)
752
753 for ref in type_.trashes:
754 exit_context.assert_writeable(ref)
755 exit_context.set_touched(ref)
756 exit_context.set_unmeaningful(ref)
757
758 self.exit_contexts.append(exit_context)
759
760 # When we get to the end, we'll check that all the
761 # exit contexts are consistent with each other.
762
763 # We set the current context as having terminated.
764 # If we are in a branch, the merge will deal with
765 # having terminated. If we are at the end of the
766 # routine, the routine end will deal with that.
767
768 context.set_terminated()
769
680770 elif opcode == 'trash':
681771 context.set_touched(instr.dest)
682772 context.set_unmeaningful(instr.dest)
715805 message='initialized in block 2 but not in block 1 of `if {}`'.format(instr.src)
716806 )
717807
718 # merge the contexts. this used to be a method called `set_from`
719 context._touched = set(context1._touched) | set(context2._touched)
720 context.set_meaningful(*list(outgoing_meaningful))
721 context._writeable = set(context1._writeable) | set(context2._writeable)
808 # merge the contexts.
809
810 # first, the easy case: if one of the contexts has terminated, just use the other one.
811 # if both have terminated, we return a terminated context, and that's OK.
812
813 if context1.has_terminated():
814 context.update_from(context2)
815 elif context2.has_terminated():
816 context.update_from(context1)
817 else:
818 # the more complicated case: merge the contents of the contexts.
819 context._touched = set(context1._touched) | set(context2._touched)
820 context.set_meaningful(*list(outgoing_meaningful))
821 context._writeable = set(context1._writeable) | set(context2._writeable)
822
823 # in both cases, we need to merge the encountered gotos, in order that
824 # fallthru optimization continues to work correctly.
722825 context.encounter_gotos(context1.encountered_gotos() | context2.encountered_gotos())
723826
724827 for ref in outgoing_trashes:
731834 self.analyze_block(instr.block, context)
732835 if instr.src is not None: # None indicates 'repeat forever'
733836 context.assert_meaningful(instr.src)
837
838 if context.encountered_gotos():
839 raise IllegalJumpError(instr, instr)
734840
735841 # now analyze it having been executed a second time, with the context
736842 # of it having already been executed.
379379 self.scanner.expect('{')
380380 while not self.scanner.on('}'):
381381 instrs.append(self.instr())
382 if isinstance(instrs[-1], SingleOp) and instrs[-1].opcode == 'goto':
383 break
382384 self.scanner.expect('}')
383385 return Block(self.scanner.line_number, instrs=instrs)
384386
23252325 | }
23262326 ? TypeMismatchError
23272327
2328 A `goto` cannot appear within a `save` block, even if it is otherwise in tail position.
2328 A `goto` cannot appear within a `save` block.
23292329
23302330 | define other routine
23312331 | trashes a, z, n
23702370 | }
23712371 = ok
23722372
2373 A `goto` cannot appear within a `with interrupts` block, even if it is
2374 otherwise in tail position.
2373 A `goto` cannot appear within a `with interrupts` block.
23752374
23762375 | vector routine
23772376 | inputs x
30183017 | }
30193018 ? UnmeaningfulOutputError: x
30203019
3021 `goto`, if present, must be in tail position (the final instruction in a routine.)
3020 For now at least, you cannot have a `goto` inside a `repeat` loop.
30223021
30233022 | define bar routine trashes x, z, n {
30243023 | ld x, 200
30263025 |
30273026 | define main routine trashes x, z, n {
30283027 | ld x, 0
3029 | goto bar
3030 | }
3031 = ok
3028 | repeat {
3029 | inc x
3030 | goto bar
3031 | } until z
3032 | }
3033 ? IllegalJumpError
3034
3035 `goto`, as a matter of syntax, can only appear at the end
3036 of a block; but it need not be the final instruction in a
3037 routine.
30323038
30333039 | define bar routine trashes x, z, n {
30343040 | ld x, 200
30353041 | }
30363042 |
30373043 | define main routine trashes x, z, n {
3044 | ld x, 0
30383045 | goto bar
3039 | ld x, 0
3040 | }
3041 ? IllegalJumpError
3046 | }
3047 = ok
30423048
30433049 | define bar routine trashes x, z, n {
30443050 | ld x, 200
30633069 | ld x, 1
30643070 | goto bar
30653071 | }
3066 | ld x, 0
3067 | }
3068 ? IllegalJumpError
3072 | goto bar
3073 | }
3074 = ok
3075
3076 | define bar routine trashes x, z, n {
3077 | ld x, 200
3078 | }
3079 |
3080 | define main routine trashes x, z, n {
3081 | ld x, 0
3082 | if z {
3083 | ld x, 1
3084 | goto bar
3085 | }
3086 | ld x, 0
3087 | }
3088 = ok
30693089
30703090 | define bar routine trashes x, z, n {
30713091 | ld x, 200
30983118 | }
30993119 = ok
31003120
3101 For the purposes of `goto`, the end of a loop is never tail position.
3102
31033121 | define bar routine trashes x, z, n {
31043122 | ld x, 200
31053123 | }
31063124 |
31073125 | define main routine trashes x, z, n {
31083126 | ld x, 0
3109 | repeat {
3110 | inc x
3127 | if z {
3128 | ld x, 1
31113129 | goto bar
3112 | } until z
3113 | }
3114 ? IllegalJumpError
3130 | } else {
3131 | ld x, 0
3132 | }
3133 | ld x, 0
3134 | }
3135 = ok
3136
3137 | define bar routine trashes x, z, n {
3138 | ld x, 200
3139 | }
3140 |
3141 | define main routine trashes x, z, n {
3142 | ld x, 0
3143 | if z {
3144 | ld x, 1
3145 | goto bar
3146 | } else {
3147 | ld x, 0
3148 | }
3149 | goto bar
3150 | }
3151 = ok
3152
3153 Even though `goto` can only appear at the end of a block,
3154 you can still wind up with dead code; the analysis detects
3155 this.
3156
3157 | define bar routine trashes x, z, n {
3158 | ld x, 200
3159 | }
3160 |
3161 | define main routine trashes x, z, n {
3162 | ld x, 0
3163 | if z {
3164 | ld x, 1
3165 | goto bar
3166 | } else {
3167 | ld x, 0
3168 | goto bar
3169 | }
3170 | ld x, 100
3171 | }
3172 ? TerminatedContextError
3173
3174 It is important that the type context at every
3175 `goto` is compatible with the type context at the end of
3176 the routine.
3177
3178 | define bar routine
3179 | inputs x
3180 | trashes x, z, n
3181 | {
3182 | ld x, 200
3183 | }
3184 |
3185 | define main routine trashes x, z, n {
3186 | ld x, 0
3187 | if z {
3188 | ld x, 1
3189 | goto bar
3190 | } else {
3191 | ld x, 0
3192 | }
3193 | ld x, 1
3194 | }
3195 = ok
3196
3197 Here, we try to trash `x` before `goto`ing a routine that inputs `x`.
3198
3199 | define bar routine
3200 | inputs x
3201 | trashes x, z, n
3202 | {
3203 | ld x, 200
3204 | }
3205 |
3206 | define main routine
3207 | outputs a
3208 | trashes x, z, n
3209 | {
3210 | ld x, 0
3211 | if z {
3212 | trash x
3213 | goto bar
3214 | } else {
3215 | trash x
3216 | }
3217 | ld a, 1
3218 | }
3219 ? UnmeaningfulReadError: x
3220
3221 Here, we declare that main outputs `a`, but we `goto` a routine that does not output `a`.
3222
3223 | define bar routine
3224 | inputs x
3225 | trashes x, z, n
3226 | {
3227 | ld x, 200
3228 | }
3229 |
3230 | define main routine
3231 | outputs a
3232 | trashes x, z, n
3233 | {
3234 | ld x, 0
3235 | if z {
3236 | ld x, 1
3237 | goto bar
3238 | } else {
3239 | ld x, 2
3240 | }
3241 | ld a, 1
3242 | }
3243 ? UnmeaningfulOutputError: a
3244
3245 Here, we declare that main outputs `a`, and we `goto` a routine that outputs `a`, so that's OK.
3246
3247 | define bar routine
3248 | inputs x
3249 | outputs a
3250 | trashes x, z, n
3251 | {
3252 | ld x, 200
3253 | ld a, 1
3254 | }
3255 |
3256 | define main routine
3257 | outputs a
3258 | trashes x, z, n
3259 | {
3260 | ld x, 0
3261 | if z {
3262 | ld x, 1
3263 | goto bar
3264 | } else {
3265 | ld x, 2
3266 | }
3267 | ld a, 1
3268 | }
3269 = ok
3270
3271 Here, we declare that main outputs `a`, and we `goto` two routines, and they both output `a`.
3272
3273 | define bar0 routine
3274 | inputs x
3275 | outputs a
3276 | trashes x, z, n
3277 | {
3278 | ld a, x
3279 | }
3280 |
3281 | define bar1 routine
3282 | inputs x
3283 | outputs a
3284 | trashes x, z, n
3285 | {
3286 | ld a, 200
3287 | }
3288 |
3289 | define main routine
3290 | outputs a
3291 | trashes x, z, n
3292 | {
3293 | ld x, 0
3294 | if z {
3295 | ld x, 1
3296 | goto bar0
3297 | } else {
3298 | ld x, 2
3299 | goto bar1
3300 | }
3301 | }
3302 = ok
3303
3304 This is like the case just above, but one routine doesn't output `a`.
3305
3306 | define bar0 routine
3307 | inputs x
3308 | outputs a
3309 | trashes x, z, n
3310 | {
3311 | ld a, x
3312 | }
3313 |
3314 | define bar1 routine
3315 | inputs x
3316 | trashes x, z, n
3317 | {
3318 | ld x, 200
3319 | }
3320 |
3321 | define main routine
3322 | outputs a
3323 | trashes x, z, n
3324 | {
3325 | ld x, 0
3326 | if z {
3327 | ld x, 1
3328 | goto bar0
3329 | } else {
3330 | ld x, 2
3331 | goto bar1
3332 | }
3333 | }
3334 ? InconsistentExitError
3335
3336 This is like the case above, but the two routines have different inputs, and that's OK.
3337
3338 | define bar0 routine
3339 | inputs x
3340 | outputs a
3341 | trashes x, z, n
3342 | {
3343 | ld a, x
3344 | }
3345 |
3346 | define bar1 routine
3347 | outputs a
3348 | trashes x, z, n
3349 | {
3350 | ld a, 200
3351 | }
3352 |
3353 | define main routine
3354 | outputs a
3355 | trashes x, z, n
3356 | {
3357 | ld x, 0
3358 | if z {
3359 | ld x, 1
3360 | goto bar0
3361 | } else {
3362 | ld x, 2
3363 | goto bar1
3364 | }
3365 | }
3366 = ok
3367
3368 TODO: we should have a lot more test cases for the above, here.
31153369
31163370 Can't `goto` a routine that outputs or trashes more than the current routine.
31173371
550550 | }
551551 = ok
552552
553 The label doesn't have to be defined yet at the point
554 in the program text where it is `goto`d.
555
553556 | define main routine {
554557 | goto foo
555558 | }
558561 | }
559562 = ok
560563
564 Syntactically, you can `goto` a vector.
565
561566 | vector routine foo
562567 |
563568 | define main routine {
565570 | }
566571 = ok
567572
573 But you can't `goto` a label that never gets defined.
574
568575 | define main routine {
569576 | goto foo
570577 | }
571578 ? SyntaxError
579
580 `goto` may only be the final instruction in a block.
581
582 | define bar routine trashes x, z, n {
583 | ld x, 200
584 | }
585 |
586 | define main routine trashes x, z, n {
587 | goto bar
588 | ld x, 0
589 | }
590 ? Expected '}', but found 'ld'
572591
573592 Buffers and pointers.
574593