git @ Cat's Eye Technologies SixtyPical / 92b1cfe
Implement tail-call optimization. Chris Pressey 2 years ago
4 changed file(s) with 116 addition(s) and 90 deletion(s). Raw diff Collapse all Expand all
1313 * Added `--prune-unreachable-routines` option, which causes
1414 the compiler to in fact omit routines determined to be
1515 unreachable as described above.
16 * Code generation now performs modest peephole optimization,
17 generating better code for `goto`s and `if` blocks at the
18 end of a routine.
16 * Code generation now performs modest peephole optimization
17 at the end of each routine. This results in better code
18 generation for constructs in tail position, notably
19 tail-call optimization of `call`s, but also for `goto`s and
20 `if` blocks at the end of a routine.
1921 * The `dcc6502-adapter` test adapter was updated to conform
2022 to the output of the latest version of `dcc6502`.
2123
8282 use up a word in zero-page, which we consider a precious resource, it allows those
8383 zero-page locations to be re-used.
8484
85 ### Tail-call optimization
86
87 If a block ends in a `call`, can that be converted to end in a `goto`? Why not? I think it can,
88 if the block is in tail position. The constraints should iron out the same both ways.
89
90 As long as the routine has consistent type context every place it exits, that should be fine.
91
9285 Implementation
9386 --------------
9487
166166
167167 needs_rts = True
168168 last_op = self.emitter.get_tail()
169
170 if isinstance(last_op, JSR):
171 if isinstance(last_op.operand, Absolute):
172 if isinstance(last_op.operand.value, Label):
173 label = last_op.operand.value
174 self.emitter.retract()
175 self.emitter.emit(JMP(Absolute(label)))
176 last_op = self.emitter.get_tail()
177
169178 if isinstance(last_op, JMP):
170179 needs_rts = False
171180 if isinstance(last_op.operand, Absolute):
5050 | {
5151 | ld a, 65
5252 | call chrout
53 | ld a, 0
5354 | }
5455 = $080D LDA #$41
5556 = $080F JSR $FFD2
56 = $0812 RTS
57 = $0812 LDA #$00
58 = $0814 RTS
5759
5860 Call defined routine.
5961
7072 | trashes a, x, y, z, n
7173 | {
7274 | call foo
73 | }
74 = $080D JSR $0811
75 = $0810 RTS
76 = $0811 LDA #$00
77 = $0813 LDX #$00
78 = $0815 LDY #$00
79 = $0817 RTS
75 | ld a, 1
76 | }
77 = $080D JSR $0813
78 = $0810 LDA #$01
79 = $0812 RTS
80 = $0813 LDA #$00
81 = $0815 LDX #$00
82 = $0817 LDY #$00
83 = $0819 RTS
84
85 Tail call is optimized into a jump.
86
87 | define foo routine
88 | outputs a, x, y
89 | trashes z, n
90 | {
91 | ld a, 0
92 | ld x, 0
93 | ld y, 0
94 | }
95 |
96 | define main routine
97 | trashes a, x, y, z, n
98 | {
99 | ld a, 1
100 | call foo
101 | }
102 = $080D LDA #$01
103 = $080F JMP $0812
104 = $0812 LDA #$00
105 = $0814 LDX #$00
106 = $0816 LDY #$00
107 = $0818 RTS
80108
81109 Access a defined memory location.
82110
10521080 = $0848 STA $084D
10531081 = $084B RTS
10541082
1055 Indirect call.
1083 Indirect call. TODO: we don't need the final RTS here, omit it.
10561084
10571085 | vector routine
10581086 | outputs x
10731101 | copy bar, foo
10741102 | call foo
10751103 | }
1076 = $080D LDA #$1B
1077 = $080F STA $0822
1104 = $080D LDA #$1A
1105 = $080F STA $0821
10781106 = $0812 LDA #$08
1079 = $0814 STA $0823
1080 = $0817 JSR $081E
1081 = $081A RTS
1082 = $081B LDX #$C8
1083 = $081D RTS
1084 = $081E JMP ($0822)
1085 = $0821 RTS
1107 = $0814 STA $0822
1108 = $0817 JMP $081D
1109 = $081A LDX #$C8
1110 = $081C RTS
1111 = $081D JMP ($0821)
1112 = $0820 RTS
10861113
10871114 Compiling `goto`. Note that no `RTS` is emitted after the `JMP`.
10881115
11361163 | call one
11371164 | }
11381165 = $080D LDX #$00
1139 = $080F LDA #$3F
1140 = $0811 STA $0846
1166 = $080F LDA #$3E
1167 = $0811 STA $0845
11411168 = $0814 LDA #$08
1142 = $0816 STA $0847
1143 = $0819 LDA #$3F
1144 = $081B STA $0848,X
1169 = $0816 STA $0846
1170 = $0819 LDA #$3E
1171 = $081B STA $0847,X
11451172 = $081E LDA #$08
1146 = $0820 STA $0948,X
1147 = $0823 LDA $0846
1148 = $0826 STA $0848,X
1149 = $0829 LDA $0847
1150 = $082C STA $0948,X
1151 = $082F LDA $0848,X
1152 = $0832 STA $0846
1153 = $0835 LDA $0948,X
1154 = $0838 STA $0847
1155 = $083B JSR $0842
1156 = $083E RTS
1157 = $083F LDX #$C8
1158 = $0841 RTS
1159 = $0842 JMP ($0846)
1160 = $0845 RTS
1173 = $0820 STA $0947,X
1174 = $0823 LDA $0845
1175 = $0826 STA $0847,X
1176 = $0829 LDA $0846
1177 = $082C STA $0947,X
1178 = $082F LDA $0847,X
1179 = $0832 STA $0845
1180 = $0835 LDA $0947,X
1181 = $0838 STA $0846
1182 = $083B JMP $0841
1183 = $083E LDX #$C8
1184 = $0840 RTS
1185 = $0841 JMP ($0845)
1186 = $0844 RTS
11611187
11621188 Copying to and from a vector table, with constant offsets.
11631189
11871213 | call one
11881214 | }
11891215 = $080D LDX #$00
1190 = $080F LDA #$3F
1191 = $0811 STA $0846
1216 = $080F LDA #$3E
1217 = $0811 STA $0845
11921218 = $0814 LDA #$08
1193 = $0816 STA $0847
1194 = $0819 LDA #$3F
1195 = $081B STA $0849,X
1219 = $0816 STA $0846
1220 = $0819 LDA #$3E
1221 = $081B STA $0848,X
11961222 = $081E LDA #$08
1197 = $0820 STA $0949,X
1198 = $0823 LDA $0846
1199 = $0826 STA $084A,X
1200 = $0829 LDA $0847
1201 = $082C STA $094A,X
1202 = $082F LDA $084B,X
1203 = $0832 STA $0846
1204 = $0835 LDA $094B,X
1205 = $0838 STA $0847
1206 = $083B JSR $0842
1207 = $083E RTS
1208 = $083F LDX #$C8
1209 = $0841 RTS
1210 = $0842 JMP ($0846)
1211 = $0845 RTS
1223 = $0820 STA $0948,X
1224 = $0823 LDA $0845
1225 = $0826 STA $0849,X
1226 = $0829 LDA $0846
1227 = $082C STA $0949,X
1228 = $082F LDA $084A,X
1229 = $0832 STA $0845
1230 = $0835 LDA $094A,X
1231 = $0838 STA $0846
1232 = $083B JMP $0841
1233 = $083E LDX #$C8
1234 = $0840 RTS
1235 = $0841 JMP ($0845)
1236 = $0844 RTS
12121237
12131238 ### add, sub
12141239
16941719 | ld x, t
16951720 | call foo
16961721 | }
1697 = $080D LDX $081F
1698 = $0810 JSR $0814
1699 = $0813 RTS
1700 = $0814 STX $081E
1701 = $0817 INC $081E
1702 = $081A LDX $081E
1703 = $081D RTS
1704 = $081E .byte $FF
1705 = $081F .byte $07
1722 = $080D LDX $081E
1723 = $0810 JMP $0813
1724 = $0813 STX $081D
1725 = $0816 INC $081D
1726 = $0819 LDX $081D
1727 = $081C RTS
1728 = $081D .byte $FF
1729 = $081E .byte $07
17061730
17071731 Memory locations defined local dynamic to a routine are allocated
17081732 just the same as uninitialized global storage locations are.
17271751 | call foo
17281752 | }
17291753 = $080D LDX #$00
1730 = $080F STX $0821
1731 = $0812 JSR $0816
1732 = $0815 RTS
1733 = $0816 STX $0820
1734 = $0819 INC $0820
1735 = $081C LDX $0820
1736 = $081F RTS
1754 = $080F STX $0820
1755 = $0812 JMP $0815
1756 = $0815 STX $081F
1757 = $0818 INC $081F
1758 = $081B LDX $081F
1759 = $081E RTS
17371760
17381761 Memory locations defined local dynamic to a routine are allocated
17391762 just the same as uninitialized global storage locations are, even
17601783 | }
17611784 = $080D LDX #$00
17621785 = $080F STX $0401
1763 = $0812 JSR $0816
1764 = $0815 RTS
1765 = $0816 STX $0400
1766 = $0819 INC $0400
1767 = $081C LDX $0400
1768 = $081F RTS
1786 = $0812 JMP $0815
1787 = $0815 STX $0400
1788 = $0818 INC $0400
1789 = $081B LDX $0400
1790 = $081E RTS