Vastly simplify the fallthru analysis algorithm.
Chris Pressey
4 years ago
60 | 60 | if options.optimize_fallthru: |
61 | 61 | from sixtypical.fallthru import FallthruAnalyzer |
62 | 62 | |
63 | def dump(label, data): | |
63 | def dump(data, label=None): | |
64 | 64 | import json |
65 | 65 | if not options.dump_fallthru_info: |
66 | 66 | return |
71 | 71 | |
72 | 72 | fa = FallthruAnalyzer(debug=options.debug) |
73 | 73 | fa.analyze_program(program) |
74 | dump(None, fa.fall_in_map) | |
75 | ||
76 | fa.find_cycles() | |
77 | while fa.cycles_found: | |
78 | if options.debug: | |
79 | dump('ancestors', fa.ancestor_map) | |
80 | dump('cycles found', sorted(fa.cycles_found)) | |
81 | fa.break_cycle() | |
82 | dump('after breaking cycle', fa.fall_in_map) | |
83 | fa.find_cycles() | |
84 | ||
85 | 74 | routines_list = fa.serialize() |
86 | dump('serialization', routines_list) | |
75 | dump(routines_list) | |
87 | 76 | |
88 | 77 | if options.analyze_only: |
89 | 78 | return |
2 | 2 | from copy import copy |
3 | 3 | |
4 | 4 | from sixtypical.model import RoutineType |
5 | ||
6 | ||
7 | def make_transitive_closure(d, key, s): | |
8 | for sub in d.get(key, []): | |
9 | if sub not in s: | |
10 | s.add(sub) | |
11 | make_transitive_closure(d, sub, s) | |
12 | ||
13 | ||
14 | def find_chains(d, key, pred): | |
15 | chains = [] | |
16 | for sub in d.get(key, []): | |
17 | if pred(sub): | |
18 | subchains = find_chains(d, sub, pred) | |
19 | for subchain in subchains: | |
20 | chains.append([key] + subchain) | |
21 | chains.append([key]) | |
22 | return chains | |
23 | 5 | |
24 | 6 | |
25 | 7 | class FallthruAnalyzer(object): |
29 | 11 | |
30 | 12 | def analyze_program(self, program): |
31 | 13 | self.program = program |
32 | fall_in_map = {} | |
14 | ||
15 | self.fallthru_map = {} | |
33 | 16 | for routine in program.routines: |
34 | 17 | encountered_gotos = list(routine.encountered_gotos) |
35 | 18 | if len(encountered_gotos) == 1 and isinstance(encountered_gotos[0].type, RoutineType): |
36 | fall_in_map.setdefault(encountered_gotos[0].name, set()).add(routine.name) | |
37 | self.fall_in_map = dict([(k, sorted(v)) for k, v in fall_in_map.iteritems()]) | |
38 | return self.fall_in_map | |
19 | self.fallthru_map[routine.name] = encountered_gotos[0].name | |
20 | else: | |
21 | self.fallthru_map[routine.name] = None | |
39 | 22 | |
40 | def find_cycles(self): | |
41 | self.ancestor_map = {} | |
42 | for key in self.fall_in_map: | |
43 | ancestors = set() | |
44 | make_transitive_closure(self.fall_in_map, key, ancestors) | |
45 | self.ancestor_map[key] = sorted(ancestors) | |
46 | ||
47 | self.cycles_found = set() | |
48 | for key in self.ancestor_map: | |
49 | if key in self.ancestor_map[key]: | |
50 | self.cycles_found.add(key) | |
51 | ||
52 | return self.cycles_found | |
53 | ||
54 | def break_cycle(self): | |
55 | cycle_to_break = sorted(self.cycles_found)[0] | |
56 | cycles_to_break = set([cycle_to_break]) | |
57 | ||
58 | new_fall_in_map = {} | |
59 | for key in self.fall_in_map: | |
60 | values = set(self.fall_in_map[key]) - cycles_to_break | |
61 | if values: | |
62 | new_fall_in_map[key] = sorted(values) | |
63 | self.fall_in_map = new_fall_in_map | |
23 | def find_chain(self, routine_name, available): | |
24 | chain = [routine_name] | |
25 | seen = set(chain) | |
26 | while True: | |
27 | next = self.fallthru_map.get(routine_name) | |
28 | if next is None or next in seen or next not in available: | |
29 | return chain | |
30 | seen.add(next) | |
31 | chain.append(next) | |
64 | 32 | |
65 | 33 | def serialize(self): |
66 | # NOTE, we can probably do this completely differently; | |
67 | # construct the fall_out map | |
68 | # construct fall_out chains | |
69 | # sort these by length | |
70 | # do the longest ones first | |
34 | pending_routines = copy(self.fallthru_map) | |
35 | roster = [] | |
71 | 36 | |
72 | pending_routines = sorted(self.fall_in_map.keys()) | |
73 | routine_names = sorted([routine.name for routine in self.program.routines]) | |
74 | for routine_name in routine_names: | |
75 | if routine_name not in pending_routines: | |
76 | pending_routines.append(routine_name) | |
37 | main_chain = self.find_chain('main', pending_routines) | |
38 | roster.append(main_chain) | |
39 | for k in main_chain: | |
40 | del pending_routines[k] | |
77 | 41 | |
78 | # make sure `main` appears first, whatever else may be the case. | |
79 | pending_routines.remove('main') | |
80 | pending_routines = ['main'] + pending_routines | |
81 | ||
82 | roster = [] | |
83 | 42 | while pending_routines: |
84 | # Pick a routine that is still pending to be serialized. | |
85 | key = pending_routines[0] | |
86 | ||
87 | in_set = self.fall_in_map.get(key, []) | |
88 | ||
89 | # Find the longest chain of routines r1,r2,...rn in R | |
90 | # where out(r1) = {r2}, out(r2) = {r3}, ... out(rn-1) = {rn}, and rn = r. | |
91 | chains = find_chains(self.fall_in_map, key, lambda k: k in pending_routines) | |
43 | chains = [self.find_chain(k, pending_routines) for k in pending_routines.keys()] | |
92 | 44 | chains.sort(key=len, reverse=True) |
93 | routines = chains[0] | |
94 | routines.reverse() | |
95 | ||
96 | # Append (r1,r2,...,rn) to the roster and remove r1,r2,...rn from R. | |
97 | # A sublist like this appearing in the roster has meaning | |
98 | # "optimize the final goto out of all but the last routine in the sublist". | |
99 | for r in routines: | |
100 | pending_routines.remove(r) | |
101 | roster.append(routines) | |
45 | c = chains[0] | |
46 | roster.append(c) | |
47 | for k in c: | |
48 | del pending_routines[k] | |
102 | 49 | |
103 | 50 | return roster |
17 | 17 | A *fixed* routine means, a routine which is known at compile time, not a |
18 | 18 | `goto` through a vector. |
19 | 19 | |
20 | Consider the set R of all routines in the program. | |
21 | ||
22 | Every routine r1 ∈ R either potentially falls through to a single routine | |
23 | r2 ∈ R (r2 ≠ r1) or it does not potentially fall through to any routine. | |
24 | We can say out(r1) = {r2} or out(r1) = ∅. | |
25 | ||
26 | Every routine r ∈ R in this set also has a set of zero or more | |
27 | routines that potentially fall through to it. Call this | |
28 | in(r). It is the case that out(r1) = {r2} → r1 ∈ in(r2). | |
29 | ||
30 | We can trace out the connections by following the in- or out- sets of | |
31 | a given routine. Because each routine potentially falls through to only | |
32 | a single routine, the structures we find will be tree-like, not DAG-like. | |
33 | ||
34 | But they do permit cycles. | |
35 | ||
36 | So, we first break those cycles. (Is there a "best" way to do this? | |
37 | Perhaps. But for now, we just break them arbitrarily; pick an r1 that | |
38 | has a cycle and remove it from in(r2) for all r2. This also means | |
39 | that, now, out(r1) = ∅. Then check if there are still cycles, and keep | |
40 | picking one and breaking it until there are no cycles remaining.) | |
41 | ||
42 | We will be left with out() sets which are disjoint trees, i.e. | |
43 | if r1 ∈ in(r2), then r1 ∉ in(r3) for all r3 ≠ r2. Also, | |
44 | out(r1) = ∅ → for all r2, r1 ∉ in(r2). | |
45 | ||
46 | We then follow an algorithm something like this. Treat R as a mutable | |
47 | set and start with an empty list L. Then, | |
48 | ||
49 | - Pick a routine r from R where out(r) = ∅. | |
50 | - Find the longest chain of routines r1,r2,...rn in R where out(r1) = {r2}, | |
51 | out(r2) = {r3}, ... out(rn-1) = {rn}, and rn = r. | |
52 | - Remove (r1,r2,...,rn) from R and append them to L in that order. | |
53 | Mark (r1,r2,...rn-1) as "will have their final `goto` removed." | |
54 | - Repeat until R is empty. | |
20 | Consider the set R of all available routines in the program. | |
21 | ||
22 | Every routine either potentially falls through to a single other routine | |
23 | or it does not potentially fall through to any routine. | |
24 | ||
25 | More formally, we can say | |
26 | ||
27 | fall : R → R ∪ {nil}, fall(r) ≠ r | |
28 | ||
29 | where `nil` is an atom that represents no routine. | |
30 | ||
31 | Now consider an operation chain() vaguely similar to a transitive closure | |
32 | on fall(). Starting with r, we construct a list of r, fall(r), | |
33 | fall(fall(r)), ... with the following restrictions: | |
34 | ||
35 | - we stop when we reach `nil` (because fall(`nil`) is not defined) | |
36 | - we stop when we see an element that is not in R. | |
37 | - we stop when we see an element that we have already added to the | |
38 | list (this is to prevent infinite lists due to cycles.) | |
39 | ||
40 | With these definitions, our algorithm is something like this. | |
41 | ||
42 | Treat R as a mutable set and start with an empty list of lists L. Then, | |
43 | ||
44 | - For all r ∈ R, find all chain(r). | |
45 | - Pick a longest such chain. Call it C. | |
46 | - Append C to L. | |
47 | - Remove all elements occurring in C, from R. | |
48 | - Repeat until R is empty. | |
55 | 49 | |
56 | 50 | When the time comes to generate code, generate it in the order given by L. |
51 | In addition, each sublist in L represents a number of routines to | |
52 | generate; all except the final routine in such a sublist need not have | |
53 | any jump instruction generated for its final `goto`. | |
54 | ||
55 | The tests in this document test against the list L. | |
56 | ||
57 | Note that this optimization is a feature of SixtyPical's reference | |
58 | compiler, not the language. So an implementation is not required | |
59 | to pass these tests to be considered an implementation of SixtyPical. | |
57 | 60 | |
58 | 61 | [Falderal]: http://catseye.tc/node/Falderal |
59 | 62 | |
68 | 71 | | define main routine |
69 | 72 | | { |
70 | 73 | | } |
71 | = {} | |
72 | = *** serialization: | |
73 | 74 | = [ |
74 | 75 | = [ |
75 | 76 | = "main" |
87 | 88 | | { |
88 | 89 | | goto foo |
89 | 90 | | } |
90 | = { | |
91 | = "foo": [ | |
92 | = "main" | |
93 | = ] | |
94 | = } | |
95 | = *** serialization: | |
96 | 91 | = [ |
97 | 92 | = [ |
98 | 93 | = "main", |
121 | 116 | | { |
122 | 117 | | goto foo |
123 | 118 | | } |
124 | = { | |
125 | = "foo": [ | |
126 | = "bar", | |
127 | = "main" | |
128 | = ] | |
129 | = } | |
130 | = *** serialization: | |
131 | = [ | |
132 | = [ | |
133 | = "main" | |
134 | = ], | |
135 | = [ | |
136 | = "bar", | |
137 | = "foo" | |
119 | = [ | |
120 | = [ | |
121 | = "main", | |
122 | = "foo" | |
123 | = ], | |
124 | = [ | |
125 | = "bar" | |
138 | 126 | = ] |
139 | 127 | = ] |
140 | 128 | |
157 | 145 | | define main routine trashes a, z, n |
158 | 146 | | { |
159 | 147 | | } |
160 | = { | |
161 | = "bar": [ | |
162 | = "foo" | |
163 | = ], | |
164 | = "foo": [ | |
165 | = "bar" | |
166 | = ] | |
167 | = } | |
168 | = *** cycles found: | |
169 | = [ | |
170 | = "bar", | |
171 | = "foo" | |
172 | = ] | |
173 | = *** after breaking cycle: | |
174 | = { | |
175 | = "bar": [ | |
176 | = "foo" | |
177 | = ] | |
178 | = } | |
179 | = *** serialization: | |
180 | = [ | |
181 | = [ | |
182 | = "main" | |
183 | = ], | |
184 | = [ | |
185 | = "foo", | |
186 | = "bar" | |
148 | = [ | |
149 | = [ | |
150 | = "main" | |
151 | = ], | |
152 | = [ | |
153 | = "bar", | |
154 | = "foo" | |
187 | 155 | = ] |
188 | 156 | = ] |
189 | 157 | |
209 | 177 | | goto bar |
210 | 178 | | } |
211 | 179 | | } |
212 | = {} | |
213 | = *** serialization: | |
214 | 180 | = [ |
215 | 181 | = [ |
216 | 182 | = "main" |
244 | 210 | | copy bar, vec |
245 | 211 | | goto vec |
246 | 212 | | } |
247 | = {} | |
248 | = *** serialization: | |
249 | 213 | = [ |
250 | 214 | = [ |
251 | 215 | = "main" |