git @ Cat's Eye Technologies SixtyPical / ac24f98
Vastly simplify the fallthru analysis algorithm. Chris Pressey 3 years ago
3 changed file(s) with 79 addition(s) and 179 deletion(s). Raw diff Collapse all Expand all
6060 if options.optimize_fallthru:
6161 from sixtypical.fallthru import FallthruAnalyzer
6262
63 def dump(label, data):
63 def dump(data, label=None):
6464 import json
6565 if not options.dump_fallthru_info:
6666 return
7171
7272 fa = FallthruAnalyzer(debug=options.debug)
7373 fa.analyze_program(program)
74 dump(None, fa.fall_in_map)
75
76 fa.find_cycles()
77 while fa.cycles_found:
78 if options.debug:
79 dump('ancestors', fa.ancestor_map)
80 dump('cycles found', sorted(fa.cycles_found))
81 fa.break_cycle()
82 dump('after breaking cycle', fa.fall_in_map)
83 fa.find_cycles()
84
8574 routines_list = fa.serialize()
86 dump('serialization', routines_list)
75 dump(routines_list)
8776
8877 if options.analyze_only:
8978 return
22 from copy import copy
33
44 from sixtypical.model import RoutineType
5
6
def make_transitive_closure(d, key, s):
    """Add to `s` every node reachable from `key` by following `d`.

    `d` maps a node to an iterable of its direct successors; nodes that
    are missing from `d` are treated as having no successors.  `s` is a
    set that is updated in place; nothing is returned.  `key` itself is
    only added when it is reachable from itself (i.e. it lies on a cycle).
    Nodes already present in `s` on entry are not expanded again.

    An explicit worklist is used instead of recursion so that a very long
    chain cannot exhaust the interpreter's recursion limit.
    """
    pending = [key]
    while pending:
        current = pending.pop()
        for sub in d.get(current, []):
            if sub not in s:
                # Mark before expanding so cycles terminate.
                s.add(sub)
                pending.append(sub)
12
13
def find_chains(d, key, pred):
    """Return every chain that starts at `key` and follows links in `d`.

    `d` maps a node to an iterable of successor nodes.  Only successors
    for which `pred(successor)` is true are followed.  Each chain is a
    list beginning with `key`; the one-element chain `[key]` is always
    the last entry of the result.
    """
    found = []
    for child in d.get(key, []):
        if not pred(child):
            continue
        # Prefix this node onto every chain that starts at the child.
        found.extend([key] + tail for tail in find_chains(d, child, pred))
    found.append([key])
    return found
235
246
257 class FallthruAnalyzer(object):
2911
3012 def analyze_program(self, program):
3113 self.program = program
32 fall_in_map = {}
14
15 self.fallthru_map = {}
3316 for routine in program.routines:
3417 encountered_gotos = list(routine.encountered_gotos)
3518 if len(encountered_gotos) == 1 and isinstance(encountered_gotos[0].type, RoutineType):
36 fall_in_map.setdefault(encountered_gotos[0].name, set()).add(routine.name)
37 self.fall_in_map = dict([(k, sorted(v)) for k, v in fall_in_map.iteritems()])
38 return self.fall_in_map
19 self.fallthru_map[routine.name] = encountered_gotos[0].name
20 else:
21 self.fallthru_map[routine.name] = None
3922
def find_cycles(self):
    """Recompute `self.ancestor_map` and `self.cycles_found`.

    For every key of `self.fall_in_map`, `self.ancestor_map[key]` becomes
    the sorted list of everything reachable from that key through
    `self.fall_in_map` (as computed by `make_transitive_closure`).  A key
    that shows up among its own ancestors is recorded in
    `self.cycles_found`, which is also returned.
    """
    closure_by_routine = self.ancestor_map = {}
    for name in self.fall_in_map:
        reachable = set()
        make_transitive_closure(self.fall_in_map, name, reachable)
        closure_by_routine[name] = sorted(reachable)

    # A routine that is its own ancestor sits on a cycle.
    self.cycles_found = set(
        name
        for name, ancestors in closure_by_routine.items()
        if name in ancestors
    )
    return self.cycles_found
53
def break_cycle(self):
    """Break one cycle by removing a routine from every fall-in list.

    The alphabetically first name in `self.cycles_found` is chosen and
    dropped from every value of `self.fall_in_map`.  Entries whose list
    becomes empty are removed entirely; surviving lists are stored
    de-duplicated and sorted.  `self.fall_in_map` is replaced by the
    pruned mapping.
    """
    victim = sorted(self.cycles_found)[0]

    pruned = {}
    for name, fallers in self.fall_in_map.items():
        survivors = set(fallers)
        survivors.discard(victim)
        if survivors:
            pruned[name] = sorted(survivors)
    self.fall_in_map = pruned
def find_chain(self, routine_name, available):
    """Return the fall-through chain that starts at `routine_name`.

    The chain is the list `routine_name`, fall(routine_name),
    fall(fall(routine_name)), ... as given by `self.fallthru_map`.
    It stops as soon as the next routine

    - is `None` (the current routine does not fall through),
    - has already been placed in the chain (guards against cycles), or
    - is not in `available` (any container supporting `in`).

    The starting routine is always included and is not itself checked
    against `available`.
    """
    chain = [routine_name]
    seen = set(chain)
    current = routine_name
    while True:
        successor = self.fallthru_map.get(current)
        if successor is None or successor in seen or successor not in available:
            return chain
        seen.add(successor)
        chain.append(successor)
        # Advance the cursor.  Looking up the start routine again on each
        # pass would cap every chain at two entries.
        current = successor
6432
6533 def serialize(self):
66 # NOTE, we can probably do this completely differently;
67 # construct the fall_out map
68 # construct fall_out chains
69 # sort these by length
70 # do the longest ones first
34 pending_routines = copy(self.fallthru_map)
35 roster = []
7136
72 pending_routines = sorted(self.fall_in_map.keys())
73 routine_names = sorted([routine.name for routine in self.program.routines])
74 for routine_name in routine_names:
75 if routine_name not in pending_routines:
76 pending_routines.append(routine_name)
37 main_chain = self.find_chain('main', pending_routines)
38 roster.append(main_chain)
39 for k in main_chain:
40 del pending_routines[k]
7741
78 # make sure `main` appears first, whatever else may be the case.
79 pending_routines.remove('main')
80 pending_routines = ['main'] + pending_routines
81
82 roster = []
8342 while pending_routines:
84 # Pick a routine that is still pending to be serialized.
85 key = pending_routines[0]
86
87 in_set = self.fall_in_map.get(key, [])
88
89 # Find the longest chain of routines r1,r2,...rn in R
90 # where out(r1) = {r2}, out(r2} = {r3}, ... out(rn-1) = {rn}, and rn = r.
91 chains = find_chains(self.fall_in_map, key, lambda k: k in pending_routines)
43 chains = [self.find_chain(k, pending_routines) for k in pending_routines.keys()]
9244 chains.sort(key=len, reverse=True)
93 routines = chains[0]
94 routines.reverse()
95
96 # Append (r1,r2,...,rn) to the roster and remove r1,r2,...rn from R.
97 # A sublist like this appearing in the roster has meaning
98 # "optimize the final goto out of all but the last routine in the sublist".
99 for r in routines:
100 pending_routines.remove(r)
101 roster.append(routines)
45 c = chains[0]
46 roster.append(c)
47 for k in c:
48 del pending_routines[k]
10249
10350 return roster
1717 A *fixed* routine means, a routine which is known at compile time, not a
1818 `goto` through a vector.
1919
20 Consider the set R of all routines in the program.
21
22 Every routine r1 ∈ R either potentially falls through to a single routine
23 r2 ∈ R (r2 ≠ r1) or it does not potentially fall through to any routine.
24 We can say out(r1) = {r2} or out(r1) = ∅.
25
26 Every routine r ∈ R in this set also has a set of zero or more
27 routines that potentially fall through to it. Call this
28 in(r). It is the case that out(r1) = {r2} → r1 ∈ in(r2).
29
30 We can trace out the connections by following the in- or out- sets of
31 a given routine. Because each routine potentially falls through to only
32 a single routine, the structures we find will be tree-like, not DAG-like.
33
34 But they do permit cycles.
35
36 So, we first break those cycles. (Is there a "best" way to do this?
37 Perhaps. But for now, we just break them arbitrarily; pick a r1 that
38 has a cycle and remove it from in(r2) for all r2. This also means
39 that, now, out(r1) = ∅. Then check if there are still cycles, and keep
40 picking one and breaking it until there are no cycles remaining.)
41
42 We will be left with out() sets which are disjoint trees, i.e.
43 if r1 ∈ in(r2), then r1 ∉ in(r3) for all r3 ≠ r2. Also,
44 out(r1) = ∅ → for all r2, r1 ∉ in(r2).
45
46 We then follow an algorithm something like this. Treat R as a mutable
47 set and start with an empty list L. Then,
48
49 - Pick a routine r from R where out(r) = ∅.
50 - Find the longest chain of routines r1,r2,...rn in R where out(r1) = {r2},
51 out(r2) = {r3}, ... out(rn-1) = {rn}, and rn = r.
52 - Remove (r1,r2,...,rn) from R and append them to L in that order.
53 Mark (r1,r2,...rn-1) as "will have their final `goto` removed."
54 - Repeat until R is empty.
20 Consider the set R of all available routines in the program.
21
22 Every routine either potentially falls through to a single other routine
23 or it does not potentially fall through to any routine.
24
25 More formally, we can say
26
27 fall : R → R ∪ {nil}, fall(r) ≠ r
28
29 where `nil` is an atom that represents no routine.
30
31 Now consider an operation chain() vaguely similar to a transitive closure
32 on fall(). Starting with r, we construct a list of r, fall(r),
33 fall(fall(r)), ... with the following restrictions:
34
35 - we stop when we reach `nil` (because fall(`nil`) is not defined)
36 - we stop when we see an element that is not in R.
37 - we stop when we see an element that we have already added to the
38 list (this is to prevent infinite lists due to cycles.)
39
40 With these definitions, our algorithm is something like this.
41
42 Treat R as a mutable set and start with an empty list of lists L. Then,
43
44 - For all r ∈ R, find all chain(r).
45 - Pick a longest such chain. Call it C.
46 - Append C to L.
47 - Remove all elements occurring in C, from R.
48 - Repeat until R is empty.
5549
5650 When the time comes to generate code, generate it in the order given by L.
51 In addition, each sublist in L represents a number of routines to
52 generate; all except the final routine in such a sublist need not have
53 any jump instruction generated for its final `goto`.
54
55 The tests in this document test against the list L.
56
57 Note that this optimization is a feature of SixtyPical's reference
58 compiler, not the language. So an implementation is not required
59 to pass these tests to be considered an implementation of SixtyPical.
5760
5861 [Falderal]: http://catseye.tc/node/Falderal
5962
6871 | define main routine
6972 | {
7073 | }
71 = {}
72 = *** serialization:
7374 = [
7475 = [
7576 = "main"
8788 | {
8889 | goto foo
8990 | }
90 = {
91 = "foo": [
92 = "main"
93 = ]
94 = }
95 = *** serialization:
9691 = [
9792 = [
9893 = "main",
121116 | {
122117 | goto foo
123118 | }
124 = {
125 = "foo": [
126 = "bar",
127 = "main"
128 = ]
129 = }
130 = *** serialization:
131 = [
132 = [
133 = "main"
134 = ],
135 = [
136 = "bar",
137 = "foo"
119 = [
120 = [
121 = "main",
122 = "foo"
123 = ],
124 = [
125 = "bar"
138126 = ]
139127 = ]
140128
157145 | define main routine trashes a, z, n
158146 | {
159147 | }
160 = {
161 = "bar": [
162 = "foo"
163 = ],
164 = "foo": [
165 = "bar"
166 = ]
167 = }
168 = *** cycles found:
169 = [
170 = "bar",
171 = "foo"
172 = ]
173 = *** after breaking cycle:
174 = {
175 = "bar": [
176 = "foo"
177 = ]
178 = }
179 = *** serialization:
180 = [
181 = [
182 = "main"
183 = ],
184 = [
185 = "foo",
186 = "bar"
148 = [
149 = [
150 = "main"
151 = ],
152 = [
153 = "bar",
154 = "foo"
187155 = ]
188156 = ]
189157
209177 | goto bar
210178 | }
211179 | }
212 = {}
213 = *** serialization:
214180 = [
215181 = [
216182 = "main"
244210 | copy bar, vec
245211 | goto vec
246212 | }
247 = {}
248 = *** serialization:
249213 = [
250214 = [
251215 = "main"