git @ Cat's Eye Technologies
Vastly simplify the fallthru analysis algorithm. Chris Pressey 4 years ago
3 changed file(s) with 79 addition(s) and 179 deletion(s).
 60 60 if options.optimize_fallthru: 61 61 from sixtypical.fallthru import FallthruAnalyzer 62 62 63 def dump(label, data): 63 def dump(data, label=None): 64 64 import json 65 65 if not options.dump_fallthru_info: 66 66 return 71 71 72 72 fa = FallthruAnalyzer(debug=options.debug) 73 73 fa.analyze_program(program) 74 dump(None, fa.fall_in_map) 75 76 fa.find_cycles() 77 while fa.cycles_found: 78 if options.debug: 79 dump('ancestors', fa.ancestor_map) 80 dump('cycles found', sorted(fa.cycles_found)) 81 fa.break_cycle() 82 dump('after breaking cycle', fa.fall_in_map) 83 fa.find_cycles() 84 85 74 routines_list = fa.serialize() 86 dump('serialization', routines_list) 75 dump(routines_list) 87 76 88 77 if options.analyze_only: 89 78 return
 2 2 from copy import copy 3 3 4 4 from sixtypical.model import RoutineType 5 6 7 def make_transitive_closure(d, key, s): 8 for sub in d.get(key, []): 9 if sub not in s: 10 s.add(sub) 11 make_transitive_closure(d, sub, s) 12 13 14 def find_chains(d, key, pred): 15 chains = [] 16 for sub in d.get(key, []): 17 if pred(sub): 18 subchains = find_chains(d, sub, pred) 19 for subchain in subchains: 20 chains.append([key] + subchain) 21 chains.append([key]) 22 return chains 23 5 24 6 25 7 class FallthruAnalyzer(object): 29 11 30 12 def analyze_program(self, program): 31 13 self.program = program 32 fall_in_map = {} 14 15 self.fallthru_map = {} 33 16 for routine in program.routines: 34 17 encountered_gotos = list(routine.encountered_gotos) 35 18 if len(encountered_gotos) == 1 and isinstance(encountered_gotos[0].type, RoutineType): 36 fall_in_map.setdefault(encountered_gotos[0].name, set()).add(routine.name) 37 self.fall_in_map = dict([(k, sorted(v)) for k, v in fall_in_map.iteritems()]) 38 return self.fall_in_map 19 self.fallthru_map[routine.name] = encountered_gotos[0].name 20 else: 21 self.fallthru_map[routine.name] = None 39 22 40 def find_cycles(self): 41 self.ancestor_map = {} 42 for key in self.fall_in_map: 43 ancestors = set() 44 make_transitive_closure(self.fall_in_map, key, ancestors) 45 self.ancestor_map[key] = sorted(ancestors) 46 47 self.cycles_found = set() 48 for key in self.ancestor_map: 49 if key in self.ancestor_map[key]: 50 self.cycles_found.add(key) 51 52 return self.cycles_found 53 54 def break_cycle(self): 55 cycle_to_break = sorted(self.cycles_found)[0] 56 cycles_to_break = set([cycle_to_break]) 57 58 new_fall_in_map = {} 59 for key in self.fall_in_map: 60 values = set(self.fall_in_map[key]) - cycles_to_break 61 if values: 62 new_fall_in_map[key] = sorted(values) 63 self.fall_in_map = new_fall_in_map 23 def find_chain(self, routine_name, available): 24 chain = [routine_name] 25 seen = set(chain) 26 while True: 27 next = 
self.fallthru_map.get(routine_name) 28 if next is None or next in seen or next not in available: 29 return chain 30 seen.add(next) 31 chain.append(next) 64 32 65 33 def serialize(self): 66 # NOTE, we can probably do this completely differently; 67 # construct the fall_out map 68 # construct fall_out chains 69 # sort these by length 70 # do the longest ones first 34 pending_routines = copy(self.fallthru_map) 35 roster = [] 71 36 72 pending_routines = sorted(self.fall_in_map.keys()) 73 routine_names = sorted([routine.name for routine in self.program.routines]) 74 for routine_name in routine_names: 75 if routine_name not in pending_routines: 76 pending_routines.append(routine_name) 37 main_chain = self.find_chain('main', pending_routines) 38 roster.append(main_chain) 39 for k in main_chain: 40 del pending_routines[k] 77 41 78 # make sure `main` appears first, whatever else may be the case. 79 pending_routines.remove('main') 80 pending_routines = ['main'] + pending_routines 81 82 roster = [] 83 42 while pending_routines: 84 # Pick a routine that is still pending to be serialized. 85 key = pending_routines[0] 86 87 in_set = self.fall_in_map.get(key, []) 88 89 # Find the longest chain of routines r1,r2,...rn in R 90 # where out(r1) = {r2}, out(r2} = {r3}, ... out(rn-1) = {rn}, and rn = r. 91 chains = find_chains(self.fall_in_map, key, lambda k: k in pending_routines) 43 chains = [self.find_chain(k, pending_routines) for k in pending_routines.keys()] 92 44 chains.sort(key=len, reverse=True) 93 routines = chains[0] 94 routines.reverse() 95 96 # Append (r1,r2,...,rn) to the roster and remove r1,r2,...rn from R. 97 # A sublist like this appearing in the roster has meaning 98 # "optimize the final goto out of all but the last routine in the sublist". 99 for r in routines: 100 pending_routines.remove(r) 101 roster.append(routines) 45 c = chains[0] 46 roster.append(c) 47 for k in c: 48 del pending_routines[k] 102 49 103 50 return roster
 17 17 A *fixed* routine means, a routine which is known at compile time, not a 18 18 `goto` through a vector. 19 19 20 Consider the set R of all routines in the program. 21 22 Every routine r1 ∈ R either potentially falls through to a single routine 23 r2 ∈ R (r2 ≠ r1) or it does not potentially fall through to any routine. 24 We can say out(r1) = {r2} or out(r1) = ∅. 25 26 Every routine r ∈ R in this set also has a set of zero or more 27 routines from which it is potentially fallen through to. Call this 28 in(r). It is the case that out(r1) = {r2} → r1 ∈ in(r2). 29 30 We can trace out the connections by following the in- or out- sets of 31 a given routine. Because each routine potentially falls through to only 32 a single routine, the structures we find will be tree-like, not DAG-like. 33 34 But they do permit cycles. 35 36 So, we first break those cycles. (Is there a "best" way to do this? 37 Perhaps. But for now, we just break them arbitrarily; pick an r1 that 38 has a cycle and remove it from in(r2) for all r2. This also means 39 that, now, out(r1) = ∅. Then check if there are still cycles, and keep 40 picking one and breaking it until there are no cycles remaining.) 41 42 We will be left with out() sets which are disjoint trees, i.e. 43 if r1 ∈ in(r2), then r1 ∉ in(r3) for all r3 ≠ r2. Also, 44 out(r1) = ∅ → for all r2, r1 ∉ in(r2). 45 46 We then follow an algorithm something like this. Treat R as a mutable 47 set and start with an empty list L. Then, 48 49 - Pick a routine r from R where out(r) = ∅. 50 - Find the longest chain of routines r1,r2,...rn in R where out(r1) = {r2}, 51 out(r2) = {r3}, ... out(rn-1) = {rn}, and rn = r. 52 - Remove (r1,r2,...,rn) from R and append them to L in that order. 53 Mark (r1,r2,...rn-1) as "will have their final `goto` removed." 54 - Repeat until R is empty. 20 Consider the set R of all available routines in the program. 
21 22 Every routine either potentially falls through to a single other routine 23 or it does not potentially fall through to any routine. 24 25 More formally, we can say 26 27 fall : R → R ∪ {nil}, fall(r) ≠ r 28 29 where `nil` is an atom that represents no routine. 30 31 Now consider an operation chain() vaguely similar to a transitive closure 32 on fall(). Starting with r, we construct a list of r, fall(r), 33 fall(fall(r)), ... with the following restrictions: 34 35 - we stop when we reach `nil` (because fall(`nil`) is not defined) 36 - we stop when we see an element that is not in R. 37 - we stop when we see an element that we have already added to the 38 list (this is to prevent infinite lists due to cycles.) 39 40 With these definitions, our algorithm is something like this. 41 42 Treat R as a mutable set and start with an empty list of lists L. Then, 43 44 - For all r ∈ R, find all chain(r). 45 - Pick a longest such chain. Call it C. 46 - Append C to L. 47 - Remove all elements occurring in C from R. 48 - Repeat until R is empty. 55 49 56 50 When the time comes to generate code, generate it in the order given by L. 51 In addition, each sublist in L represents a number of routines to 52 generate; all except the final routine in such a sublist need not have 53 any jump instruction generated for its final `goto`. 54 55 The tests in this document test against the list L. 56 57 Note that this optimization is a feature of SixtyPical's reference 58 compiler, not the language. So an implementation is not required 59 to pass these tests to be considered an implementation of SixtyPical. 
57 60 58 61 [Falderal]: http://catseye.tc/node/Falderal 59 62 68 71 | define main routine 69 72 | { 70 73 | } 71 = {} 72 = *** serialization: 73 74 = [ 74 75 = [ 75 76 = "main" 87 88 | { 88 89 | goto foo 89 90 | } 90 = { 91 = "foo": [ 92 = "main" 93 = ] 94 = } 95 = *** serialization: 96 91 = [ 97 92 = [ 98 93 = "main", 121 116 | { 122 117 | goto foo 123 118 | } 124 = { 125 = "foo": [ 126 = "bar", 127 = "main" 128 = ] 129 = } 130 = *** serialization: 131 = [ 132 = [ 133 = "main" 134 = ], 135 = [ 136 = "bar", 137 = "foo" 119 = [ 120 = [ 121 = "main", 122 = "foo" 123 = ], 124 = [ 125 = "bar" 138 126 = ] 139 127 = ] 140 128 157 145 | define main routine trashes a, z, n 158 146 | { 159 147 | } 160 = { 161 = "bar": [ 162 = "foo" 163 = ], 164 = "foo": [ 165 = "bar" 166 = ] 167 = } 168 = *** cycles found: 169 = [ 170 = "bar", 171 = "foo" 172 = ] 173 = *** after breaking cycle: 174 = { 175 = "bar": [ 176 = "foo" 177 = ] 178 = } 179 = *** serialization: 180 = [ 181 = [ 182 = "main" 183 = ], 184 = [ 185 = "foo", 186 = "bar" 148 = [ 149 = [ 150 = "main" 151 = ], 152 = [ 153 = "bar", 154 = "foo" 187 155 = ] 188 156 = ] 189 157 209 177 | goto bar 210 178 | } 211 179 | } 212 = {} 213 = *** serialization: 214 180 = [ 215 181 = [ 216 182 = "main" 244 210 | copy bar, vec 245 211 | goto vec 246 212 | } 247 = {} 248 = *** serialization: 249 213 = [ 250 214 = [ 251 215 = "main"