More sophisticated static analysis: writeable, touched, meaningful.
Chris Pressey
9 years ago
7 | 7 | ) |
8 | 8 | |
9 | 9 | |
10 | UNINITIALIZED = 'UNINITIALIZED' | |
11 | INITIALIZED = 'INITIALIZED' | |
12 | ||
13 | ||
14 | 10 | class StaticAnalysisError(ValueError): |
15 | 11 | pass |
16 | 12 | |
40 | 36 | |
41 | 37 | |
42 | 38 | class Context(): |
39 | """ | |
40 | A location is touched if it was changed (or even potentially | |
41 | changed) during this routine, or some routine called by this routine. | |
42 | ||
43 | A location is meaningful if it was an input to this routine, | |
44 | or if it was set to a meaningful value by some operation in this | |
45 | routine (or some routine called by this routine). | |
46 | ||
47 | A location is writeable if it was listed in the outputs or trashes | |
48 | lists of this routine. | |
49 | """ | |
43 | 50 | def __init__(self, inputs, outputs, trashes): |
44 | self._store = {} # Ref -> INITIALIZED/UNINITIALIZED | |
45 | self._writeables = set() | |
51 | self._touched = set() | |
52 | self._meaningful = set() | |
53 | self._writeable = set() | |
46 | 54 | |
47 | 55 | for ref in inputs: |
48 | self._store.setdefault(ref, INITIALIZED) | |
56 | self._meaningful.add(ref) | |
49 | 57 | output_names = set() |
50 | 58 | for ref in outputs: |
51 | 59 | output_names.add(ref.name) |
52 | self._store.setdefault(ref, UNINITIALIZED) | |
53 | self._writeables.add(ref.name) | |
60 | self._writeable.add(ref) | |
54 | 61 | for ref in trashes: |
55 | 62 | if ref.name in output_names: |
56 | 63 | raise UsageClashError(ref.name) |
57 | self._store.setdefault(ref, UNINITIALIZED) | |
58 | self._writeables.add(ref.name) | |
64 | self._writeable.add(ref) | |
59 | 65 | |
60 | 66 | def clone(self): |
61 | 67 | c = Context([], [], []) |
62 | c._store = dict(self._store) | |
63 | c._writeables = set(self._writeables) | |
68 | c._touched = set(self._touched) | |
69 | c._meaningful = set(self._meaningful) | |
70 | c._writeable = set(self._writeable) | |
64 | 71 | return c |
65 | 72 | |
66 | 73 | def set_from(self, c): |
67 | self._store = dict(c._store) | |
68 | self._writeables = set(c._writeables) | |
69 | ||
70 | def each_initialized(self): | |
71 | for key, value in self._store.iteritems(): | |
72 | if value == INITIALIZED: | |
73 | yield key | |
74 | ||
75 | def assert_initialized(self, *refs, **kwargs): | |
74 | self._touched = set(c._touched) | |
75 | self._meaningful = set(c._meaningful) | |
76 | self._writeable = set(c._writeable) | |
77 | ||
78 | def each_meaningful(self): | |
79 | for ref in self._meaningful: | |
80 | yield ref | |
81 | ||
82 | def each_touched(self): | |
83 | for ref in self._touched: | |
84 | yield ref | |
85 | ||
86 | def assert_meaningful(self, *refs, **kwargs): | |
76 | 87 | exception_class = kwargs.get('exception_class', UninitializedAccessError) |
77 | 88 | for ref in refs: |
78 | 89 | if isinstance(ref, ConstantRef): |
79 | 90 | pass |
80 | 91 | elif isinstance(ref, LocationRef): |
81 | if self.get(ref) != INITIALIZED: | |
92 | if ref not in self._meaningful: | |
82 | 93 | raise exception_class(ref.name) |
83 | 94 | else: |
84 | 95 | raise ValueError(ref) |
85 | 96 | |
86 | 97 | def assert_writeable(self, *refs): |
87 | 98 | for ref in refs: |
88 | if ref.name not in self._writeables: | |
99 | if ref not in self._writeable: | |
89 | 100 | raise IllegalWriteError(ref.name) |
90 | 101 | |
91 | def set_initialized(self, *refs): | |
92 | for ref in refs: | |
93 | self.set(ref, INITIALIZED) | |
94 | ||
95 | def set_uninitialized(self, *refs): | |
96 | for ref in refs: | |
97 | self.set(ref, UNINITIALIZED) | |
98 | ||
99 | def get(self, ref): | |
100 | if isinstance(ref, ConstantRef): | |
101 | return INITIALIZED | |
102 | elif isinstance(ref, LocationRef): | |
103 | if ref not in self._store: | |
104 | return UNINITIALIZED | |
105 | return self._store[ref] | |
106 | else: | |
107 | raise ValueError(ref) | |
108 | ||
109 | def set(self, ref, value): | |
110 | assert isinstance(ref, LocationRef) | |
111 | self._store[ref] = value | |
112 | ||
102 | def set_touched(self, *refs): | |
103 | for ref in refs: | |
104 | self._touched.add(ref) | |
105 | ||
106 | def set_meaningful(self, *refs): | |
107 | for ref in refs: | |
108 | self._meaningful.add(ref) | |
109 | ||
110 | def set_unmeaningful(self, *refs): | |
111 | for ref in refs: | |
112 | if ref in self._meaningful: | |
113 | self._meaningful.remove(ref) | |
114 | ||
115 | def set_written(self, *refs): | |
116 | """A "helper" method which does the following common sequence for | |
117 | the given refs: asserts they're all writeable, and sets them all | |
118 | as touched and meaningful.""" | |
119 | self.assert_writeable(*refs) | |
120 | self.set_touched(*refs) | |
121 | self.set_meaningful(*refs) | |
113 | 122 | |
114 | 123 | def analyze_program(program): |
115 | 124 | assert isinstance(program, Program) |
126 | 135 | context = Context(routine.inputs, routine.outputs, routine.trashes) |
127 | 136 | analyze_block(routine.block, context, routines) |
128 | 137 | for ref in routine.outputs: |
129 | context.assert_initialized(ref, exception_class=UninitializedOutputError) | |
138 | context.assert_meaningful(ref, exception_class=UninitializedOutputError) | |
139 | for ref in context.each_touched(): | |
140 | if ref not in routine.outputs and ref not in routine.trashes: | |
141 | raise IllegalWriteError(ref.name) | |
130 | 142 | |
131 | 143 | |
132 | 144 | def analyze_block(block, context, routines): |
149 | 161 | raise TypeMismatchError((src, dest)) |
150 | 162 | elif src.type != dest.type: |
151 | 163 | raise TypeMismatchError((src, dest)) |
152 | context.assert_initialized(src) | |
153 | context.assert_writeable(dest, FLAG_Z, FLAG_N) | |
154 | context.set_initialized(dest, FLAG_Z, FLAG_N) | |
164 | context.assert_meaningful(src) | |
165 | context.set_written(dest, FLAG_Z, FLAG_N) | |
155 | 166 | elif opcode == 'st': |
156 | 167 | if instr.index: |
157 | 168 | if src.type == TYPE_BYTE and dest.type == TYPE_BYTE_TABLE: |
160 | 171 | raise TypeMismatchError((src, dest)) |
161 | 172 | elif src.type != dest.type: |
162 | 173 | raise TypeMismatchError((src, dest)) |
163 | context.assert_initialized(src) | |
164 | context.assert_writeable(dest) | |
165 | context.set_initialized(dest) | |
174 | context.assert_meaningful(src) | |
175 | context.set_written(dest) | |
166 | 176 | elif opcode in ('add', 'sub'): |
167 | context.assert_initialized(src, dest, FLAG_C) | |
168 | context.assert_writeable(dest, FLAG_Z, FLAG_N, FLAG_C, FLAG_V) | |
169 | context.set_initialized(dest, FLAG_Z, FLAG_N, FLAG_C, FLAG_V) | |
177 | context.assert_meaningful(src, dest, FLAG_C) | |
178 | context.set_written(dest, FLAG_Z, FLAG_N, FLAG_C, FLAG_V) | |
170 | 179 | elif opcode in ('inc', 'dec'): |
171 | context.assert_initialized(dest) | |
172 | context.assert_writeable(dest, FLAG_Z, FLAG_N) | |
173 | context.set_initialized(dest, FLAG_Z, FLAG_N) | |
180 | context.assert_meaningful(dest) | |
181 | context.set_written(dest, FLAG_Z, FLAG_N) | |
174 | 182 | elif opcode == 'cmp': |
175 | context.assert_initialized(src, dest) | |
176 | context.assert_writeable(FLAG_Z, FLAG_N, FLAG_C) | |
177 | context.set_initialized(FLAG_Z, FLAG_N, FLAG_C) | |
183 | context.assert_meaningful(src, dest) | |
184 | context.set_written(FLAG_Z, FLAG_N, FLAG_C) | |
178 | 185 | elif opcode in ('and', 'or', 'xor'): |
179 | context.assert_initialized(src, dest) | |
180 | context.assert_writeable(dest, FLAG_Z, FLAG_N) | |
181 | context.set_initialized(dest, FLAG_Z, FLAG_N) | |
186 | context.assert_meaningful(src, dest) | |
187 | context.set_written(dest, FLAG_Z, FLAG_N) | |
182 | 188 | elif opcode in ('shl', 'shr'): |
183 | context.assert_initialized(dest, FLAG_C) | |
184 | context.assert_writeable(dest, FLAG_Z, FLAG_N, FLAG_C) | |
185 | context.set_initialized(dest, FLAG_Z, FLAG_N, FLAG_C) | |
189 | context.assert_meaningful(dest, FLAG_C) | |
190 | context.set_written(dest, FLAG_Z, FLAG_N, FLAG_C) | |
186 | 191 | elif opcode == 'call': |
187 | 192 | routine = routines[instr.name] |
188 | 193 | for ref in routine.inputs: |
189 | context.assert_initialized(ref) | |
194 | context.assert_meaningful(ref) | |
190 | 195 | for ref in routine.outputs: |
191 | context.assert_writeable(ref) | |
192 | context.set_initialized(ref) | |
196 | context.set_written(ref) | |
193 | 197 | for ref in routine.trashes: |
194 | 198 | context.assert_writeable(ref) |
195 | context.set_uninitialized(ref) | |
199 | context.set_touched(ref) | |
200 | context.set_unmeaningful(ref) | |
196 | 201 | elif opcode == 'if': |
197 | 202 | context1 = context.clone() |
198 | 203 | context2 = context.clone() |
199 | 204 | analyze_block(instr.block1, context1, routines) |
200 | 205 | if instr.block2 is not None: |
201 | 206 | analyze_block(instr.block2, context2, routines) |
202 | for ref in context1.each_initialized(): | |
203 | context2.assert_initialized(ref, exception_class=InconsistentInitializationError) | |
204 | for ref in context2.each_initialized(): | |
205 | context1.assert_initialized(ref, exception_class=InconsistentInitializationError) | |
207 | # TODO: do we need to deal with touched separately here too? | |
208 | for ref in context1.each_meaningful(): | |
209 | context2.assert_meaningful(ref, exception_class=InconsistentInitializationError) | |
210 | for ref in context2.each_meaningful(): | |
211 | context1.assert_meaningful(ref, exception_class=InconsistentInitializationError) | |
206 | 212 | context.set_from(context1) |
207 | 213 | elif opcode == 'repeat': |
208 | 214 | # it will always be executed at least once, so analyze it having |
221 | 227 | pass |
222 | 228 | else: |
223 | 229 | raise TypeMismatchError((src, dest)) |
224 | context.assert_initialized(src) | |
225 | context.assert_writeable(dest) | |
226 | context.set_initialized(dest) | |
227 | context.set_uninitialized(REG_A, FLAG_Z, FLAG_N) | |
230 | context.assert_meaningful(src) | |
231 | context.set_written(dest) | |
232 | context.set_touched(REG_A, FLAG_Z, FLAG_N) | |
233 | context.set_unmeaningful(REG_A, FLAG_Z, FLAG_N) | |
228 | 234 | else: |
229 | 235 | raise NotImplementedError(opcode) |