Buffers go back to being mutable. Beautiful mutable buffers.
Chris Pressey
10 years ago
8 | 8 | class Buffer(object): |
9 | 9 | """Abstract base class for all Buffer objects. |
10 | 10 | |
11 | You should treat Buffer objects as immutable. | |
11 | Buffer objects are mutable, but must be capable of saving and restoring | |
12 | their state indefinitely. | |
12 | 13 | |
13 | 14 | """ |
14 | 15 | def __init__(self, filename='<data>', position=0, line_number=1, column_number=1): |
16 | 17 | be given too, to match. |
17 | 18 | |
18 | 19 | """ |
19 | self._filename = filename | |
20 | self.filename = filename | |
20 | 21 | self.position = position |
21 | self._line_number = line_number | |
22 | self._column_number = column_number | |
22 | self.line_number = line_number | |
23 | self.column_number = column_number | |
23 | 24 | |
24 | @property | |
25 | def filename(self): | |
26 | return self._filename | |
25 | def save_state(self): | |
26 | raise NotImplementedError | |
27 | 27 | |
28 | @property | |
29 | def line_number(self): | |
30 | return self._line_number | |
28 | def restore_state(self): | |
29 | raise NotImplementedError | |
31 | 30 | |
32 | @property | |
33 | def column_number(self): | |
34 | return self._column_number | |
31 | def pop_state(self): | |
32 | raise NotImplementedError | |
35 | 33 | |
36 | 34 | def advance(self, inp): |
37 | 35 | """Given a string that we have just consumed from the buffer, |
73 | 71 | """ |
74 | 72 | assert not isinstance(string, unicode) |
75 | 73 | self.string = string |
74 | self.stack = [] | |
76 | 75 | Buffer.__init__(self, **kwargs) |
76 | ||
77 | def save_state(self): | |
78 | self.stack.append((self.position, self.line_number, self.column_number)) | |
79 | ||
80 | def restore_state(self): | |
81 | (self.position, self.line_number, self.column_number) = self.stack.pop() | |
82 | ||
83 | def pop_state(self): | |
84 | self.stack.pop() | |
77 | 85 | |
78 | 86 | def __str__(self): |
79 | 87 | return self.string |
86 | 94 | def chop(self, amount): |
87 | 95 | assert self.position <= len(self.string) - amount, \ |
88 | 96 | "attempt made to chop past end of buffer" |
89 | chars = self.string[self.position:self.position + amount] | |
97 | bytes = self.string[self.position:self.position + amount] | |
90 | 98 | |
91 | (line_number, column_number) = self.advance(chars) | |
92 | new_buffer = StringBuffer(self.string, | |
93 | filename=self._filename, | |
94 | position=self.position + amount, | |
95 | line_number=line_number, | |
96 | column_number=column_number | |
97 | ) | |
99 | self.position += amount | |
100 | (self.line_number, self.column_number) = self.advance(bytes) | |
98 | 101 | |
99 | return (chars, new_buffer) | |
102 | return bytes | |
100 | 103 | |
101 | 104 | def first(self, amount): |
102 | #assert self.position <= len(self.string) - amount, \ | |
103 | # "attempt made to first past end of buffer" | |
104 | #if self.position > len(self.string) - amount: | |
105 | # return None | |
106 | chars = self.string[self.position:self.position + amount] | |
105 | bytes = self.string[self.position:self.position + amount] | |
107 | 106 | |
108 | # did not modify self, so it's OK to return it | |
109 | return (chars, self) | |
107 | return bytes | |
110 | 108 | |
111 | 109 | |
112 | 110 | class FileBuffer(Buffer): |
114 | 112 | # filehandles are mutable :/ ... and Buffers can't be mutable |
115 | 113 | # at all. TODO: make it so that Buffers can be mutable, oi |
116 | 114 | def __init__(self, file, pre_buffer='', **kwargs): |
115 | assert False, 'NO' | |
117 | 116 | self.file = file |
118 | 117 | self.pre_buffer = pre_buffer |
118 | self.stack = [] | |
119 | 119 | Buffer.__init__(self, **kwargs) |
120 | ||
121 | def save_state(self): | |
122 | self.stack.append((self.position, self.line_number, self.column_number)) | |
123 | ||
124 | def restore_state(self): | |
125 | (self.position, self.line_number, self.column_number) = self.stack.pop() | |
126 | ||
127 | def pop_state(self): | |
128 | self.stack.pop() | |
120 | 129 | |
121 | 130 | def get_bytes(self, amount): |
122 | 131 | """Returns a new pre_buffer.""" |
128 | 137 | |
129 | 138 | def chop(self, amount): |
130 | 139 | pre_buffer = self.get_bytes(amount) |
131 | chars = pre_buffer[0:amount] | |
140 | bytes = pre_buffer[0:amount] | |
132 | 141 | remaining = pre_buffer[amount:] |
133 | 142 | |
134 | (line_number, column_number) = self.advance(chars) | |
135 | new_buffer = FileBuffer(self.file, | |
136 | filename=self._filename, | |
137 | pre_buffer=remaining, | |
138 | position=self.position + amount, | |
139 | line_number=line_number, | |
140 | column_number=column_number | |
141 | ) | |
142 | self.file = None | |
143 | return (chars, new_buffer) | |
143 | (self.line_number, self.column_number) = self.advance(bytes) | |
144 | return bytes | |
144 | 145 | |
145 | 146 | def first(self, amount): |
146 | 147 | pre_buffer = self.get_bytes(amount) |
147 | chars = pre_buffer[0:amount] | |
148 | bytes = pre_buffer[0:amount] | |
148 | 149 | |
149 | new_buffer = FileBuffer(self.file, | |
150 | filename=self._filename, | |
151 | pre_buffer=pre_buffer, | |
152 | position=self.position, | |
153 | line_number=self._line_number, | |
154 | column_number=self._column_number | |
155 | ) | |
156 | self.file = None | |
157 | return (chars, new_buffer) | |
150 | return bytes |
134 | 134 | return (success, value_rhs) |
135 | 135 | elif isinstance(ast, Or): |
136 | 136 | saved_context = self.context.clone() |
137 | saved_scanner_state = self.scanner.get_state() | |
138 | self.event('begin_or', ast.lhs, ast.rhs, saved_context, saved_scanner_state) | |
137 | self.scanner.save_state() | |
138 | self.event('begin_or', ast.lhs, ast.rhs, saved_context) | |
139 | 139 | (succeeded, result) = self.interpret(ast.lhs) |
140 | 140 | if succeeded: |
141 | 141 | self.event('succeed_or', result) |
142 | self.scanner.pop_state() | |
142 | 143 | return (True, result) |
143 | 144 | else: |
144 | 145 | self.event('fail_or', self.context, self.scanner, result) |
145 | 146 | self.context = saved_context |
146 | self.scanner.install_state(saved_scanner_state) | |
147 | self.scanner.restore_state() | |
147 | 148 | return self.interpret(ast.rhs) |
148 | 149 | elif isinstance(ast, Call): |
149 | 150 | prodref = ast.prodref |
186 | 187 | (success, result) = self.interpret(ast.texpr) |
187 | 188 | buffer = str(result.expand(self.context)) |
188 | 189 | self.event('interpret_on_buffer', buffer) |
189 | saved_scanner_state = self.scanner.get_state() | |
190 | new_state = StringBuffer(buffer) | |
191 | self.scanner.install_state(new_state) | |
190 | previous_buffer = self.scanner.get_buffer() | |
191 | self.scanner.install_buffer(StringBuffer(buffer)) | |
192 | 192 | (success, result) = self.interpret(ast.rule) |
193 | self.scanner.install_state(saved_scanner_state) | |
193 | self.scanner.install_buffer(previous_buffer) | |
194 | 194 | return (success, result) |
195 | 195 | elif isinstance(ast, Set): |
196 | 196 | (success, variable) = self.interpret(ast.variable) |
201 | 201 | elif isinstance(ast, Not): |
202 | 202 | expr = ast.rule |
203 | 203 | saved_context = self.context.clone() |
204 | saved_scanner_state = self.scanner.get_state() | |
205 | self.event('begin_not', expr, saved_context, saved_scanner_state) | |
204 | self.scanner.save_state() | |
205 | self.event('begin_not', expr, saved_context) | |
206 | 206 | (succeeded, result) = self.interpret(expr) |
207 | 207 | self.context = saved_context |
208 | self.scanner.install_state(saved_scanner_state) | |
208 | self.scanner.restore_state() | |
209 | 209 | if succeeded: |
210 | 210 | return (False, Atom(self.scanner.error_message( |
211 | 211 | "anything else", self.scanner.peek() |
219 | 219 | successful_result = result |
220 | 220 | while succeeded: |
221 | 221 | saved_context = self.context.clone() |
222 | saved_scanner_state = self.scanner.get_state() | |
222 | self.scanner.save_state() | |
223 | 223 | (succeeded, result) = self.interpret(ast.rule) |
224 | 224 | if succeeded: |
225 | self.scanner.pop_state() | |
225 | 226 | successful_result = result |
226 | 227 | self.event('repeating_while', result) |
228 | else: | |
229 | self.scanner.restore_state() | |
227 | 230 | self.context = saved_context |
228 | self.scanner.install_state(saved_scanner_state) | |
229 | 231 | self.event('end_while', result) |
230 | 232 | return (True, successful_result) |
231 | 233 | elif isinstance(ast, Concat): |
22 | 22 | def parse(filename): |
23 | 23 | with open(filename, 'r') as f: |
24 | 24 | scanner = Scanner( |
25 | FileBuffer(f, filename=filename), | |
26 | #StringBuffer(f.read(), filename=filename), | |
25 | #FileBuffer(f, filename=filename), | |
26 | StringBuffer(f.read(), filename=filename), | |
27 | 27 | engines=(TamsinScannerEngine(),) |
28 | 28 | ) |
29 | 29 | parser = Parser(scanner) |
48 | 48 | |
49 | 49 | def run(ast, listeners=None): |
50 | 50 | scanner = Scanner( |
51 | FileBuffer(sys.stdin, filename='<stdin>'), | |
52 | #StringBuffer(sys.stdin.read(), filename='<stdin>'), | |
51 | #FileBuffer(sys.stdin, filename='<stdin>'), | |
52 | StringBuffer(sys.stdin.read(), filename='<stdin>'), | |
53 | 53 | engines=(UTF8ScannerEngine(),), |
54 | 54 | listeners=listeners |
55 | 55 | ) |
29 | 29 | self.buffer, self.position |
30 | 30 | ) |
31 | 31 | |
32 | def get_state(self): | |
33 | """Returns an object which saves the current state of this | |
32 | def get_buffer(self): | |
33 | """Returns an object which represents the current Buffer of this | |
34 | 34 | Scanner. |
35 | 35 | |
36 | 36 | """ |
37 | 37 | return self.buffer |
38 | 38 | |
39 | def install_state(self, state): | |
40 | """Restores the state of this Scanner to that which was saved by | |
41 | a previous call to get_state(). | |
39 | def install_buffer(self, state): | |
40 | """Restores the Buffer of this Scanner to that which was saved by | |
41 | a previous call to get_buffer(). | |
42 | 42 | |
43 | 43 | """ |
44 | 44 | self.buffer = state |
49 | 49 | def pop_engine(self): |
50 | 50 | engine = self.engines.pop() |
51 | 51 | |
52 | # # # # # # # Buffer interface # # # # # # # | |
53 | # | |
54 | # These methods hide the immutability of Buffer. | |
55 | # | |
52 | def save_state(self): | |
53 | return self.buffer.save_state() | |
54 | ||
55 | def restore_state(self): | |
56 | return self.buffer.restore_state() | |
57 | ||
58 | def pop_state(self): | |
59 | return self.buffer.pop_state() | |
56 | 60 | |
57 | 61 | def chop(self, amount): |
58 | 62 | """Returns amount characters from the buffer and advances the |
61 | 65 | Should only be used by ScannerEngines. |
62 | 66 | |
63 | 67 | """ |
64 | (chars, buffer) = self.buffer.chop(amount) | |
65 | self.buffer = buffer | |
66 | return chars | |
68 | return self.buffer.chop(amount) | |
67 | 69 | |
68 | 70 | def first(self, amount): |
69 | 71 | """Returns amount characters from the buffer. Does not advance the |
73 | 75 | reporting. |
74 | 76 | |
75 | 77 | """ |
76 | (chars, buffer) = self.buffer.first(amount) | |
77 | self.buffer = buffer | |
78 | return chars | |
79 | ||
80 | # # # # # # # # # # # # # # # # # # # # # # | |
78 | return self.buffer.first(amount) | |
81 | 79 | |
82 | 80 | def is_at_eof(self): |
83 | 81 | """Returns True iff there is no more input to scan. |
148 | 146 | return token |
149 | 147 | |
150 | 148 | def peek(self): |
151 | backup = self.get_state() | |
149 | self.buffer.save_state() | |
152 | 150 | token = self.scan() |
153 | self.install_state(backup) | |
151 | self.buffer.restore_state() | |
154 | 152 | return token |
155 | 153 | |
156 | 154 | def consume(self, t): |
158 | 156 | t = t.encode('UTF-8') |
159 | 157 | assert not isinstance(t, unicode) |
160 | 158 | self.event('consume', t) |
161 | backup = self.get_state() | |
159 | self.buffer.save_state() | |
162 | 160 | s = self.scan() |
163 | 161 | if s == t: |
162 | self.buffer.pop_state() | |
164 | 163 | return t |
165 | 164 | else: |
166 | self.install_state(backup) | |
165 | self.buffer.restore_state() | |
167 | 166 | return None |
168 | 167 | |
169 | 168 | def expect(self, t): |