Completely replace ScannerState with Buffer.
Chris Pressey
10 years ago
6 | 6 | |
7 | 7 | |
8 | 8 | class Buffer(object): |
9 | """Abstract base class for all Buffer objects. | |
10 | ||
11 | You should treat Buffer objects as immutable. | |
12 | ||
13 | """ | |
9 | 14 | def chop(self, amount): |
10 | 15 | raise NotImplementedError |
11 | 16 | |
15 | 20 | def is_at_eof(self): |
16 | 21 | raise NotImplementedError |
17 | 22 | |
23 | def is_at_utf8(self): | |
24 | k = ord(self.first(1)) | |
25 | if k & 0b11100000 == 0b11000000: | |
26 | return 2 | |
27 | elif k & 0b11110000 == 0b11100000: | |
28 | return 3 | |
29 | elif k & 0b11111000 == 0b11110000: | |
30 | return 4 | |
31 | else: | |
32 | return 0 | |
33 | ||
34 | def isalnum(self): | |
35 | return self.first(1).isalnum() | |
36 | ||
37 | def startswith(self, strings): | |
38 | for s in strings: | |
39 | if self.first(len(s)) == s: | |
40 | return True | |
41 | return False | |
42 | ||
18 | 43 | def copy(self): |
19 | 44 | raise NotImplementedError |
20 | 45 | |
21 | 46 | |
22 | 47 | class StringBuffer(Buffer): |
23 | 48 | def __init__(self, string, filename='<data>', position=0, line_number=1, column_number=1): |
49 | """Create a new StringBuffer object. | |
50 | ||
51 | `string` should be a raw string, not unicode. If `position` is given, | |
52 | `line_number` and `column_number` should be given too, to match. | |
53 | ||
54 | """ | |
24 | 55 | assert not isinstance(string, unicode) |
25 | 56 | self.string = string |
57 | self._filename = filename | |
26 | 58 | self.position = position |
27 | self._filename = filename | |
28 | 59 | self._line_number = line_number |
29 | 60 | self._column_number = column_number |
30 | 61 |
11 | 11 | from tamsin.term import Term, Atom |
12 | 12 | from tamsin.event import EventProducer |
13 | 13 | from tamsin.scanner import ( |
14 | ScannerState, ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine | |
14 | ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine | |
15 | 15 | ) |
16 | 16 | import tamsin.sysmod |
17 | 17 | |
187 | 187 | buffer = str(result.expand(self.context)) |
188 | 188 | self.event('interpret_on_buffer', buffer) |
189 | 189 | saved_scanner_state = self.scanner.get_state() |
190 | new_state = ScannerState(StringBuffer(buffer)) | |
190 | new_state = StringBuffer(buffer) | |
191 | 191 | self.scanner.install_state(new_state) |
192 | 192 | (success, result) = self.interpret(ast.rule) |
193 | 193 | self.scanner.install_state(saved_scanner_state) |
10 | 10 | from tamsin.event import DebugEventListener |
11 | 11 | from tamsin.term import Atom |
12 | 12 | from tamsin.scanner import ( |
13 | Scanner, ScannerState, EOF, UTF8ScannerEngine, TamsinScannerEngine | |
13 | Scanner, EOF, UTF8ScannerEngine, TamsinScannerEngine | |
14 | 14 | ) |
15 | 15 | from tamsin.parser import Parser |
16 | 16 | from tamsin.interpreter import Interpreter |
23 | 23 | with open(filename, 'r') as f: |
24 | 24 | contents = f.read() |
25 | 25 | scanner = Scanner( |
26 | ScannerState(StringBuffer(contents, filename=filename)), | |
26 | StringBuffer(contents, filename=filename), | |
27 | 27 | engines=(TamsinScannerEngine(),) |
28 | 28 | ) |
29 | 29 | parser = Parser(scanner) |
48 | 48 | |
49 | 49 | def run(ast, listeners=None): |
50 | 50 | scanner = Scanner( |
51 | ScannerState(StringBuffer(sys.stdin.read(), filename='<stdin>')), | |
51 | StringBuffer(sys.stdin.read(), filename='<stdin>'), | |
52 | 52 | engines=(UTF8ScannerEngine(),), |
53 | 53 | listeners=listeners |
54 | 54 | ) |
70 | 70 | if args[0] == 'scan': |
71 | 71 | with open(args[1], 'r') as f: |
72 | 72 | scanner = Scanner( |
73 | ScannerState(f.read(), filename=args[1]), | |
73 | StringBuffer(f.read(), filename=args[1]), | |
74 | 74 | engines=(TamsinScannerEngine(),), |
75 | 75 | listeners=listeners |
76 | 76 | ) |
8 | 8 | Send, Set, Concat, Using, On, Fold, |
9 | 9 | AtomNode, VariableNode, ConstructorNode, |
10 | 10 | ) |
11 | from tamsin.buffer import StringBuffer | |
11 | 12 | from tamsin.event import EventProducer |
12 | 13 | from tamsin.scanner import ( |
13 | EOF, Scanner, ScannerState, TamsinScannerEngine | |
14 | EOF, Scanner, TamsinScannerEngine | |
14 | 15 | ) |
15 | 16 | |
16 | 17 | |
32 | 33 | contents = f.read() |
33 | 34 | return Parser( |
34 | 35 | Scanner( |
35 | ScannerState(contents, filename=filename), | |
36 | StringBuffer(contents, filename=filename), | |
36 | 37 | engines=(TamsinScannerEngine(),), |
37 | 38 | ) |
38 | 39 | ) |
10 | 10 | EOF = object() |
11 | 11 | |
12 | 12 | |
13 | class ScannerState(object): | |
14 | def __init__(self, buffer): | |
15 | """Create a new ScannerState object. | |
16 | ||
17 | You should treat ScannerState objects as immutable. | |
18 | ||
19 | buffer should be a raw string, not unicode. If position is given, | |
20 | line_number and column_number should be given too, to match. | |
21 | ||
22 | """ | |
23 | assert isinstance(buffer, Buffer) | |
24 | self._buffer = buffer | |
25 | ||
26 | @property | |
27 | def filename(self): | |
28 | return self._buffer.filename | |
29 | ||
30 | @property | |
31 | def line_number(self): | |
32 | return self._buffer.line_number | |
33 | ||
34 | @property | |
35 | def column_number(self): | |
36 | return self._buffer.column_number | |
37 | ||
38 | def is_at_eof(self): | |
39 | return self._buffer.is_at_eof() | |
40 | ||
41 | def is_at_utf8(self): | |
42 | k = ord(self._buffer.first(1)) | |
43 | if k & 0b11100000 == 0b11000000: | |
44 | return 2 | |
45 | elif k & 0b11110000 == 0b11100000: | |
46 | return 3 | |
47 | elif k & 0b11111000 == 0b11110000: | |
48 | return 4 | |
49 | else: | |
50 | return 0 | |
51 | ||
52 | def isalnum(self): | |
53 | return self._buffer.first(1).isalnum() | |
54 | ||
55 | def chop(self, amount): | |
56 | (result, new_buffer) = self._buffer.chop(amount) | |
57 | new_state = ScannerState(new_buffer) | |
58 | return (result, new_state) | |
59 | ||
60 | def first(self, amount): | |
61 | return self._buffer.first(amount) | |
62 | ||
63 | def startswith(self, strings): | |
64 | for s in strings: | |
65 | if self._buffer.first(len(s)) == s: | |
66 | return True | |
67 | return False | |
68 | ||
69 | def __eq__(self, other): | |
70 | return self._buffer == other._buffer | |
71 | ||
72 | def __repr__(self): | |
73 | return "ScannerState(%r)" % ( | |
74 | self._buffer | |
75 | ) | |
76 | ||
77 | ||
78 | 13 | class Scanner(EventProducer): |
79 | def __init__(self, state, engines=None, listeners=None): | |
14 | def __init__(self, buffer, engines=None, listeners=None): | |
80 | 15 | """Create a new Scanner object. |
81 | 16 | |
82 | 17 | """ |
83 | 18 | self.listeners = listeners |
84 | 19 | self.event('set_buffer', buffer) |
85 | assert isinstance(state, ScannerState) | |
86 | self.state = state | |
20 | assert isinstance(buffer, Buffer) | |
21 | self.buffer = buffer | |
87 | 22 | self.engines = [] |
88 | 23 | if engines is not None: |
89 | 24 | for engine in engines: |
99 | 34 | Scanner. |
100 | 35 | |
101 | 36 | """ |
102 | return self.state | |
37 | return self.buffer | |
103 | 38 | |
104 | 39 | def install_state(self, state): |
105 | 40 | """Restores the state of this Scanner to that which was saved by |
106 | 41 | a previous call to get_state(). |
107 | 42 | |
108 | 43 | """ |
109 | self.state = state | |
44 | self.buffer = state | |
110 | 45 | |
111 | 46 | def push_engine(self, engine): |
112 | 47 | self.engines.append(engine) |
121 | 56 | to see if ... something |
122 | 57 | |
123 | 58 | """ |
124 | return self.state.is_at_eof() | |
59 | return self.buffer.is_at_eof() | |
125 | 60 | |
126 | 61 | def is_at_utf8(self): |
127 | 62 | """Returns the number of bytes following that comprise a UTF-8 |
130 | 65 | Should only be used by ScannerEngines. |
131 | 66 | |
132 | 67 | """ |
133 | return self.state.is_at_utf8() | |
68 | return self.buffer.is_at_utf8() | |
134 | 69 | |
135 | 70 | def chop(self, amount): |
136 | 71 | """Returns amount characters from the buffer and advances the |
139 | 74 | Should only be used by ScannerEngines. |
140 | 75 | |
141 | 76 | """ |
142 | (result, state) = self.state.chop(amount) | |
143 | self.state = state | |
77 | (result, buffer) = self.buffer.chop(amount) | |
78 | self.buffer = buffer | |
144 | 79 | return result |
145 | 80 | |
146 | 81 | def first(self, amount): |
151 | 86 | reporting. |
152 | 87 | |
153 | 88 | """ |
154 | return self.state.first(amount) | |
89 | return self.buffer.first(amount) | |
155 | 90 | |
156 | 91 | def startswith(self, strings): |
157 | return self.state.startswith(strings) | |
92 | return self.buffer.startswith(strings) | |
158 | 93 | |
159 | 94 | def isalnum(self): |
160 | return self.state.isalnum() | |
95 | return self.buffer.isalnum() | |
161 | 96 | |
162 | 97 | def error_message(self, expected, found): |
163 | 98 | if found is EOF: |
167 | 102 | return ( |
168 | 103 | "expected %s but found %s at line %s, column %s in '%s'" % |
169 | 104 | (expected, found, |
170 | self.state.line_number, | |
171 | self.state.column_number, | |
172 | self.state.filename) | |
105 | self.buffer.line_number, | |
106 | self.buffer.column_number, | |
107 | self.buffer.filename) | |
173 | 108 | ) |
174 | 109 | |
175 | 110 | def error(self, expected, found): |
220 | 155 | def dump(self, indent=1): |
221 | 156 | print "==" * indent + "%r" % self |
222 | 157 | print "--" * indent + "engines: %r" % repr(self.engines) |
223 | print "--" * indent + "state: %r" % self.state | |
158 | print "--" * indent + "buffer: %r" % self.buffer | |
224 | 159 | |
225 | 160 | |
226 | 161 | class ScannerEngine(object): |