git @ Cat's Eye Technologies Tamsin / c04aa7f
Completely replace ScannerState with Buffer. Chris Pressey 10 years ago
5 changed file(s) with 57 addition(s) and 90 deletion(s). Raw diff Collapse all Expand all
66
77
88 class Buffer(object):
9 """Abstract base class for all Buffer objects.
10
11 You should treat Buffer objects as immutable.
12
13 """
914 def chop(self, amount):
1015 raise NotImplementedError
1116
1520 def is_at_eof(self):
1621 raise NotImplementedError
1722
23 def is_at_utf8(self):
24 k = ord(self.first(1))
25 if k & 0b11100000 == 0b11000000:
26 return 2
27 elif k & 0b11110000 == 0b11100000:
28 return 3
29 elif k & 0b11111000 == 0b11110000:
30 return 4
31 else:
32 return 0
33
34 def isalnum(self):
35 return self.first(1).isalnum()
36
37 def startswith(self, strings):
38 for s in strings:
39 if self.first(len(s)) == s:
40 return True
41 return False
42
1843 def copy(self):
1944 raise NotImplementedError
2045
2146
2247 class StringBuffer(Buffer):
2348 def __init__(self, string, filename='<data>', position=0, line_number=1, column_number=1):
49 """Create a new StringBuffer object.
50
51 `string` should be a raw string, not unicode. If `position` is given,
52 `line_number` and `column_number` should be given too, to match.
53
54 """
2455 assert not isinstance(string, unicode)
2556 self.string = string
57 self._filename = filename
2658 self.position = position
27 self._filename = filename
2859 self._line_number = line_number
2960 self._column_number = column_number
3061
1111 from tamsin.term import Term, Atom
1212 from tamsin.event import EventProducer
1313 from tamsin.scanner import (
14 ScannerState, ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine
14 ByteScannerEngine, UTF8ScannerEngine, ProductionScannerEngine
1515 )
1616 import tamsin.sysmod
1717
187187 buffer = str(result.expand(self.context))
188188 self.event('interpret_on_buffer', buffer)
189189 saved_scanner_state = self.scanner.get_state()
190 new_state = ScannerState(StringBuffer(buffer))
190 new_state = StringBuffer(buffer)
191191 self.scanner.install_state(new_state)
192192 (success, result) = self.interpret(ast.rule)
193193 self.scanner.install_state(saved_scanner_state)
1010 from tamsin.event import DebugEventListener
1111 from tamsin.term import Atom
1212 from tamsin.scanner import (
13 Scanner, ScannerState, EOF, UTF8ScannerEngine, TamsinScannerEngine
13 Scanner, EOF, UTF8ScannerEngine, TamsinScannerEngine
1414 )
1515 from tamsin.parser import Parser
1616 from tamsin.interpreter import Interpreter
2323 with open(filename, 'r') as f:
2424 contents = f.read()
2525 scanner = Scanner(
26 ScannerState(StringBuffer(contents, filename=filename)),
26 StringBuffer(contents, filename=filename),
2727 engines=(TamsinScannerEngine(),)
2828 )
2929 parser = Parser(scanner)
4848
4949 def run(ast, listeners=None):
5050 scanner = Scanner(
51 ScannerState(StringBuffer(sys.stdin.read(), filename='<stdin>')),
51 StringBuffer(sys.stdin.read(), filename='<stdin>'),
5252 engines=(UTF8ScannerEngine(),),
5353 listeners=listeners
5454 )
7070 if args[0] == 'scan':
7171 with open(args[1], 'r') as f:
7272 scanner = Scanner(
73 ScannerState(f.read(), filename=args[1]),
73 StringBuffer(f.read(), filename=args[1]),
7474 engines=(TamsinScannerEngine(),),
7575 listeners=listeners
7676 )
88 Send, Set, Concat, Using, On, Fold,
99 AtomNode, VariableNode, ConstructorNode,
1010 )
11 from tamsin.buffer import StringBuffer
1112 from tamsin.event import EventProducer
1213 from tamsin.scanner import (
13 EOF, Scanner, ScannerState, TamsinScannerEngine
14 EOF, Scanner, TamsinScannerEngine
1415 )
1516
1617
3233 contents = f.read()
3334 return Parser(
3435 Scanner(
35 ScannerState(contents, filename=filename),
36 StringBuffer(contents, filename=filename),
3637 engines=(TamsinScannerEngine(),),
3738 )
3839 )
1010 EOF = object()
1111
1212
13 class ScannerState(object):
14 def __init__(self, buffer):
15 """Create a new ScannerState object.
16
17 You should treat ScannerState objects as immutable.
18
19 buffer should be a raw string, not unicode. If position is given,
20 line_number and column_number should be given too, to match.
21
22 """
23 assert isinstance(buffer, Buffer)
24 self._buffer = buffer
25
26 @property
27 def filename(self):
28 return self._buffer.filename
29
30 @property
31 def line_number(self):
32 return self._buffer.line_number
33
34 @property
35 def column_number(self):
36 return self._buffer.column_number
37
38 def is_at_eof(self):
39 return self._buffer.is_at_eof()
40
41 def is_at_utf8(self):
42 k = ord(self._buffer.first(1))
43 if k & 0b11100000 == 0b11000000:
44 return 2
45 elif k & 0b11110000 == 0b11100000:
46 return 3
47 elif k & 0b11111000 == 0b11110000:
48 return 4
49 else:
50 return 0
51
52 def isalnum(self):
53 return self._buffer.first(1).isalnum()
54
55 def chop(self, amount):
56 (result, new_buffer) = self._buffer.chop(amount)
57 new_state = ScannerState(new_buffer)
58 return (result, new_state)
59
60 def first(self, amount):
61 return self._buffer.first(amount)
62
63 def startswith(self, strings):
64 for s in strings:
65 if self._buffer.first(len(s)) == s:
66 return True
67 return False
68
69 def __eq__(self, other):
70 return self._buffer == other._buffer
71
72 def __repr__(self):
73 return "ScannerState(%r)" % (
74 self._buffer
75 )
76
77
7813 class Scanner(EventProducer):
79 def __init__(self, state, engines=None, listeners=None):
14 def __init__(self, buffer, engines=None, listeners=None):
8015 """Create a new Scanner object.
8116
8217 """
8318 self.listeners = listeners
8419 self.event('set_buffer', buffer)
85 assert isinstance(state, ScannerState)
86 self.state = state
20 assert isinstance(buffer, Buffer)
21 self.buffer = buffer
8722 self.engines = []
8823 if engines is not None:
8924 for engine in engines:
9934 Scanner.
10035
10136 """
102 return self.state
37 return self.buffer
10338
10439 def install_state(self, state):
10540 """Restores the state of this Scanner to that which was saved by
10641 a previous call to get_state().
10742
10843 """
109 self.state = state
44 self.buffer = state
11045
11146 def push_engine(self, engine):
11247 self.engines.append(engine)
12156 to see if ... something
12257
12358 """
124 return self.state.is_at_eof()
59 return self.buffer.is_at_eof()
12560
12661 def is_at_utf8(self):
12762 """Returns the number of bytes following that comprise a UTF-8
13065 Should only be used by ScannerEngines.
13166
13267 """
133 return self.state.is_at_utf8()
68 return self.buffer.is_at_utf8()
13469
13570 def chop(self, amount):
13671 """Returns amount characters from the buffer and advances the
13974 Should only be used by ScannerEngines.
14075
14176 """
142 (result, state) = self.state.chop(amount)
143 self.state = state
77 (result, buffer) = self.buffer.chop(amount)
78 self.buffer = buffer
14479 return result
14580
14681 def first(self, amount):
15186 reporting.
15287
15388 """
154 return self.state.first(amount)
89 return self.buffer.first(amount)
15590
15691 def startswith(self, strings):
157 return self.state.startswith(strings)
92 return self.buffer.startswith(strings)
15893
15994 def isalnum(self):
160 return self.state.isalnum()
95 return self.buffer.isalnum()
16196
16297 def error_message(self, expected, found):
16398 if found is EOF:
167102 return (
168103 "expected %s but found %s at line %s, column %s in '%s'" %
169104 (expected, found,
170 self.state.line_number,
171 self.state.column_number,
172 self.state.filename)
105 self.buffer.line_number,
106 self.buffer.column_number,
107 self.buffer.filename)
173108 )
174109
175110 def error(self, expected, found):
220155 def dump(self, indent=1):
221156 print "==" * indent + "%r" % self
222157 print "--" * indent + "engines: %r" % repr(self.engines)
223 print "--" * indent + "state: %r" % self.state
158 print "--" * indent + "buffer: %r" % self.buffer
224159
225160
226161 class ScannerEngine(object):