Split the scanner off from the parser module into its own module.
Chris Pressey
8 years ago
0 | 0 | # encoding: UTF-8 |
1 | ||
2 | import re | |
3 | 1 | |
4 | 2 | from sixtypical.ast import Program, Defn, Routine, Block, Instr |
5 | 3 | from sixtypical.model import ( |
7 | 5 | RoutineType, VectorType, ExecutableType, |
8 | 6 | LocationRef, ConstantRef |
9 | 7 | ) |
10 | ||
11 | ||
class Scanner(object):
    """Regex-driven tokenizer that exposes one token of lookahead.

    After construction, and after every call to ``scan()``, ``self.token``
    holds the text of the current token and ``self.type`` holds its
    category: 'operator', 'integer literal', 'string literal',
    'identifier', 'unknown character' or 'EOF'.  ``self.text`` holds the
    input that has not been consumed yet.
    """

    def __init__(self, text):
        """Store ``text`` as the input and scan the first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match ``pattern`` at the start of the remaining text.

        The pattern is wrapped so that group 1 is the whole match and the
        last group is everything after it.  A pattern that contains one
        capturing group of its own therefore has to pass ``token_group=2``
        and ``rest_group=3``.

        On a match, ``self.type``, ``self.token`` and ``self.text`` are
        updated and True is returned; otherwise nothing is changed and
        False is returned.
        """
        pattern = r'^(' + pattern + r')(.*?)$'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and ``//`` comments, then load the next token.

        At end of input, ``self.token`` becomes None and ``self.type``
        becomes 'EOF'.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # The comment pattern stops *before* the line terminator (the
        # whitespace scan below consumes it), so a comment on the final
        # line is recognised even when the input has no trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Normalise hexadecimal literals to their decimal spelling.
            # int(..., 16) is used instead of eval() so that no input text
            # is ever executed as code.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # With re.DOTALL, '.' matches any character of a non-empty text,
        # so control is not expected to get here.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Advance past ``token``; raise SyntaxError if it is not current."""
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def on(self, token):
        """Return True if the current token's text equals ``token``."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's category equals ``type``."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's category is ``type``."""
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Advance and return True if ``token`` is current, else return False."""
        if self.token == token:
            self.scan()
            return True
        return False
8 | from sixtypical.scanner import Scanner | |
81 | 9 | |
82 | 10 | |
83 | 11 | class SymEntry(object): |
0 | # encoding: UTF-8 | |
1 | ||
2 | import re | |
3 | ||
4 | ||
class Scanner(object):
    """Regex-driven tokenizer that exposes one token of lookahead.

    After construction, and after every call to ``scan()``, ``self.token``
    holds the text of the current token and ``self.type`` holds its
    category: 'operator', 'integer literal', 'string literal',
    'identifier', 'unknown character' or 'EOF'.  ``self.text`` holds the
    input that has not been consumed yet.
    """

    def __init__(self, text):
        """Store ``text`` as the input and scan the first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match ``pattern`` at the start of the remaining text.

        The pattern is wrapped so that group 1 is the whole match and the
        last group is everything after it.  A pattern that contains one
        capturing group of its own therefore has to pass ``token_group=2``
        and ``rest_group=3``.

        On a match, ``self.type``, ``self.token`` and ``self.text`` are
        updated and True is returned; otherwise nothing is changed and
        False is returned.
        """
        pattern = r'^(' + pattern + r')(.*?)$'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and ``//`` comments, then load the next token.

        At end of input, ``self.token`` becomes None and ``self.type``
        becomes 'EOF'.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # The comment pattern stops *before* the line terminator (the
        # whitespace scan below consumes it), so a comment on the final
        # line is recognised even when the input has no trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Normalise hexadecimal literals to their decimal spelling.
            # int(..., 16) is used instead of eval() so that no input text
            # is ever executed as code.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # With re.DOTALL, '.' matches any character of a non-empty text,
        # so control is not expected to get here.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Advance past ``token``; raise SyntaxError if it is not current."""
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def on(self, token):
        """Return True if the current token's text equals ``token``."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's category equals ``type``."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's category is ``type``."""
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Advance and return True if ``token`` is current, else return False."""
        if self.token == token:
            self.scan()
            return True
        return False