A simple recursive-descent parser skeleton in Ruby.
Cat's Eye Technologies
12 years ago
0 | #!/usr/bin/env ruby | |
1 | ||
# Raised when the input does not conform to the grammar being parsed.
# It derives from StandardError, so a bare `rescue` will catch it.
class LanguageSyntaxError < StandardError; end
4 | ||
# Splits a source string into tokens, handing them out one at a time.
#
# The tokenizer always holds exactly one "current" token, exposed through
# #text and #type.  The first token is read by the constructor; #scan,
# #consume and #expect advance to the next one.
#
# Token types:
#   'seperator' - a single "(", ")" or ","   (spelling kept as-is, since
#                 callers may compare against this exact string)
#   'relop'     - a run of one or more of the characters < > = !
#   'atom'      - a run of one or more "word" characters (\w)
#
# At end of input both #text and #type are nil.
#
# NOTE: a character that starts none of the token kinds above also yields
# the nil token, and is not skipped, so from the outside it is
# indistinguishable from end of input.
class Tokenizer
  # Patterns are tried in order; the first one matching at the very start
  # of the remaining input wins.  They are anchored with \A rather than ^
  # because in Ruby ^ and $ match at every line boundary, which made the
  # tokenizer lose input whenever the source contained a newline.
  TOKEN_PATTERNS = [
    [/\A[(),]/,    'seperator'],
    [/\A[<>=!]+/,  'relop'],
    [/\A\w+/,      'atom']
  ].freeze

  # Text of the current token, or nil when there is none.
  attr_reader :text

  # Type of the current token (see the class comment), or nil when there
  # is none.
  attr_reader :type

  # s - the String to tokenize.  It is not modified.
  #
  # The first token is scanned immediately, so #text and #type are valid
  # as soon as the object exists.
  def initialize s
    @string = s
    @text = nil
    @type = nil
    scan_impl
  end

  # Records +text+ and +type+ as the current token.
  def set_token(text, type)
    #puts "token '#{text}' of type '#{type}'; string now '#{@string}'"
    @text = text
    @type = type
  end

  # Advances to the next token.
  #
  # Returns the text of the new current token (nil at end of input).
  def scan
    scan_impl
    return @text
  end

  # Advances to the next token: skips leading whitespace (newlines
  # included), then tries each entry of TOKEN_PATTERNS against the start
  # of the remaining input.
  def scan_impl
    @string = @string.sub(/\A\s+/, '')

    if @string.empty?
      set_token(nil, nil)
      return
    end

    TOKEN_PATTERNS.each do |pattern, type|
      m = pattern.match(@string)
      next if m.nil?
      # Everything after the matched token, later lines included, is
      # what remains to be scanned.
      @string = m.post_match
      set_token(m[0], type)
      return
    end

    # Nothing recognisable at the current position.
    set_token(nil, nil)
  end

  # If the current token's text equals +s+, advances past it.
  #
  # Returns true when the token was consumed, false otherwise (in which
  # case the current token is left untouched).
  def consume s
    if @text == s
      scan
      true
    else
      false
    end
  end

  # Like #consume, but a mismatch is an error.
  #
  # Returns the text of the token following +s+.
  # Raises LanguageSyntaxError when the current token's text is not +s+.
  def expect s
    if @text == s
      scan
    else
      raise LanguageSyntaxError, "expected '#{s}', found '#{@text}'"
    end
  end
end
85 | ||
86 | # Expr ::= Atom | "(" Expr {"," Expr} ")". | |
87 | ||
# Recursive-descent parser for the grammar
#
#   Expr ::= Atom | "(" Expr {"," Expr} ")".
#
# It reads its tokens from a Tokenizer built over the source string.
class Parser
  # s - the source String to parse.
  def initialize s
    @tokenizer = Tokenizer.new(s)
  end

  # Parses one Expr starting at the current token.
  #
  # Returns an Atom for a bare atom, or a List holding the parsed
  # sub-expressions for a parenthesised, comma-separated group.
  # Raises LanguageSyntaxError when the current token is neither "(" nor
  # an atom, or when a group is not closed by ")".
  def expr
    unless @tokenizer.consume("(")
      # Not a group, so the only remaining alternative is an atom.
      unless @tokenizer.type == 'atom'
        raise LanguageSyntaxError, "expected atom, found #{@tokenizer.type}"
      end
      atom = Atom.new(@tokenizer.text)
      @tokenizer.scan
      #puts "Returning #{atom}"
      return atom
    end

    # A group has at least one member, then any number of ", Expr".
    members = [expr]
    members.push(expr) while @tokenizer.consume(",")
    @tokenizer.expect(")")

    list = List.new(members)
    #puts "Returning #{list}"
    list
  end
end
118 | ||
# Leaf node of the parse tree: wraps the text of a single atom token.
class Atom
  # t - the atom's text.
  def initialize t
    @t = t
  end

  # Returns the atom's text prefixed with a dot, e.g. ".foo".
  def to_s
    "." + @t.to_s
  end
end
128 | ||
# Interior node of the parse tree: an ordered collection of
# sub-expressions.
class List
  # l - the collection of child nodes.
  def initialize l
    @l = l
  end

  # Returns the children rendered with to_s, each followed by a single
  # space, wrapped in parentheses, e.g. "(.a .b )".  An empty list
  # renders as "()".
  def to_s
    rendered = @l.map { |child| child.to_s + " " }.join
    "(" + rendered + ")"
  end
end
143 | ||
144 | ### Main ### | |
145 | ||
# Demo: parse a fixed sample expression and write the resulting tree to
# standard output (no trailing newline is printed).
sample = "(a, b, (c, d, e), f, ((g)))"
parser = Parser.new(sample)
tree = parser.expr
print tree