git @ Cat's Eye Technologies The-Dipple / 07ef0aa
A simple recursive-descent parser skeleton in Ruby. Cat's Eye Technologies 12 years ago
1 changed file(s) with 148 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 #!/usr/bin/env ruby
1
# Raised when the input does not conform to the expected grammar.
# Carries no extra state beyond the message given to `raise`.
class LanguageSyntaxError < StandardError; end
4
# Splits a source string into tokens, handing them out one at a time.
#
# The tokenizer always holds one "current" token, exposed through #text and
# #type. Both are nil when no token could be read: either the input is
# exhausted, or the next character is not recognised by any token pattern
# (in which case the tokenizer does not advance past it).
class Tokenizer
  # Token patterns, tried in order against the start of the remaining input,
  # paired with the type name reported for a match.
  #
  # They are anchored with \A (start of string) rather than ^ (start of any
  # line): with ^ a match could begin on a later line of multi-line input and
  # silently skip everything before it.
  #
  # NOTE: 'seperator' is misspelled, but it is a runtime value that other
  # code may compare against, so it is deliberately left unchanged.
  TOKEN_PATTERNS = [
    [/\A[(),]/, 'seperator'], # a single (, ) or comma
    [/\A[<>=!]+/, 'relop'],   # a run of >, <, = and ! characters
    [/\A\w+/, 'atom']         # a run of "word" characters
  ].freeze

  # text: the current token's text, or nil when there is no current token.
  # type: the current token's type ('seperator', 'relop' or 'atom'), or nil.
  attr_reader :text, :type

  # Starts tokenizing +s+ and immediately reads the first token.
  # The string passed in is never modified.
  def initialize(s)
    @string = s
    @text = nil
    @type = nil
    scan_impl
  end

  # Records +text+ and +type+ as the current token.
  def set_token(text, type)
    @text = text
    @type = type
  end

  # Advances to the next token and returns its text (nil if there is none).
  def scan
    scan_impl
    @text
  end

  # Reads the next token from the remaining input into the current token.
  # Leading whitespace, including newlines, is skipped first. Always
  # returns nil; use #text and #type to inspect the result.
  def scan_impl
    @string = @string.sub(/\A\s+/, '')

    TOKEN_PATTERNS.each do |pattern, token_type|
      m = pattern.match(@string)
      next unless m

      # post_match is everything after the token, across line boundaries.
      @string = m.post_match
      set_token(m[0], token_type)
      return
    end

    # End of input, or a character no pattern recognises.
    set_token(nil, nil)
  end

  # If the current token's text equals +s+, advances past it and returns
  # true; otherwise leaves the current token alone and returns false.
  def consume(s)
    return false unless @text == s

    scan
    true
  end

  # Like #consume, but a mismatch is an error: raises LanguageSyntaxError
  # unless the current token's text equals +s+. On success, advances and
  # returns the text of the following token.
  def expect(s)
    raise LanguageSyntaxError, "expected '#{s}', found '#{@text}'" unless @text == s

    scan
  end
end
85
86 # Expr ::= Atom | "(" Expr {"," Expr} ")".
87
# Recursive-descent parser for the grammar
#
#   Expr ::= Atom | "(" Expr {"," Expr} ")".
#
# Tokens are pulled from a Tokenizer built over the input string.
class Parser
  # Creates a parser over the source string +s+.
  def initialize(s)
    @tokenizer = Tokenizer.new(s)
  end

  # Parses one Expr starting at the current token and returns it as an Atom
  # or a List. Raises LanguageSyntaxError if the current token starts
  # neither (or, via Tokenizer#expect, if a list is not closed by ")").
  # Input remaining after the expression is not examined.
  def expr
    unless @tokenizer.consume("(")
      # Not a list, so the only remaining alternative is a bare atom.
      unless @tokenizer.type == 'atom'
        raise LanguageSyntaxError, "expected atom, found #{@tokenizer.type}"
      end

      atom = Atom.new(@tokenizer.text)
      @tokenizer.scan
      return atom
    end

    # A list holds at least one expression, then any number of ", Expr".
    items = [expr]
    items << expr while @tokenizer.consume(",")
    @tokenizer.expect(")")
    List.new(items)
  end
end
118
# Leaf node of the parse tree: wraps the text of a single atom token.
class Atom
  # +t+ is the atom's text as produced by the tokenizer.
  def initialize(t)
    @t = t
  end

  # Renders the atom as its text prefixed with a dot, e.g. ".foo".
  def to_s
    ".#{@t}"
  end
end
128
# Interior node of the parse tree: an ordered collection of sub-expressions.
class List
  # +l+ is the collection of child nodes; it is stored as given, not copied.
  def initialize(l)
    @l = l
  end

  # Renders the list in parentheses with every child followed by a single
  # space, so a non-empty list ends with " )" (e.g. "(.a .b )") and an
  # empty one is "()".
  def to_s
    rendered = String.new("(")
    @l.each { |child| rendered << child.to_s << " " }
    rendered << ")"
  end
end
143
144 ### Main ###
145
# Demo: parse a sample nested list and print the resulting tree to stdout.
parser = Parser.new("(a, b, (c, d, e), f, ((g)))")
print parser.expr