Fancy StringScanner based lexer

This commit is contained in:
Tristan Hume
2013-07-24 15:39:48 -04:00
parent b20a594f25
commit 84be895db2

View File

@@ -23,13 +23,17 @@ module Liquid
':' => :colon,
',' => :comma
}
# Token patterns, each meant to be tried at the current scanner position.
#
# Identifiers must begin with a letter or underscore; later characters may
# also be digits or '-'. Excluding a leading digit or '-' keeps numeric input
# such as "42" or "-1" from being swallowed as an identifier when this
# pattern is tried before the number patterns.
IDENTIFIER = /[a-zA-Z_][\w\-]*/
# Quoted strings. No escape sequences are recognised: the literal runs up to
# the next occurrence of the same quote character.
SINGLE_STRING_LITERAL = /'[^\']*'/
DOUBLE_STRING_LITERAL = /"[^\"]*"/
# Whole numbers only. The atomic group stops the digit run from being
# shortened by backtracking, and the lookahead rejects a run followed by
# ".<digit>", so the integer part of "1.5" is not split off when this pattern
# is tried before FLOAT_LITERAL. A run followed by ".." (e.g. "1..5") still
# matches.
INTEGER_LITERAL = /-?(?>\d+)(?!\.\d)/
# Digits with an optional fractional part.
FLOAT_LITERAL = /-?\d+(?:\.\d+)?/
# Sets up lexer state for one piece of source text.
#
# input - the String to tokenize. It is kept as-is in @input and also wrapped
#         in a StringScanner stored in @ss.
def initialize(input)
  @ss = StringScanner.new(input)
  @input = input
end
def tokenize
@p = 0
@output = []
loop do
@@ -41,62 +45,26 @@ module Liquid
# Returns the next token from the input, or nil once the input is exhausted.
#
# NOTE(review): two strategies are visible in this body - a per-character
# branch that reads @input at @p, and a StringScanner branch that reads @ss -
# and the opening `if`/`elsif` chain has no `end` of its own before the
# scanner code starts. Confirm which lines are meant to be live.
def next_token
consume_whitespace
c = @input[@p]
# Nothing at the cursor: no more tokens.
return nil unless c
if identifier?(c)
identifier
elsif c == '"' || c == '\''
string_literal
elsif s = SPECIALS[c]
# Single-character token: step past it and emit its SPECIALS symbol.
@p += 1
Token[s]
# The scanner has reached the end of the input: returns nil.
return if @ss.eos?
# Each arm tries one pattern at the scanner position and the first match
# wins, so arm order decides how overlapping patterns are resolved (an
# earlier pattern that also accepts the same text shadows a later one).
case
when t = @ss.scan(IDENTIFIER) then Token[:id, t]
when t = @ss.scan(SINGLE_STRING_LITERAL) then Token[:string, t]
when t = @ss.scan(DOUBLE_STRING_LITERAL) then Token[:string, t]
when t = @ss.scan(INTEGER_LITERAL) then Token[:integer, t]
when t = @ss.scan(FLOAT_LITERAL) then Token[:float, t]
else
# No pattern matched: take one character and look it up in SPECIALS.
c = @ss.getch
if s = SPECIALS[c]
return Token[s]
end
# The character is not a SPECIALS key either.
raise SyntaxError, "Unexpected character #{c}."
end
end
# Tests whether c is a single identifier character (word character or '-').
#
# c - a one-character String, or nil.
#
# Returns the match offset (0) on success and nil otherwise, so the result is
# only meaningful as a truthy/falsy value.
def identifier?(c)
  identifier_char = /^[\w\-]$/
  identifier_char =~ c
end
# Tests whether c is a single whitespace character.
#
# c - a one-character String, or nil.
#
# Returns the match offset (0) on success and nil otherwise, so the result is
# only meaningful as a truthy/falsy value.
def whitespace?(c)
  whitespace_char = /^\s$/
  whitespace_char =~ c
end
# Reads the character at the cursor and moves the cursor one position forward.
#
# Returns the character that was under @p before the move, or nil when @p is
# past the end of @input (the cursor is still advanced in that case).
def consume
  @input[@p].tap { @p += 1 }
end
# Skips any run of whitespace at the current position.
#
# Both cursors are moved: @p is stepped past whitespace characters in @input,
# and @ss skips whatever whitespace sits at its own position.
#
# Returns the value of the @ss.skip call (the number of characters the
# scanner skipped, which is 0 when there was no whitespace).
def consume_whitespace
  @p += 1 while whitespace?(@input[@p])
  @ss.skip(/\s*/)
end
# Collects the run of identifier characters starting at @p.
#
# The cursor @p is left on the first character that identifier? rejects (or
# past the end of @input).
#
# Returns Token[:id, text] where text is the collected run.
def identifier
  text = ""
  loop do
    ch = @input[@p]
    break unless identifier?(ch)
    text << ch
    @p += 1
  end
  Token[:id, text]
end
# Reads a quoted string literal whose opening quote is at @p.
#
# The opening quote (either kind) is consumed first, and the literal ends at
# the next occurrence of that same character; no escape sequences are
# recognised. On success @p is left just past the closing quote.
#
# Returns Token[:string, contents] with the text between the quotes.
# Raises SyntaxError when the closing quote is missing, instead of advancing
# @p past the end of the input forever.
def string_literal
  quote = consume
  start = @p
  closing = @input.index(quote, start)
  raise SyntaxError, "Unterminated string literal." if closing.nil?
  @p = closing + 1 # step over the closing quote
  Token[:string, @input[start...closing]]
end
# Empty stub: the body contains no statements, so calling it does nothing and
# returns nil.
def number_literal
end
end