From 84be895db236673fa9e86ae87a53ebcf9b268a40 Mon Sep 17 00:00:00 2001 From: Tristan Hume Date: Wed, 24 Jul 2013 15:39:48 -0400 Subject: [PATCH] Fancy StringScanner based lexer --- lib/liquid/lexer.rb | 72 +++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 52 deletions(-) diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 6f1fabe..e8c72f7 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -23,13 +23,17 @@ module Liquid ':' => :colon, ',' => :comma } + IDENTIFIER = /[\w\-]+/ + SINGLE_STRING_LITERAL = /'[^\']*'/ + DOUBLE_STRING_LITERAL = /"[^\"]*"/ + INTEGER_LITERAL = /-?\d+/ + FLOAT_LITERAL = /-?\d+(?:\.\d+)?/ def initialize(input) - @input = input + @ss = StringScanner.new(input) end def tokenize - @p = 0 @output = [] loop do @@ -41,62 +45,26 @@ module Liquid def next_token consume_whitespace - c = @input[@p] - return nil unless c - - if identifier?(c) - identifier - elsif c == '"' || c == '\'' - string_literal - elsif s = SPECIALS[c] - @p += 1 - Token[s] + return if @ss.eos? + + case + when t = @ss.scan(IDENTIFIER) then Token[:id, t] + when t = @ss.scan(SINGLE_STRING_LITERAL) then Token[:string, t] + when t = @ss.scan(DOUBLE_STRING_LITERAL) then Token[:string, t] + when t = @ss.scan(INTEGER_LITERAL) then Token[:integer, t] + when t = @ss.scan(FLOAT_LITERAL) then Token[:float, t] else + c = @ss.getch + if s = SPECIALS[c] + return Token[s] + end + raise SyntaxError, "Unexpected character #{c}." end end - def identifier?(c) - c =~ /^[\w\-]$/ - end - - def whitespace?(c) - c =~ /^\s$/ - end - - def consume - c = @input[@p] - @p += 1 - c - end - def consume_whitespace - while whitespace?(@input[@p]) - @p += 1 - end + @ss.skip(/\s*/) end - - def identifier - str = "" - while identifier?(@input[@p]) - str << @input[@p] - @p += 1 - end - Token[:id, str] - end - - def string_literal - quote = consume() - - start = @p - while @input[@p] != quote - @p += 1 - end - @p += 1 # closing quote - - Token[:string, @input[start..(@p-2)]] - end - - def number_literal end end