diff --git a/lib/liquid.rb b/lib/liquid.rb
index a377f95..62b1c80 100644
--- a/lib/liquid.rb
+++ b/lib/liquid.rb
@@ -46,6 +46,7 @@ module Liquid
 end
 
 require "liquid/version"
+require 'liquid/lexer'
 require 'liquid/parser'
 require 'liquid/drop'
 require 'liquid/extensions'
diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb
new file mode 100644
--- /dev/null
+++ b/lib/liquid/lexer.rb
@@ -0,0 +1,85 @@
+module Liquid
+  # A lexical token: a +type+ symbol plus optional string +contents+.
+  class Token
+    attr_accessor :type, :contents
+    def initialize(*args)
+      @type, @contents = args
+    end
+
+    # Shorthand constructor: Token[:identifier, "foo"].
+    def self.[](*args)
+      Token.new(*args)
+    end
+
+    def inspect
+      out = "<#{@type}"
+      out << ": \'#{@contents}\'" if contents
+      out << '>'
+    end
+  end
+
+  # Splits an input string into identifier and punctuation tokens.
+  class Lexer
+    # Single characters that each form a token of their own.
+    SPECIALS = {
+      '|' => :pipe,
+      '.' => :dot,
+      ':' => :colon,
+      ',' => :comma
+    }
+
+    def initialize(input)
+      @input = input.chars.to_a
+    end
+
+    # Returns the array of Tokens for the whole input. Whitespace between
+    # tokens is skipped. Raises SyntaxError on a character that is neither
+    # whitespace, part of an identifier, nor one of SPECIALS.
+    def tokenize
+      @p = 0
+      @output = []
+
+      loop do
+        consume_whitespace
+        c = @input[@p]
+
+        # are we out of input?
+        return @output unless c
+
+        if identifier?(c)
+          @output << consume_identifier
+        elsif s = SPECIALS[c]
+          @output << Token[s]
+          @p += 1
+        else
+          # Without this branch @p never advances and the loop spins forever.
+          raise SyntaxError, "Unexpected character #{c}"
+        end
+      end
+    end
+
+    def identifier?(c)
+      c =~ /^[\w\-]$/
+    end
+
+    def whitespace?(c)
+      c =~ /^\s$/
+    end
+
+    def consume_whitespace
+      while whitespace?(@input[@p])
+        @p += 1
+      end
+    end
+
+    # Reads a maximal run of identifier characters starting at @p.
+    def consume_identifier
+      str = ""
+      while identifier?(@input[@p])
+        str << @input[@p]
+        @p += 1
+      end
+      Token[:identifier, str]
+    end
+  end
+end
diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb
--- a/lib/liquid/parser.rb
+++ b/lib/liquid/parser.rb
@@ -3,13 +3,31 @@ module Liquid
   # it provides helpers and encapsulates state
   class Parser
     def initialize(input)
-      @tokens = tokenize(input)
+      l = Lexer.new(input)
+      @tokens = l.tokenize
       @p = 0 # pointer to current location
     end
 
-    def tokenize(input)
-      # "foo.bar | filter: baz, qux" becomes ["foo", ".", "bar", "|", "filter", ":", "baz", ",", "qux"]
-      input.split(/\b/).map {|tok| tok.strip}
+    # Returns the current token and advances past it.
+    # When +type+ is given, raises SyntaxError unless the current token has
+    # that type (running out of tokens counts as a mismatch).
+    def consume(type = nil)
+      token = @tokens[@p]
+      if type && (token.nil? || token.type != type)
+        found = token ? token.inspect : 'end of input'
+        raise SyntaxError, "Expected #{type} but found #{found}"
+      end
+      @p += 1
+      token
     end
+
+    # Reports whether the current token has the given type, without consuming
+    # it. Returns false once the tokens are exhausted.
+    def look(type)
+      token = @tokens[@p]
+      !token.nil? && token.type == type
+    end
+
+    # === General Liquid parsing functions ===
   end
 end