# Reconstructed from a git format-patch whose line breaks had been lost.
#
#   Commit:  2332d86156e19557df18bff6d38c2b3dcf3b39a0
#   Author:  Tristan Hume
#   Date:    Wed, 24 Jul 2013 11:35:00 -0400
#   Subject: [PATCH] Slow lexer and parser scaffold.
#
# Files touched by the patch (3 files changed, 73 insertions(+)):
#   lib/liquid.rb        |  2 ++   (index cb84a6e..6d2fe8a)
#   lib/liquid/lexer.rb  | 61 ++   (new file)
#   lib/liquid/parser.rb | 10 ++   (new file)
#
# The lib/liquid.rb hunk (@@ -46,6 +46,8 @@) inserts these two lines directly
# after `require "liquid/version"` and before `require 'liquid/drop'`:
#
#   require 'liquid/parser'
#   require 'liquid/lexer'

# ---------------------------------------------------------------------------
# lib/liquid/lexer.rb
# ---------------------------------------------------------------------------
module Liquid
  # Character-at-a-time tokenizer.
  #
  # Splits an input string into a flat array in which identifiers appear as
  # Strings and punctuation appears as the Symbols listed in SPECIALS.
  # Whitespace only separates tokens and never appears in the output.
  #
  # Scanning state lives in instance variables (@p, @output, @input) that are
  # reset at the start of every #tokenize call.
  class Lexer
    # Single-character punctuation and the token Symbol emitted for each.
    SPECIALS = {
      '|' => :pipe,
      '.' => :dot,
      ':' => :colon,
      ',' => :comma
    }.freeze

    # Matches a string that is exactly one identifier character
    # (word character or hyphen).
    IDENTIFIER_CHAR = /\A[\w\-]\z/

    # Matches a string that is exactly one whitespace character.
    WHITESPACE_CHAR = /\A\s\z/

    # Tokenizes +input+.
    #
    # @param input [String] text to scan
    # @return [Array<String, Symbol>] identifier Strings and SPECIALS Symbols,
    #   in source order
    # @raise [ArgumentError] when a character is neither whitespace, an
    #   identifier character, nor a key of SPECIALS
    def tokenize(input)
      @p = 0
      @output = []
      @input = input.chars.to_a

      loop do
        consume_whitespace
        c = @input[@p]
        return @output unless c # ran off the end of the input: done

        if identifier?(c)
          @output << consume_identifier
        elsif (s = SPECIALS[c])
          @output << s
          @p += 1
        else
          # No branch would advance @p for this character, so without this
          # the loop would spin forever on input such as "a ! b".
          raise ArgumentError, "Unexpected character #{c}"
        end
      end
    end

    # Ad-hoc benchmark: times 100_000 #tokenize calls ('c') against 100_000
    # runs of a split/strip one-liner ('r') on the same sample string, using
    # Benchmark.bmbm.
    def benchmark
      require 'benchmark' # loaded lazily; only this method needs it
      s = "bob.hello | filter: lol, troll"
      Benchmark.bmbm do |x|
        x.report('c') { 100_000.times { tokenize(s) }}
        x.report('r') { 100_000.times { s.split(/\b/).map {|y| y.strip} }}
      end
    end

    # @param c [String, nil] a single character, or nil past the end of input
    # @return [Integer, nil] 0 when +c+ is a word character or hyphen,
    #   otherwise nil
    def identifier?(c)
      c =~ IDENTIFIER_CHAR
    end

    # @param c [String, nil] a single character, or nil past the end of input
    # @return [Integer, nil] 0 when +c+ is a whitespace character,
    #   otherwise nil
    def whitespace?(c)
      c =~ WHITESPACE_CHAR
    end

    # Advances @p past any run of whitespace at the current position.
    def consume_whitespace
      while whitespace?(@input[@p])
        @p += 1
      end
    end

    # Collects the run of identifier characters starting at @p and leaves @p
    # on the first character after it.
    #
    # @return [String] the identifier text (empty if @p is not on an
    #   identifier character)
    def consume_identifier
      str = ""
      while identifier?(@input[@p])
        str << @input[@p]
        @p += 1
      end
      str
    end
  end
end

# ---------------------------------------------------------------------------
# lib/liquid/parser.rb
# ---------------------------------------------------------------------------
module Liquid
  # This class is used by tags to parse themselves.
  # It provides helpers and encapsulates state.
  #
  # Scaffold only: at this point it just records the input and a cursor.
  class Parser
    # @param input the material to be parsed; stored as given
    def initialize(input)
      @input = input
      @p = 0 # pointer to current location
    end
  end
end