Add convenience methods for getting a struct from a ruby object.

If we are trying to get the struct from something other than self, then we should make sure to check the class of the object. These util functions make this easier.
Implement Block#parse_body in C.
2026-01-02 00:05:42 +03:00 · 2014-03-25 16:16:38 -04:00 · 2014-03-25 16:16:38 -04:00 · 2014-03-25 16:16:38 -04:00 · 2014-03-25 16:16:38 -04:00 · 2014-03-25 16:16:38 -04:00
23 changed files with 480 additions and 110 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ pkg
 .rvmrc
 .ruby-version
 Gemfile.lock
+*.bundle
+/tmp
--- a/Rakefile
+++ b/Rakefile
@@ -1,5 +1,6 @@
 require 'rake'
 require 'rake/testtask'
+require 'rake/extensiontask'
 $LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
 require "liquid/version"

@@ -75,3 +76,8 @@ desc "Run example"
 task :example do
  ruby "-w -d -Ilib example/server/server.rb"
 end
+
+Rake::ExtensionTask.new "liquid" do |ext|
+  ext.lib_dir = "lib/liquid"
+end
+Rake::Task[:test].prerequisites << :compile
--- a/ext/liquid/block.c
+++ b/ext/liquid/block.c
@@ -0,0 +1,168 @@
+#include "liquid_ext.h"
+
+VALUE cLiquidBlock;
+ID intern_assert_missing_delimitation, intern_block_delimiter, intern_is_blank, intern_new,
+   intern_new_with_options, intern_tags, intern_unknown_tag, intern_unterminated_tag,
+   intern_unterminated_variable;
+
+struct liquid_tag
+{
+    char *name, *markup; // slices pointing into the token text (filled in by parse_tag, not copies)
+    long name_length, markup_length; // byte lengths of those slices
+};
+
+static bool parse_tag(struct liquid_tag *tag, char *token, long token_length)
+{
+    // Split a "{% name markup %}" token into tag->name and tag->markup (slices of token, no copies).
+    token += 2;         // skip the opening "{%"
+    token_length -= 4;  // exclude both two-byte delimiters, "{%" and "%}"
+
+    char *end = token + token_length;
+    while (token < end && isspace((unsigned char)*token))
+        token++;
+    tag->name = token;
+
+    // <ctype.h> calls take an unsigned char value; a negative plain char (e.g. a UTF-8 byte) is undefined.
+    while (token < end && (isalnum((unsigned char)*token) || *token == '_'))
+        token++;
+    tag->name_length = token - tag->name;
+    if (!tag->name_length) {
+        memset(tag, 0, sizeof(*tag));  // no tag name: hand back a zeroed struct
+        return false;
+    }
+
+    while (token < end && isspace((unsigned char)*token))
+        token++;
+    tag->markup = token;
+
+    char *last = end - 1;
+    while (token < last && isspace((unsigned char)*last))  // trim trailing whitespace from the markup
+        last--;
+    end = last + 1;
+    tag->markup_length = end - token;
+    return true;
+}
+
+static VALUE rb_parse_body(VALUE self, VALUE tokenizerObj)
+{
+    struct liquid_tokenizer *tokenizer = LIQUID_TOKENIZER_GET_STRUCT(tokenizerObj);
+    // Block#parse_body: read tokens into @nodelist until this block's end tag, set @blank, return nil.
+    bool blank = true;
+    VALUE nodelist = rb_iv_get(self, "@nodelist");
+    if (nodelist == Qnil) {
+        nodelist = rb_ary_new();
+        rb_iv_set(self, "@nodelist", nodelist);
+    } else {
+        rb_ary_clear(nodelist);
+    }
+
+    struct token token;
+    while (true) {
+        liquid_tokenizer_next(tokenizer, &token);
+        switch (token.type) {
+        case TOKEN_NONE:
+            /*
+             * Make sure that it's ok to end parsing in the current block.
+             * Effectively this method will throw an exception unless the current block is
+             * of type Document
+             */
+            rb_funcall(self, intern_assert_missing_delimitation, 0);
+            goto done;
+        case TOKEN_INVALID:
+        {
+            VALUE token_obj = rb_str_new(token.str, token.length);
+            if (token.str[1] == '%')  // second byte of the opener tells "{%" from "{{"
+                rb_funcall(self, intern_unterminated_tag, 1, token_obj);
+            else
+                rb_funcall(self, intern_unterminated_variable, 1, token_obj);
+            break;
+        }
+        case TOKEN_TAG:
+        {
+            struct liquid_tag tag;
+            if (!parse_tag(&tag, token.str, token.length)) {
+                // FIXME: provide more appropriate error message
+                rb_funcall(self, intern_unterminated_tag, 1, rb_str_new(token.str, token.length));
+            } else {
+                if (tag.name_length >= 3 && !memcmp(tag.name, "end", 3)) {  // only "end..." names can close a block
+                    VALUE block_delimiter = rb_funcall(self, intern_block_delimiter, 0);
+                    if (TYPE(block_delimiter) == T_STRING &&
+                        tag.name_length == RSTRING_LEN(block_delimiter) &&
+                        !memcmp(tag.name, RSTRING_PTR(block_delimiter), tag.name_length))
+                    {
+                        goto done;
+                    }
+                }
+
+                VALUE tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
+                Check_Type(tags, T_HASH);
+                VALUE tag_name = rb_str_new(tag.name, tag.name_length);
+                VALUE tag_class = rb_hash_lookup(tags, tag_name);
+                VALUE markup = rb_str_new(tag.markup, tag.markup_length);
+                if (tag_class != Qnil) {
+                    VALUE options = rb_iv_get(self, "@options");
+                    if (options == Qnil)
+                        options = rb_hash_new();
+                    VALUE new_tag = rb_funcall(tag_class, intern_new_with_options, 4,
+                                               tag_name, markup, tokenizerObj, options);
+                    if (blank) {
+                        VALUE blank_block = rb_funcall(new_tag, intern_is_blank, 0);
+                        if (blank_block == Qnil || blank_block == Qfalse)
+                            blank = false;
+                    }
+                    rb_ary_push(nodelist, new_tag);
+                } else {
+                    rb_funcall(self, intern_unknown_tag, 3, tag_name, markup, tokenizerObj);
+                    /*
+                     * multi-block tags may store the nodelist in a block array on unknown_tag
+                     * then replace @nodelist with a new array. We need to use the new array
+                     * for the block following the tag token.
+                     */
+                    nodelist = rb_iv_get(self, "@nodelist");
+                }
+            }
+            break;
+        }
+        case TOKEN_VARIABLE:
+        {
+            VALUE markup = rb_str_new(token.str + 2, token.length - 4);  // text between "{{" and "}}"
+            VALUE options = rb_iv_get(self, "@options");
+            VALUE new_var = rb_funcall(cLiquidVariable, intern_new, 2, markup, options);
+            rb_ary_push(nodelist, new_var);
+            blank = false;
+            break;
+        }
+        case TOKEN_STRING:
+            rb_ary_push(nodelist, rb_str_new(token.str, token.length));
+            if (blank) {
+                int i;
+                for (i = 0; i < token.length; i++) {
+                    if (!isspace((unsigned char)token.str[i])) {  // cast: negative char values are undefined for isspace
+                        blank = false;
+                        break;
+                    }
+                }
+            }
+            break;
+        }
+    }
+done:
+    rb_iv_set(self, "@blank", blank ? Qtrue : Qfalse);
+    return Qnil;
+}
+
+void init_liquid_block() // defines Liquid::Block#parse_body and caches the method IDs it calls
+{
+    intern_assert_missing_delimitation = rb_intern("assert_missing_delimitation!");
+    intern_block_delimiter = rb_intern("block_delimiter");
+    intern_is_blank = rb_intern("blank?");
+    intern_new = rb_intern("new");
+    intern_new_with_options = rb_intern("new_with_options");
+    intern_tags = rb_intern("tags");
+    intern_unknown_tag = rb_intern("unknown_tag");
+    intern_unterminated_tag = rb_intern("unterminated_tag");
+    intern_unterminated_variable = rb_intern("unterminated_variable");
+    // Liquid::Block is a subclass of Liquid::Tag; parse_body takes one argument (the tokenizer).
+    cLiquidBlock = rb_define_class_under(mLiquid, "Block", cLiquidTag);
+    rb_define_method(cLiquidBlock, "parse_body", rb_parse_body, 1);
+}
--- a/ext/liquid/block.h
+++ b/ext/liquid/block.h
@@ -0,0 +1,8 @@
+#ifndef LIQUID_BLOCK_H
+#define LIQUID_BLOCK_H
+
+void init_liquid_block();
+
+extern VALUE cLiquidBlock;
+
+#endif
--- a/ext/liquid/extconf.rb
+++ b/ext/liquid/extconf.rb
@@ -0,0 +1,3 @@
+require 'mkmf'
+$CFLAGS << ' -Wall'
+create_makefile("liquid/liquid")
--- a/ext/liquid/liquid_ext.c
+++ b/ext/liquid/liquid_ext.c
@@ -0,0 +1,15 @@
+#include "liquid_ext.h"
+
+VALUE mLiquid;
+VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
+
+void Init_liquid(void) // entry point Ruby calls when the compiled "liquid" extension is loaded
+{
+    mLiquid = rb_define_module("Liquid");
+    cLiquidTemplate = rb_define_class_under(mLiquid, "Template", rb_cObject);
+    cLiquidTag = rb_define_class_under(mLiquid, "Tag", rb_cObject);
+    cLiquidVariable = rb_define_class_under(mLiquid, "Variable", rb_cObject);
+    // These globals must be set first: the init functions below use mLiquid and cLiquidTag.
+    init_liquid_tokenizer();
+    init_liquid_block();
+}
--- a/ext/liquid/liquid_ext.h
+++ b/ext/liquid/liquid_ext.h
@@ -0,0 +1,15 @@
+#ifndef LIQUID_EXT_H
+#define LIQUID_EXT_H
+
+#include <stdbool.h>
+#include <ctype.h>
+#include <ruby.h>
+
+#include "tokenizer.h"
+#include "block.h"
+#include "utils.h"
+
+extern VALUE mLiquid;
+extern VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
+
+#endif
--- a/ext/liquid/tokenizer.c
+++ b/ext/liquid/tokenizer.c
@@ -0,0 +1,113 @@
+#include "liquid_ext.h"
+
+VALUE cLiquidTokenizer;
+
+static void free_tokenizer(void *ptr) // free callback handed to Data_Make_Struct in rb_allocate
+{
+    struct liquid_tokenizer *tokenizer = ptr;
+    xfree(tokenizer); // releases the struct only
+}
+
+static VALUE rb_allocate(VALUE klass) // allocator registered with rb_define_alloc_func
+{
+    VALUE obj;
+    struct liquid_tokenizer *tokenizer;
+    // Wrap a new struct liquid_tokenizer: no mark function (NULL), free_tokenizer as the free function.
+    obj = Data_Make_Struct(klass, struct liquid_tokenizer, NULL, free_tokenizer, tokenizer);
+    return obj;
+}
+
+static VALUE rb_initialize(VALUE self, VALUE source) // Tokenizer#initialize(source); raises TypeError unless String
+{
+    struct liquid_tokenizer *tokenizer;
+    Check_Type(source, T_STRING);
+    rb_iv_set(self, "@source", source = rb_str_new_frozen(source)); // hold a frozen snapshot: cursor points into its buffer
+    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+    tokenizer->cursor = RSTRING_PTR(source); // raw pointer; stays valid while @source keeps the snapshot reachable
+    tokenizer->length = RSTRING_LEN(source);
+    return Qnil;
+}
+
+void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token)
+{
+    if (tokenizer->length <= 0) { // input exhausted
+        memset(token, 0, sizeof(*token)); // zeroed token: type 0 is TOKEN_NONE
+        return;
+    }
+    token->type = TOKEN_STRING; // default: plain text unless the token starts with an opener
+    // Scan for a "{%" or "{{" opener; `last` is the final byte of the remaining input.
+    char *cursor = tokenizer->cursor;
+    char *last = tokenizer->cursor + tokenizer->length - 1;
+
+    while (cursor < last) {
+        if (*cursor++ != '{')
+            continue;
+
+        char c = *cursor++;
+        if (c != '%' && c != '{')
+            continue;
+        if (cursor - tokenizer->cursor > 2) { // text precedes the opener: emit that text first
+            token->type = TOKEN_STRING;
+            cursor -= 2; // leave the opener for the next call
+            goto found;
+        }
+        char *incomplete_end = cursor; // fallback token end: just past the two-byte opener
+        token->type = TOKEN_INVALID; // stays invalid unless a closing delimiter is found
+        if (c == '%') {
+            while (cursor < last) {
+                if (*cursor++ != '%')
+                    continue;
+                c = *cursor++;
+                while (c == '%' && cursor <= last) // skip a run of '%' so "%%}" still closes the tag
+                    c = *cursor++;
+                if (c != '}')
+                    continue;
+                token->type = TOKEN_TAG;
+                goto found;
+            }
+            cursor = incomplete_end; // no "%}" found: the token is just "{%"
+            goto found;
+        } else {
+            while (cursor < last) {
+                if (*cursor++ != '}')
+                    continue;
+                if (*cursor++ != '}') {
+                    incomplete_end = cursor - 1; // lone '}': remember the position just after it
+                    continue;
+                }
+                token->type = TOKEN_VARIABLE;
+                goto found;
+            }
+            cursor = incomplete_end; // no "}}" found: end after the opener or after the last lone '}'
+            goto found;
+        }
+    }
+    cursor = last + 1; // no opener found: the rest of the input is one string token
+found:
+    token->str = tokenizer->cursor; // the token is a slice of the input, not a copy
+    token->length = cursor - tokenizer->cursor;
+    tokenizer->cursor += token->length;
+    tokenizer->length -= token->length;
+}
+
+static VALUE rb_next(VALUE self) // Tokenizer#next (also aliased as #shift)
+{
+    struct liquid_tokenizer *tokenizer;
+    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+
+    struct token token;
+    liquid_tokenizer_next(tokenizer, &token);
+    if (token.type == TOKEN_NONE) // input exhausted
+        return Qnil;
+
+    return rb_str_new(token.str, token.length); // copy the token bytes into a new Ruby String
+}
+
+void init_liquid_tokenizer() // defines Liquid::Tokenizer; reads mLiquid, so it must already be set
+{
+    cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
+    rb_define_alloc_func(cLiquidTokenizer, rb_allocate); // instances wrap a struct liquid_tokenizer
+    rb_define_method(cLiquidTokenizer, "initialize", rb_initialize, 1);
+    rb_define_method(cLiquidTokenizer, "next", rb_next, 0);
+    rb_define_alias(cLiquidTokenizer, "shift", "next"); // "shift" is the name Raw#parse_body calls
+}
--- a/ext/liquid/tokenizer.h
+++ b/ext/liquid/tokenizer.h
@@ -0,0 +1,30 @@
+#ifndef LIQUID_TOKENIZER_H
+#define LIQUID_TOKENIZER_H
+
+extern VALUE cLiquidTokenizer;
+
+enum token_type {
+    TOKEN_NONE,     // input exhausted (value 0, so a zeroed struct token has this type)
+    TOKEN_INVALID,  // "{%" or "{{" opener with no matching closing delimiter
+    TOKEN_STRING,   // plain text outside delimiters
+    TOKEN_TAG,      // "{% ... %}"
+    TOKEN_VARIABLE  // "{{ ... }}"
+};
+
+struct token {
+    enum token_type type;
+    char *str;   // start of the token inside the tokenizer's input (a slice, not a copy)
+    int length;  // token length in bytes
+};
+
+struct liquid_tokenizer {
+    char *cursor; // next unread byte; points into the source String's buffer (set in rb_initialize)
+    int length;   // bytes remaining from cursor
+};
+
+void init_liquid_tokenizer();
+void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token);
+
+#define LIQUID_TOKENIZER_GET_STRUCT(obj) ((struct liquid_tokenizer *)obj_get_data_ptr(obj, cLiquidTokenizer))
+
+#endif
--- a/ext/liquid/utils.c
+++ b/ext/liquid/utils.c
@@ -0,0 +1,21 @@
+#include <ruby.h>
+
+void raise_type_error(VALUE expected, VALUE got) // both arguments are classes; their names go into the message
+{
+    rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)",
+                             rb_class2name(got), rb_class2name(expected));
+}
+
+void check_class(VALUE obj, int type, VALUE klass) // raises TypeError unless obj has `type` and is a klass
+{
+    Check_Type(obj, type);
+    // kind_of accepts subclasses and objects with a singleton class, which an exact RBASIC_CLASS match rejected
+    if (!RTEST(rb_obj_is_kind_of(obj, klass)))
+        raise_type_error(klass, rb_obj_class(obj)); // rb_obj_class reports the real class, not a singleton
+}
+
+void *obj_get_data_ptr(VALUE obj, VALUE klass) // returns the C struct pointer wrapped by obj
+{
+    check_class(obj, T_DATA, klass); // raises TypeError when obj fails the type/class check
+    return DATA_PTR(obj);
+}
--- a/ext/liquid/utils.h
+++ b/ext/liquid/utils.h
@@ -0,0 +1,8 @@
+#ifndef LIQUID_UTILS_H
+#define LIQUID_UTILS_H
+
+void raise_type_error(VALUE expected, VALUE got);
+void check_class(VALUE obj, int type, VALUE klass); /* matches the three-argument definition in utils.c */
+void *obj_get_data_ptr(VALUE obj, VALUE klass);
+
+#endif
--- a/lib/liquid.rb
+++ b/lib/liquid.rb
@@ -30,16 +30,13 @@ module Liquid
  VariableSegment             = /[\w\-]/
  VariableStart               = /\{\{/
  VariableEnd                 = /\}\}/
-  VariableIncompleteEnd       = /\}\}?/
  QuotedString                = /"[^"]*"|'[^']*'/
  QuotedFragment              = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o
  TagAttributes               = /(\w+)\s*\:\s*(#{QuotedFragment})/o
-  AnyStartingTag              = /\{\{|\{\%/
-  PartialTemplateParser       = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
-  TemplateParser              = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
  VariableParser              = /\[[^\]]+\]|#{VariableSegment}+\??/o
 end

+require 'liquid/liquid'
 require "liquid/version"
 require 'liquid/lexer'
 require 'liquid/parser'
--- a/lib/liquid/block.rb
+++ b/lib/liquid/block.rb
@@ -1,82 +1,26 @@
 module Liquid
  class Block < Tag
-    IsTag             = /\A#{TagStart}/o
-    IsVariable        = /\A#{VariableStart}/o
-    FullToken         = /\A#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
-    ContentOfVariable = /\A#{VariableStart}(.*)#{VariableEnd}\z/om
+    def initialize(tag_name, markup, tokens)
+      super
+      parse_body(tokens)
+    end

    def blank?
      @blank || false
    end

-    def parse(tokens)
-      @blank = true
-      @nodelist ||= []
-      @nodelist.clear
-
-      # All child tags of the current block.
-      @children = []
-
-      while token = tokens.shift
-        case token
-        when IsTag
-          if token =~ FullToken
-
-            # if we found the proper block delimiter just end parsing here and let the outer block
-            # proceed
-            if block_delimiter == $1
-              end_tag
-              return
-            end
-
-            # fetch the tag from registered blocks
-            if tag = Template.tags[$1]
-              new_tag = tag.parse($1, $2, tokens, @options)
-              @blank &&= new_tag.blank?
-              @nodelist << new_tag
-              @children << new_tag
-            else
-              # this tag is not registered with the system
-              # pass it to the current block for special handling or error reporting
-              unknown_tag($1, $2, tokens)
-            end
-          else
-            raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
-          end
-        when IsVariable
-          new_var = create_variable(token)
-          @nodelist << new_var
-          @children << new_var
-          @blank = false
-        when ''.freeze
-          # pass
-        else
-          @nodelist << token
-          @blank &&= (token =~ /\A\s*\z/)
-        end
-      end
-
-      # Make sure that it's ok to end parsing in the current block.
-      # Effectively this method will throw an exception unless the current block is
-      # of type Document
-      assert_missing_delimitation!
-    end
-
    # warnings of this block and all sub-tags
    def warnings
      all_warnings = []
      all_warnings.concat(@warnings) if @warnings

-      (@children || []).each do |node|
-        all_warnings.concat(node.warnings || [])
+      (nodelist || []).each do |node|
+        all_warnings.concat(node.warnings || []) if node.respond_to?(:warnings)
      end

      all_warnings
    end

-    def end_tag
-    end
-
    def unknown_tag(tag, params, tokens)
      case tag
      when 'else'.freeze
@@ -112,6 +56,14 @@ module Liquid

    protected

+    def unterminated_variable(token)
+      raise SyntaxError.new(options[:locale].t("errors.syntax.variable_termination".freeze, :token => token, :tag_end => VariableEnd.inspect))
+    end
+
+    def unterminated_tag(token)
+      raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
+    end
+
    def assert_missing_delimitation!
      raise SyntaxError.new(options[:locale].t("errors.syntax.tag_never_closed".freeze, :block_name => block_name))
    end
--- a/lib/liquid/document.rb
+++ b/lib/liquid/document.rb
@@ -7,7 +7,7 @@ module Liquid

    # There isn't a real delimiter
    def block_delimiter
-      []
+      nil
    end

    # Document blocks don't need to be terminated since they are not actually opened
--- a/lib/liquid/tag.rb
+++ b/lib/liquid/tag.rb
@@ -19,9 +19,6 @@ module Liquid
      @options    = options
    end

-    def parse(tokens)
-    end
-
    def name
      self.class.name.downcase
    end
--- a/lib/liquid/tags/ifchanged.rb
+++ b/lib/liquid/tags/ifchanged.rb
@@ -4,7 +4,7 @@ module Liquid
    def render(context)
      context.stack do

-        output = render_all(@nodelist, context)
+        output = super

        if output != context.registers[:ifchanged]
          context.registers[:ifchanged] = output
--- a/lib/liquid/tags/include.rb
+++ b/lib/liquid/tags/include.rb
@@ -35,9 +35,6 @@ module Liquid
      end
    end

-    def parse(tokens)
-    end
-
    def blank?
      false
    end
--- a/lib/liquid/tags/raw.rb
+++ b/lib/liquid/tags/raw.rb
@@ -2,16 +2,13 @@ module Liquid
  class Raw < Block
    FullTokenPossiblyInvalid = /\A(.*)#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om

-    def parse(tokens)
+    def parse_body(tokens)
      @nodelist ||= []
      @nodelist.clear
      while token = tokens.shift
        if token =~ FullTokenPossiblyInvalid
          @nodelist << $1 if $1 != "".freeze
-          if block_delimiter == $2
-            end_tag
-            return
-          end
+          return if block_delimiter == $2
        end
        @nodelist << token if not token.empty?
      end
--- a/lib/liquid/tags/table_row.rb
+++ b/lib/liquid/tags/table_row.rb
@@ -54,7 +54,7 @@ module Liquid

          col += 1

-          result << "<td class=\"col#{col}\">" << render_all(@nodelist, context) << '</td>'
+          result << "<td class=\"col#{col}\">" << super << '</td>'

          if col == cols and (index != length - 1)
            col  = 0
--- a/lib/liquid/template.rb
+++ b/lib/liquid/template.rb
@@ -162,16 +162,9 @@ module Liquid

    private

-    # Uses the <tt>Liquid::TemplateParser</tt> regexp to tokenize the passed source
    def tokenize(source)
      source = source.source if source.respond_to?(:source)
-      return [] if source.to_s.empty?
-      tokens = source.split(TemplateParser)
-
-      # removes the rogue empty element at the beginning of the array
-      tokens.shift if tokens[0] and tokens[0].empty?
-
-      tokens
+      Tokenizer.new(source.to_s)
    end

  end
--- a/liquid.gemspec
+++ b/liquid.gemspec
@@ -18,13 +18,17 @@ Gem::Specification.new do |s|
  s.required_rubygems_version = ">= 1.3.7"

  s.test_files  = Dir.glob("{test}/**/*")
-  s.files       = Dir.glob("{lib}/**/*") + %w(MIT-LICENSE README.md)
+  s.files       = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
+  s.extensions  = ['ext/liquid/extconf.rb']

  s.extra_rdoc_files  = ["History.md", "README.md"]

  s.require_path = "lib"

-  s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
  s.add_development_dependency 'rake'
  s.add_development_dependency 'activesupport'
+  if RUBY_ENGINE == 'ruby'
+    s.add_development_dependency 'rake-compiler'
+    s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
+  end
 end
--- a/test/liquid/template_test.rb
+++ b/test/liquid/template_test.rb
@@ -25,26 +25,6 @@ end
 class TemplateTest < Test::Unit::TestCase
  include Liquid

-  def test_tokenize_strings
-    assert_equal [' '], Template.new.send(:tokenize, ' ')
-    assert_equal ['hello world'], Template.new.send(:tokenize, 'hello world')
-  end
-
-  def test_tokenize_variables
-    assert_equal ['{{funk}}'], Template.new.send(:tokenize, '{{funk}}')
-    assert_equal [' ', '{{funk}}', ' '], Template.new.send(:tokenize, ' {{funk}} ')
-    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], Template.new.send(:tokenize, ' {{funk}} {{so}} {{brother}} ')
-    assert_equal [' ', '{{  funk  }}', ' '], Template.new.send(:tokenize, ' {{  funk  }} ')
-  end
-
-  def test_tokenize_blocks
-    assert_equal ['{%comment%}'], Template.new.send(:tokenize, '{%comment%}')
-    assert_equal [' ', '{%comment%}', ' '], Template.new.send(:tokenize, ' {%comment%} ')
-
-    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], Template.new.send(:tokenize, ' {%comment%} {%endcomment%} ')
-    assert_equal ['  ', '{% comment %}', ' ', '{% endcomment %}', ' '], Template.new.send(:tokenize, "  {% comment %} {% endcomment %} ")
-  end
-
  def test_instance_assigns_persist_on_same_template_object_between_parses
    t = Template.new
    assert_equal 'from instance assigns', t.parse("{% assign foo = 'from instance assigns' %}{{ foo }}").render!
--- a/test/liquid/tokenizer_test.rb
+++ b/test/liquid/tokenizer_test.rb
@@ -0,0 +1,64 @@
+require 'test_helper'
+
+class TokenizerTest < Test::Unit::TestCase
+  def test_tokenize_strings
+    assert_equal [' '], tokenize(' ')
+    assert_equal ['hello world'], tokenize('hello world')
+  end
+
+  def test_tokenize_variables
+    assert_equal ['{{funk}}'], tokenize('{{funk}}')
+    assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ')
+    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ')
+    assert_equal [' ', '{{  funk  }}', ' '], tokenize(' {{  funk  }} ')
+  end
+
+  def test_tokenize_blocks
+    assert_equal ['{%comment%}'], tokenize('{%comment%}')
+    assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')
+
+    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
+    assert_equal ['  ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize("  {% comment %} {% endcomment %} ")
+  end
+
+  def test_tokenize_incomplete_end
+    assert_tokens 'before{{ incomplete }after', ['before', '{{ incomplete }', 'after']
+    assert_tokens 'before{% incomplete %after', ['before', '{%', ' incomplete %after']
+  end
+
+  def test_tokenize_no_end
+    assert_tokens 'before{{ unterminated ', ['before', '{{', ' unterminated ']
+    assert_tokens 'before{% unterminated ', ['before', '{%', ' unterminated ']
+  end
+
+  private
+
+  def assert_tokens(source, expected)
+    assert_equal expected, tokenize(source)
+    assert_equal expected, old_tokenize(source)
+  end
+
+  def tokenize(source)
+    tokenizer = Liquid::Tokenizer.new(source)
+    tokens = []
+    while token = tokenizer.next
+      tokens << token
+    end
+    tokens
+  end
+
+  AnyStartingTag        = /\{\{|\{\%/
+  VariableIncompleteEnd = /\}\}?/
+  PartialTemplateParser = /#{Liquid::TagStart}.*?#{Liquid::TagEnd}|#{Liquid::VariableStart}.*?#{VariableIncompleteEnd}/o
+  TemplateParser        = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
+
+  def old_tokenize(source)
+    return [] if source.to_s.empty?
+    tokens = source.split(TemplateParser)
+
+    # removes the rogue empty element at the beginning of the array
+    tokens.shift if tokens[0] and tokens[0].empty?
+
+    tokens
+  end
+end
Author	SHA1	Message	Date
Dylan Thacker-Smith	8a93a7ff55	Add convenience methods for getting a struct from a ruby object. If we are trying to get the struct from something other than self, then we should make sure to check the class of the object. These util functions make this easier.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	e2974ed95f	Implement Block#parse_body in C.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	99f950c167	Rename Block#parse to parse_body since that is how it is being used.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	dc78e565ab	Move the parse method out of Tag, only blocks need the body parsed. The parse method should be renamed to something like parse_body, since that is how it is used, and no non-block tags were using the parse method.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	0fac50aea7	Use super rather than render_all in single block render classes.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	8e45b44b21	Avoid keeping track of two lists of nodes during parsing.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	c0832ce0d1	Return nil in Document#block_delimiter rather than an empty array. The block delimiter is normally a string, so nil makes more sense when there is no delimiter. We also don't want to allocate an array for no reason.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	802a6671cb	Remove unused Block#end_tag method. Although the method is called, it is defined with an empty body and not overridden to do anything else.	2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith	87472e73b6	Implement tokenization in a C extension.	2014-03-25 16:15:02 -04:00