Implement tokenization in a C extension.

2026-01-13 21:45:44 +03:00 · 2014-03-26 03:20:34 -04:00
24 changed files with 199 additions and 332 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,7 @@ pkg
 .rvmrc
 .ruby-version
 Gemfile.lock
+/ext/liquid/Makefile
+*.o
 *.bundle
 /tmp
--- a/Rakefile
+++ b/Rakefile
@@ -1,6 +1,5 @@
 require 'rake'
 require 'rake/testtask'
-require 'rake/extensiontask'
 $LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
 require "liquid/version"

@@ -77,7 +76,10 @@ task :example do
  ruby "-w -d -Ilib example/server/server.rb"
 end

-Rake::ExtensionTask.new "liquid" do |ext|
-  ext.lib_dir = "lib/liquid"
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'rake/extensiontask'
+  Rake::ExtensionTask.new "liquid" do |ext|
+    ext.lib_dir = "lib/liquid"
+  end
+  Rake::Task[:test].prerequisites << :compile
 end
-Rake::Task[:test].prerequisites << :compile
--- a/ext/liquid/block.c
+++ b/ext/liquid/block.c
@@ -1,168 +0,0 @@
-#include "liquid_ext.h"
-
-VALUE cLiquidBlock;
-ID intern_assert_missing_delimitation, intern_block_delimiter, intern_is_blank, intern_new,
-   intern_new_with_options, intern_tags, intern_unknown_tag, intern_unterminated_tag,
-   intern_unterminated_variable;
-
-struct liquid_tag
-{
-    char *name, *markup;
-    long name_length, markup_length;
-};
-
-static bool parse_tag(struct liquid_tag *tag, char *token, long token_length)
-{
-    // Strip {{ and }} braces
-    token += 2;
-    token_length -= 4;
-
-    char *end = token + token_length;
-    while (token < end && isspace(*token))
-        token++;
-    tag->name = token;
-
-    char c = *token;
-    while (token < end && (isalnum(c) || c == '_'))
-        c = *(++token);
-    tag->name_length = token - tag->name;
-    if (!tag->name_length) {
-        memset(tag, 0, sizeof(*tag));
-        return false;
-    }
-
-    while (token < end && isspace(*token))
-        token++;
-    tag->markup = token;
-
-    char *last = end - 1;
-    while (token < last && isspace(*last))
-        last--;
-    end = last + 1;
-    tag->markup_length = end - token;
-    return true;
-}
-
-static VALUE rb_parse_body(VALUE self, VALUE tokenizerObj)
-{
-    struct liquid_tokenizer *tokenizer = LIQUID_TOKENIZER_GET_STRUCT(tokenizerObj);
-
-    bool blank = true;
-    VALUE nodelist = rb_iv_get(self, "@nodelist");
-    if (nodelist == Qnil) {
-        nodelist = rb_ary_new();
-        rb_iv_set(self, "@nodelist", nodelist);
-    } else {
-        rb_ary_clear(nodelist);
-    }
-
-    struct token token;
-    while (true) {
-        liquid_tokenizer_next(tokenizer, &token);
-        switch (token.type) {
-        case TOKEN_NONE:
-            /*
-             * Make sure that it's ok to end parsing in the current block.
-             * Effectively this method will throw an exception unless the current block is
-             * of type Document
-             */
-            rb_funcall(self, intern_assert_missing_delimitation, 0);
-            goto done;
-        case TOKEN_INVALID:
-        {
-            VALUE token_obj = rb_str_new(token.str, token.length);
-            if (token.str[1] == '%')
-                rb_funcall(self, intern_unterminated_tag, 1, token_obj);
-            else
-                rb_funcall(self, intern_unterminated_variable, 1, token_obj);
-            break;
-        }
-        case TOKEN_TAG:
-        {
-            struct liquid_tag tag;
-            if (!parse_tag(&tag, token.str, token.length)) {
-                // FIXME: provide more appropriate error message
-                rb_funcall(self, intern_unterminated_tag, 1, rb_str_new(token.str, token.length));
-            } else {
-                if (tag.name_length >= 3 && !memcmp(tag.name, "end", 3)) {
-                    VALUE block_delimiter = rb_funcall(self, intern_block_delimiter, 0);
-                    if (TYPE(block_delimiter) == T_STRING &&
-                        tag.name_length == RSTRING_LEN(block_delimiter) &&
-                        !memcmp(tag.name, RSTRING_PTR(block_delimiter), tag.name_length))
-                    {
-                        goto done;
-                    }
-                }
-
-                VALUE tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
-                Check_Type(tags, T_HASH);
-                VALUE tag_name = rb_str_new(tag.name, tag.name_length);
-                VALUE tag_class = rb_hash_lookup(tags, tag_name);
-                VALUE markup = rb_str_new(tag.markup, tag.markup_length);
-                if (tag_class != Qnil) {
-                    VALUE options = rb_iv_get(self, "@options");
-                    if (options == Qnil)
-                        options = rb_hash_new();
-                    VALUE new_tag = rb_funcall(tag_class, intern_new_with_options, 4,
-                                               tag_name, markup, tokenizerObj, options);
-                    if (blank) {
-                        VALUE blank_block = rb_funcall(new_tag, intern_is_blank, 0);
-                        if (blank_block == Qnil || blank_block == Qfalse)
-                            blank = false;
-                    }
-                    rb_ary_push(nodelist, new_tag);
-                } else {
-                    rb_funcall(self, intern_unknown_tag, 3, tag_name, markup, tokenizerObj);
-                    /*
-                     * multi-block tags may store the nodelist in a block array on unknown_tag
-                     * then replace @nodelist with a new array. We need to use the new array
-                     * for the block following the tag token.
-                     */
-                    nodelist = rb_iv_get(self, "@nodelist");
-                }
-            }
-            break;
-        }
-        case TOKEN_VARIABLE:
-        {
-            VALUE markup = rb_str_new(token.str + 2, token.length - 4);
-            VALUE options = rb_iv_get(self, "@options");
-            VALUE new_var = rb_funcall(cLiquidVariable, intern_new, 2, markup, options);
-            rb_ary_push(nodelist, new_var);
-            blank = false;
-            break;
-        }
-        case TOKEN_STRING:
-            rb_ary_push(nodelist, rb_str_new(token.str, token.length));
-            if (blank) {
-                int i;
-                for (i = 0; i < token.length; i++) {
-                    if (!isspace(token.str[i])) {
-                        blank = false;
-                        break;
-                    }
-                }
-            }
-            break;
-        }
-    }
-done:
-    rb_iv_set(self, "@blank", blank ? Qtrue : Qfalse);
-    return Qnil;
-}
-
-void init_liquid_block()
-{
-    intern_assert_missing_delimitation = rb_intern("assert_missing_delimitation!");
-    intern_block_delimiter = rb_intern("block_delimiter");
-    intern_is_blank = rb_intern("blank?");
-    intern_new = rb_intern("new");
-    intern_new_with_options = rb_intern("new_with_options");
-    intern_tags = rb_intern("tags");
-    intern_unknown_tag = rb_intern("unknown_tag");
-    intern_unterminated_tag = rb_intern("unterminated_tag");
-    intern_unterminated_variable = rb_intern("unterminated_variable");
-
-    cLiquidBlock = rb_define_class_under(mLiquid, "Block", cLiquidTag);
-    rb_define_method(cLiquidBlock, "parse_body", rb_parse_body, 1);
-}
--- a/ext/liquid/block.h
+++ b/ext/liquid/block.h
@@ -1,8 +0,0 @@
-#ifndef LIQUID_BLOCK_H
-#define LIQUID_BLOCK_H
-
-void init_liquid_block();
-
-extern VALUE cLiquidBlock;
-
-#endif
--- a/ext/liquid/extconf.rb
+++ b/ext/liquid/extconf.rb
@@ -1,3 +1,4 @@
 require 'mkmf'
-$CFLAGS << ' -Wall'
+$CFLAGS << ' -Wall -Werror'
+$warnflags.gsub!(/-Wdeclaration-after-statement/, "")
 create_makefile("liquid/liquid")
--- a/ext/liquid/liquid.c
+++ b/ext/liquid/liquid.c
@@ -0,0 +1,9 @@
+#include "liquid.h"
+
+VALUE mLiquid;
+
+void Init_liquid(void)
+{
+    mLiquid = rb_define_module("Liquid");
+    init_liquid_tokenizer();
+}
--- a/ext/liquid/liquid.h
+++ b/ext/liquid/liquid.h
@@ -0,0 +1,11 @@
+#ifndef LIQUID_H
+#define LIQUID_H
+
+#include <ruby.h>
+#include <stdbool.h>
+
+#include "tokenizer.h"
+
+extern VALUE mLiquid;
+
+#endif
--- a/ext/liquid/liquid_ext.c
+++ b/ext/liquid/liquid_ext.c
@@ -1,15 +0,0 @@
-#include "liquid_ext.h"
-
-VALUE mLiquid;
-VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
-
-void Init_liquid(void)
-{
-    mLiquid = rb_define_module("Liquid");
-    cLiquidTemplate = rb_define_class_under(mLiquid, "Template", rb_cObject);
-    cLiquidTag = rb_define_class_under(mLiquid, "Tag", rb_cObject);
-    cLiquidVariable = rb_define_class_under(mLiquid, "Variable", rb_cObject);
-
-    init_liquid_tokenizer();
-    init_liquid_block();
-}
--- a/ext/liquid/liquid_ext.h
+++ b/ext/liquid/liquid_ext.h
@@ -1,15 +0,0 @@
-#ifndef LIQUID_EXT_H
-#define LIQUID_EXT_H
-
-#include <stdbool.h>
-#include <ctype.h>
-#include <ruby.h>
-
-#include "tokenizer.h"
-#include "block.h"
-#include "utils.h"
-
-extern VALUE mLiquid;
-extern VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
-
-#endif
--- a/ext/liquid/tokenizer.c
+++ b/ext/liquid/tokenizer.c
@@ -1,43 +1,66 @@
-#include "liquid_ext.h"
+#include "liquid.h"

 VALUE cLiquidTokenizer;

-static void free_tokenizer(void *ptr)
+static void tokenizer_mark(void *ptr) {
+    tokenizer_t *tokenizer = ptr;
+    rb_gc_mark(tokenizer->source);
+}
+
+static void tokenizer_free(void *ptr)
 {
-    struct liquid_tokenizer *tokenizer = ptr;
+    tokenizer_t *tokenizer = ptr;
    xfree(tokenizer);
 }

-static VALUE rb_allocate(VALUE klass)
+static size_t tokenizer_memsize(const void *ptr)
+{
+    return ptr ? sizeof(tokenizer_t) : 0;
+}
+
+const rb_data_type_t tokenizer_data_type = {
+    "liquid_tokenizer",
+    {tokenizer_mark, tokenizer_free, tokenizer_memsize,},
+#ifdef RUBY_TYPED_FREE_IMMEDIATELY
+    NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
+#endif
+};
+
+static VALUE tokenizer_allocate(VALUE klass)
 {
    VALUE obj;
-    struct liquid_tokenizer *tokenizer;
+    tokenizer_t *tokenizer;

-    obj = Data_Make_Struct(klass, struct liquid_tokenizer, NULL, free_tokenizer, tokenizer);
+    obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
+    tokenizer->source = Qnil;
    return obj;
 }

-static VALUE rb_initialize(VALUE self, VALUE source)
+static VALUE tokenizer_initialize_method(VALUE self, VALUE source)
 {
-    struct liquid_tokenizer *tokenizer;
+    tokenizer_t *tokenizer;

    Check_Type(source, T_STRING);
-    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+    Tokenizer_Get_Struct(self, tokenizer);
+    source = rb_str_dup_frozen(source);
+    tokenizer->source = source;
    tokenizer->cursor = RSTRING_PTR(source);
    tokenizer->length = RSTRING_LEN(source);
    return Qnil;
 }

-void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token)
+void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
 {
    if (tokenizer->length <= 0) {
        memset(token, 0, sizeof(*token));
        return;
    }
-    token->type = TOKEN_STRING;

-    char *cursor = tokenizer->cursor;
-    char *last = tokenizer->cursor + tokenizer->length - 1;
+    const char *cursor = tokenizer->cursor;
+    const char *last = cursor + tokenizer->length - 1;
+
+    token->str = cursor;
+    token->type = TOKEN_STRING;

    while (cursor < last) {
        if (*cursor++ != '{')
@@ -51,7 +74,6 @@ void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *tok
            cursor -= 2;
            goto found;
        }
-        char *incomplete_end = cursor;
        token->type = TOKEN_INVALID;
        if (c == '%') {
            while (cursor < last) {
@@ -65,38 +87,40 @@ void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *tok
                token->type = TOKEN_TAG;
                goto found;
            }
-            cursor = incomplete_end;
+            // unterminated tag
+            cursor = tokenizer->cursor + 2;
            goto found;
        } else {
            while (cursor < last) {
                if (*cursor++ != '}')
                    continue;
                if (*cursor++ != '}') {
-                    incomplete_end = cursor - 1;
-                    continue;
+                    // incomplete variable end (a lone '}'): end the token here; used to end raw tags
+                    cursor--;
+                    goto found;
                }
                token->type = TOKEN_VARIABLE;
                goto found;
            }
-            cursor = incomplete_end;
+            // unterminated variable
+            cursor = tokenizer->cursor + 2;
            goto found;
        }
    }
    cursor = last + 1;
 found:
-    token->str = tokenizer->cursor;
    token->length = cursor - tokenizer->cursor;
    tokenizer->cursor += token->length;
    tokenizer->length -= token->length;
 }

-static VALUE rb_next(VALUE self)
+static VALUE tokenizer_next_method(VALUE self)
 {
-    struct liquid_tokenizer *tokenizer;
-    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+    tokenizer_t *tokenizer;
+    Tokenizer_Get_Struct(self, tokenizer);

-    struct token token;
-    liquid_tokenizer_next(tokenizer, &token);
+    token_t token;
+    tokenizer_next(tokenizer, &token);
    if (token.type == TOKEN_NONE)
        return Qnil;

@@ -106,8 +130,8 @@ static VALUE rb_next(VALUE self)
 void init_liquid_tokenizer()
 {
    cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
-    rb_define_alloc_func(cLiquidTokenizer, rb_allocate);
-    rb_define_method(cLiquidTokenizer, "initialize", rb_initialize, 1);
-    rb_define_method(cLiquidTokenizer, "next", rb_next, 0);
+    rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
+    rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
+    rb_define_method(cLiquidTokenizer, "next", tokenizer_next_method, 0);
    rb_define_alias(cLiquidTokenizer, "shift", "next");
 }
--- a/ext/liquid/tokenizer.h
+++ b/ext/liquid/tokenizer.h
@@ -1,8 +1,6 @@
 #ifndef LIQUID_TOKENIZER_H
 #define LIQUID_TOKENIZER_H

-extern VALUE cLiquidTokenizer;
-
 enum token_type {
    TOKEN_NONE,
    TOKEN_INVALID,
@@ -11,20 +9,23 @@ enum token_type {
    TOKEN_VARIABLE
 };

-struct token {
+typedef struct token {
    enum token_type type;
-    char *str;
-    int length;
-};
+    const char *str;
+    long length;
+} token_t;

-struct liquid_tokenizer {
-    char *cursor;
-    int length;
-};
+typedef struct tokenizer {
+    VALUE source;
+    const char *cursor;
+    long length;
+} tokenizer_t;
+
+extern VALUE cLiquidTokenizer;
+extern const rb_data_type_t tokenizer_data_type;
+#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)

 void init_liquid_tokenizer();
-void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token);
-
-#define LIQUID_TOKENIZER_GET_STRUCT(obj) ((struct liquid_tokenizer *)obj_get_data_ptr(obj, cLiquidTokenizer))
+void tokenizer_next(tokenizer_t *tokenizer, token_t *token);

 #endif
--- a/ext/liquid/utils.c
+++ b/ext/liquid/utils.c
@@ -1,21 +0,0 @@
-#include <ruby.h>
-
-void raise_type_error(VALUE expected, VALUE got)
-{
-    rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)",
-                             rb_class2name(got), rb_class2name(expected));
-}
-
-void check_class(VALUE obj, int type, VALUE klass)
-{
-    Check_Type(obj, type);
-    VALUE obj_klass = RBASIC_CLASS(obj);
-    if (obj_klass != klass)
-        raise_type_error(klass, obj_klass);
-}
-
-void *obj_get_data_ptr(VALUE obj, VALUE klass)
-{
-    check_class(obj, T_DATA, klass);
-    return DATA_PTR(obj);
-}
--- a/ext/liquid/utils.h
+++ b/ext/liquid/utils.h
@@ -1,8 +0,0 @@
-#ifndef LIQUID_UTILS_H
-#define LIQUID_UTILS_H
-
-void raise_type_error(VALUE expected, VALUE got);
-void check_class(VALUE klass);
-void *obj_get_data_ptr(VALUE obj, VALUE klass);
-
-#endif
--- a/lib/liquid.rb
+++ b/lib/liquid.rb
@@ -36,7 +36,6 @@ module Liquid
  VariableParser              = /\[[^\]]+\]|#{VariableSegment}+\??/o
 end

-require 'liquid/liquid'
 require "liquid/version"
 require 'liquid/lexer'
 require 'liquid/parser'
@@ -61,3 +60,9 @@ require 'liquid/utils'
 # Load all the tags of the standard library
 #
 Dir[File.dirname(__FILE__) + '/liquid/tags/*.rb'].each { |f| require f }
+
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'liquid/liquid'
+else
+  require 'liquid/tokenizer'
+end
--- a/lib/liquid/block.rb
+++ b/lib/liquid/block.rb
@@ -1,26 +1,82 @@
 module Liquid
  class Block < Tag
-    def initialize(tag_name, markup, tokens)
-      super
-      parse_body(tokens)
-    end
+    IsTag             = /\A#{TagStart}/o
+    IsVariable        = /\A#{VariableStart}/o
+    FullToken         = /\A#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
+    ContentOfVariable = /\A#{VariableStart}(.*)#{VariableEnd}\z/om

    def blank?
      @blank || false
    end

+    def parse(tokens)
+      @blank = true
+      @nodelist ||= []
+      @nodelist.clear
+
+      # All child tags of the current block.
+      @children = []
+
+      while token = tokens.shift
+        case token
+        when IsTag
+          if token =~ FullToken
+
+            # if we found the proper block delimiter just end parsing here and let the outer block
+            # proceed
+            if block_delimiter == $1
+              end_tag
+              return
+            end
+
+            # fetch the tag from registered blocks
+            if tag = Template.tags[$1]
+              new_tag = tag.parse($1, $2, tokens, @options)
+              @blank &&= new_tag.blank?
+              @nodelist << new_tag
+              @children << new_tag
+            else
+              # this tag is not registered with the system
+              # pass it to the current block for special handling or error reporting
+              unknown_tag($1, $2, tokens)
+            end
+          else
+            raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
+          end
+        when IsVariable
+          new_var = create_variable(token)
+          @nodelist << new_var
+          @children << new_var
+          @blank = false
+        when ''.freeze
+          # pass
+        else
+          @nodelist << token
+          @blank &&= (token =~ /\A\s*\z/)
+        end
+      end
+
+      # Make sure that it's ok to end parsing in the current block.
+      # Effectively this method will throw an exception unless the current block is
+      # of type Document
+      assert_missing_delimitation!
+    end
+
    # warnings of this block and all sub-tags
    def warnings
      all_warnings = []
      all_warnings.concat(@warnings) if @warnings

-      (nodelist || []).each do |node|
-        all_warnings.concat(node.warnings || []) if node.respond_to?(:warnings)
+      (@children || []).each do |node|
+        all_warnings.concat(node.warnings || [])
      end

      all_warnings
    end

+    def end_tag
+    end
+
    def unknown_tag(tag, params, tokens)
      case tag
      when 'else'.freeze
@@ -56,14 +112,6 @@ module Liquid

    protected

-    def unterminated_variable(token)
-      raise SyntaxError.new(options[:locale].t("errors.syntax.variable_termination".freeze, :token => token, :tag_end => VariableEnd.inspect))
-    end
-
-    def unterminated_tag(token)
-      raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
-    end
-
    def assert_missing_delimitation!
      raise SyntaxError.new(options[:locale].t("errors.syntax.tag_never_closed".freeze, :block_name => block_name))
    end
--- a/lib/liquid/document.rb
+++ b/lib/liquid/document.rb
@@ -7,7 +7,7 @@ module Liquid

    # There isn't a real delimiter
    def block_delimiter
-      nil
+      []
    end

    # Document blocks don't need to be terminated since they are not actually opened
--- a/lib/liquid/tag.rb
+++ b/lib/liquid/tag.rb
@@ -19,6 +19,9 @@ module Liquid
      @options    = options
    end

+    def parse(tokens)
+    end
+
    def name
      self.class.name.downcase
    end
--- a/lib/liquid/tags/ifchanged.rb
+++ b/lib/liquid/tags/ifchanged.rb
@@ -4,7 +4,7 @@ module Liquid
    def render(context)
      context.stack do

-        output = super
+        output = render_all(@nodelist, context)

        if output != context.registers[:ifchanged]
          context.registers[:ifchanged] = output
--- a/lib/liquid/tags/include.rb
+++ b/lib/liquid/tags/include.rb
@@ -35,6 +35,9 @@ module Liquid
      end
    end

+    def parse(tokens)
+    end
+
    def blank?
      false
    end
--- a/lib/liquid/tags/raw.rb
+++ b/lib/liquid/tags/raw.rb
@@ -2,13 +2,16 @@ module Liquid
  class Raw < Block
    FullTokenPossiblyInvalid = /\A(.*)#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om

-    def parse_body(tokens)
+    def parse(tokens)
      @nodelist ||= []
      @nodelist.clear
      while token = tokens.shift
        if token =~ FullTokenPossiblyInvalid
          @nodelist << $1 if $1 != "".freeze
-          return if block_delimiter == $2
+          if block_delimiter == $2
+            end_tag
+            return
+          end
        end
        @nodelist << token if not token.empty?
      end
--- a/lib/liquid/tags/table_row.rb
+++ b/lib/liquid/tags/table_row.rb
@@ -54,7 +54,7 @@ module Liquid

          col += 1

-          result << "<td class=\"col#{col}\">" << super << '</td>'
+          result << "<td class=\"col#{col}\">" << render_all(@nodelist, context) << '</td>'

          if col == cols and (index != length - 1)
            col  = 0
--- a/lib/liquid/tokenizer.rb
+++ b/lib/liquid/tokenizer.rb
@@ -0,0 +1,20 @@
+module Liquid
+  class Tokenizer
+    VariableIncompleteEnd = /\}\}?/
+    AnyStartingTag        = /\{\{|\{\%/
+    PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
+    TemplateParser        = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
+
+    def initialize(source)
+      @tokens = source.split(TemplateParser)
+
+      # removes the rogue empty element at the beginning of the array
+      @tokens.shift if @tokens[0] && @tokens[0].empty?
+    end
+
+    def next
+      @tokens.shift
+    end
+    alias_method :shift, :next
+  end
+end
--- a/liquid.gemspec
+++ b/liquid.gemspec
@@ -19,7 +19,6 @@ Gem::Specification.new do |s|

  s.test_files  = Dir.glob("{test}/**/*")
  s.files       = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
-  s.extensions  = ['ext/liquid/extconf.rb']

  s.extra_rdoc_files  = ["History.md", "README.md"]

@@ -27,7 +26,8 @@ Gem::Specification.new do |s|

  s.add_development_dependency 'rake'
  s.add_development_dependency 'activesupport'
-  if RUBY_ENGINE == 'ruby'
+  if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+    s.extensions  = ['ext/liquid/extconf.rb']
    s.add_development_dependency 'rake-compiler'
    s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
  end
--- a/test/liquid/tokenizer_test.rb
+++ b/test/liquid/tokenizer_test.rb
@@ -21,23 +21,8 @@ class TokenizerTest < Test::Unit::TestCase
    assert_equal ['  ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize("  {% comment %} {% endcomment %} ")
  end

-  def test_tokenize_incomplete_end
-    assert_tokens 'before{{ incomplete }after', ['before', '{{ incomplete }', 'after']
-    assert_tokens 'before{% incomplete %after', ['before', '{%', ' incomplete %after']
-  end
-
-  def test_tokenize_no_end
-    assert_tokens 'before{{ unterminated ', ['before', '{{', ' unterminated ']
-    assert_tokens 'before{% unterminated ', ['before', '{%', ' unterminated ']
-  end
-
  private

-  def assert_tokens(source, expected)
-    assert_equal expected, tokenize(source)
-    assert_equal expected, old_tokenize(source)
-  end
-
  def tokenize(source)
    tokenizer = Liquid::Tokenizer.new(source)
    tokens = []
@@ -46,19 +31,4 @@ class TokenizerTest < Test::Unit::TestCase
    end
    tokens
  end
-
-  AnyStartingTag        = /\{\{|\{\%/
-  VariableIncompleteEnd = /\}\}?/
-  PartialTemplateParser = /#{Liquid::TagStart}.*?#{Liquid::TagEnd}|#{Liquid::VariableStart}.*?#{VariableIncompleteEnd}/o
-  TemplateParser        = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
-
-  def old_tokenize(source)
-    return [] if source.to_s.empty?
-    tokens = source.split(TemplateParser)
-
-    # removes the rogue empty element at the beginning of the array
-    tokens.shift if tokens[0] and tokens[0].empty?
-
-    tokens
-  end
 end