diff --git a/.gitignore b/.gitignore
index 0e27775..2bb59b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,7 @@ pkg
 .rvmrc
 .ruby-version
 Gemfile.lock
+/ext/liquid/Makefile
+*.o
+*.bundle
+/tmp
diff --git a/Rakefile b/Rakefile
index cd69268..cac9d82 100755
--- a/Rakefile
+++ b/Rakefile
@@ -75,3 +75,11 @@ desc "Run example"
 task :example do
   ruby "-w -d -Ilib example/server/server.rb"
 end
+
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'rake/extensiontask'
+  Rake::ExtensionTask.new "liquid" do |ext|
+    ext.lib_dir = "lib/liquid"
+  end
+  Rake::Task[:test].prerequisites << :compile
+end
diff --git a/ext/liquid/extconf.rb b/ext/liquid/extconf.rb
new file mode 100644
index 0000000..98deaaa
--- /dev/null
+++ b/ext/liquid/extconf.rb
@@ -0,0 +1,4 @@
+require 'mkmf'
+$CFLAGS << ' -Wall -Werror'
+$warnflags.gsub!(/-Wdeclaration-after-statement/, "")
+create_makefile("liquid/liquid")
diff --git a/ext/liquid/liquid.c b/ext/liquid/liquid.c
new file mode 100644
index 0000000..a6d1eb4
--- /dev/null
+++ b/ext/liquid/liquid.c
@@ -0,0 +1,9 @@
+#include "liquid.h"
+
+VALUE mLiquid;
+
+void Init_liquid(void)
+{
+	mLiquid = rb_define_module("Liquid");
+	init_liquid_tokenizer();
+}
diff --git a/ext/liquid/liquid.h b/ext/liquid/liquid.h
new file mode 100644
index 0000000..07dde6a
--- /dev/null
+++ b/ext/liquid/liquid.h
@@ -0,0 +1,11 @@
+#ifndef LIQUID_H
+#define LIQUID_H
+
+#include <ruby.h>
+#include <string.h>
+
+#include "tokenizer.h"
+
+extern VALUE mLiquid;
+
+#endif
diff --git a/ext/liquid/tokenizer.c b/ext/liquid/tokenizer.c
new file mode 100644
index 0000000..29266ef
--- /dev/null
+++ b/ext/liquid/tokenizer.c
@@ -0,0 +1,137 @@
+#include "liquid.h"
+
+VALUE cLiquidTokenizer;
+
+static void tokenizer_mark(void *ptr) {
+	tokenizer_t *tokenizer = ptr;
+	rb_gc_mark(tokenizer->source);
+}
+
+static void tokenizer_free(void *ptr)
+{
+	tokenizer_t *tokenizer = ptr;
+	xfree(tokenizer);
+}
+
+static size_t tokenizer_memsize(const void *ptr)
+{
+	return ptr ? sizeof(tokenizer_t) : 0;
+}
+
+const rb_data_type_t tokenizer_data_type = {
+	"liquid_tokenizer",
+	{tokenizer_mark, tokenizer_free, tokenizer_memsize,},
+#ifdef RUBY_TYPED_FREE_IMMEDIATELY
+	NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
+#endif
+};
+
+static VALUE tokenizer_allocate(VALUE klass)
+{
+	VALUE obj;
+	tokenizer_t *tokenizer;
+
+	obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
+	tokenizer->source = Qnil;
+	return obj;
+}
+
+static VALUE tokenizer_initialize_method(VALUE self, VALUE source)
+{
+	tokenizer_t *tokenizer;
+
+	Check_Type(source, T_STRING);
+	Tokenizer_Get_Struct(self, tokenizer);
+	source = rb_str_dup_frozen(source);
+	tokenizer->source = source;
+	tokenizer->cursor = RSTRING_PTR(source);
+	tokenizer->length = RSTRING_LEN(source);
+	return Qnil;
+}
+
+void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
+{
+	if (tokenizer->length <= 0) {
+		memset(token, 0, sizeof(*token));
+		return;
+	}
+
+	const char *cursor = tokenizer->cursor;
+	const char *last = cursor + tokenizer->length - 1;
+
+	token->str = cursor;
+	token->type = TOKEN_STRING;
+
+	while (cursor < last) {
+		if (*cursor++ != '{')
+			continue;
+
+		char c = *cursor++;
+		if (c != '%' && c != '{')
+			continue;
+		if (cursor - tokenizer->cursor > 2) {
+			token->type = TOKEN_STRING;
+			cursor -= 2;
+			goto found;
+		}
+		token->type = TOKEN_INVALID;
+		if (c == '%') {
+			while (cursor < last) {
+				if (*cursor++ != '%')
+					continue;
+				c = *cursor++;
+				while (c == '%' && cursor <= last)
+					c = *cursor++;
+				if (c != '}')
+					continue;
+				token->type = TOKEN_TAG;
+				goto found;
+			}
+			// unterminated tag
+			cursor = tokenizer->cursor + 2;
+			goto found;
+		} else {
+			while (cursor < last) {
+				if (*cursor++ != '}')
+					continue;
+				if (*cursor++ != '}') {
+					// variable incomplete end, used to end raw tags
+					cursor--;
+					goto found;
+				}
+				token->type = TOKEN_VARIABLE;
+				goto found;
+			}
+			// unterminated variable
+			cursor = tokenizer->cursor + 2;
+			goto found;
+		}
+	}
+	cursor = last + 1;
+found:
+	token->length = cursor - tokenizer->cursor;
+	tokenizer->cursor += token->length;
+	tokenizer->length -= token->length;
+}
+
+static VALUE tokenizer_next_method(VALUE self)
+{
+	tokenizer_t *tokenizer;
+	Tokenizer_Get_Struct(self, tokenizer);
+
+	token_t token;
+	tokenizer_next(tokenizer, &token);
+	if (token.type == TOKEN_NONE)
+		return Qnil;
+
+	return rb_str_new(token.str, token.length);
+}
+
+void init_liquid_tokenizer()
+{
+	cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
+	rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
+	rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
+	rb_define_method(cLiquidTokenizer, "next", tokenizer_next_method, 0);
+	rb_define_alias(cLiquidTokenizer, "shift", "next");
+}
diff --git a/ext/liquid/tokenizer.h b/ext/liquid/tokenizer.h
new file mode 100644
index 0000000..e3a28a4
--- /dev/null
+++ b/ext/liquid/tokenizer.h
@@ -0,0 +1,31 @@
+#ifndef LIQUID_TOKENIZER_H
+#define LIQUID_TOKENIZER_H
+
+enum token_type {
+	TOKEN_NONE,
+	TOKEN_INVALID,
+	TOKEN_STRING,
+	TOKEN_TAG,
+	TOKEN_VARIABLE
+};
+
+typedef struct token {
+	enum token_type type;
+	const char *str;
+	long length;
+} token_t;
+
+typedef struct tokenizer {
+	VALUE source;
+	const char *cursor;
+	long length;
+} tokenizer_t;
+
+extern VALUE cLiquidTokenizer;
+extern const rb_data_type_t tokenizer_data_type;
+#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
+
+void init_liquid_tokenizer();
+void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
+
+#endif
diff --git a/lib/liquid.rb b/lib/liquid.rb
index 484f8b6..053a56a 100644
--- a/lib/liquid.rb
+++ b/lib/liquid.rb
@@ -30,13 +30,9 @@ module Liquid
   VariableSegment = /[\w\-]/
   VariableStart = /\{\{/
   VariableEnd = /\}\}/
-  VariableIncompleteEnd = /\}\}?/
   QuotedString = /"[^"]*"|'[^']*'/
   QuotedFragment = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o
   TagAttributes = /(\w+)\s*\:\s*(#{QuotedFragment})/o
-  AnyStartingTag = /\{\{|\{\%/
-  PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
-  TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
   VariableParser = /\[[^\]]+\]|#{VariableSegment}+\??/o
 end
 
@@ -64,3 +60,9 @@ require 'liquid/utils'
 
 # Load all the tags of the standard library
 #
 Dir[File.dirname(__FILE__) + '/liquid/tags/*.rb'].each { |f| require f }
+
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'liquid/liquid'
+else
+  require 'liquid/tokenizer'
+end
diff --git a/lib/liquid/template.rb b/lib/liquid/template.rb
index 13748d4..fc97b4a 100644
--- a/lib/liquid/template.rb
+++ b/lib/liquid/template.rb
@@ -162,16 +162,9 @@ module Liquid
 
     private
 
-    # Uses the Liquid::TemplateParser regexp to tokenize the passed source
     def tokenize(source)
       source = source.source if source.respond_to?(:source)
-      return [] if source.to_s.empty?
-      tokens = source.split(TemplateParser)
-
-      # removes the rogue empty element at the beginning of the array
-      tokens.shift if tokens[0] and tokens[0].empty?
-
-      tokens
+      Tokenizer.new(source.to_s)
     end
   end
 
diff --git a/lib/liquid/tokenizer.rb b/lib/liquid/tokenizer.rb
new file mode 100644
index 0000000..2af4c37
--- /dev/null
+++ b/lib/liquid/tokenizer.rb
@@ -0,0 +1,20 @@
+module Liquid
+  class Tokenizer
+    VariableIncompleteEnd = /\}\}?/
+    AnyStartingTag = /\{\{|\{\%/
+    PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
+    TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
+
+    def initialize(source)
+      @tokens = source.split(TemplateParser)
+
+      # removes the rogue empty element at the beginning of the array
+      @tokens.shift if @tokens[0] && @tokens[0].empty?
+    end
+
+    def next
+      @tokens.shift
+    end
+    alias_method :shift, :next
+  end
+end
diff --git a/liquid.gemspec b/liquid.gemspec
index 297a716..fa6f405 100644
--- a/liquid.gemspec
+++ b/liquid.gemspec
@@ -18,13 +18,17 @@ s.required_rubygems_version = ">= 1.3.7"
 
   s.test_files = Dir.glob("{test}/**/*")
-  s.files = Dir.glob("{lib}/**/*") + %w(MIT-LICENSE README.md)
+  s.files = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
   s.extra_rdoc_files = ["History.md", "README.md"]
 
   s.require_path = "lib"
 
-  s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
 
   s.add_development_dependency 'rake'
   s.add_development_dependency 'activesupport'
+
+  if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+    s.extensions = ['ext/liquid/extconf.rb']
+    s.add_development_dependency 'rake-compiler'
+    s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
+  end
 end
diff --git a/test/liquid/template_test.rb b/test/liquid/template_test.rb
index f58f8b8..1baea9f 100644
--- a/test/liquid/template_test.rb
+++ b/test/liquid/template_test.rb
@@ -25,26 +25,6 @@ end
 class TemplateTest < Test::Unit::TestCase
   include Liquid
 
-  def test_tokenize_strings
-    assert_equal [' '], Template.new.send(:tokenize, ' ')
-    assert_equal ['hello world'], Template.new.send(:tokenize, 'hello world')
-  end
-
-  def test_tokenize_variables
-    assert_equal ['{{funk}}'], Template.new.send(:tokenize, '{{funk}}')
-    assert_equal [' ', '{{funk}}', ' '], Template.new.send(:tokenize, ' {{funk}} ')
-    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], Template.new.send(:tokenize, ' {{funk}} {{so}} {{brother}} ')
-    assert_equal [' ', '{{ funk }}', ' '], Template.new.send(:tokenize, ' {{ funk }} ')
-  end
-
-  def test_tokenize_blocks
-    assert_equal ['{%comment%}'], Template.new.send(:tokenize, '{%comment%}')
-    assert_equal [' ', '{%comment%}', ' '], Template.new.send(:tokenize, ' {%comment%} ')
-
-    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], Template.new.send(:tokenize, ' {%comment%} {%endcomment%} ')
-    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], Template.new.send(:tokenize, " {% comment %} {% endcomment %} ")
-  end
-
   def test_instance_assigns_persist_on_same_template_object_between_parses
     t = Template.new
     assert_equal 'from instance assigns', t.parse("{% assign foo = 'from instance assigns' %}{{ foo }}").render!
diff --git a/test/liquid/tokenizer_test.rb b/test/liquid/tokenizer_test.rb
new file mode 100644
index 0000000..3ed379f
--- /dev/null
+++ b/test/liquid/tokenizer_test.rb
@@ -0,0 +1,34 @@
+require 'test_helper'
+
+class TokenizerTest < Test::Unit::TestCase
+  def test_tokenize_strings
+    assert_equal [' '], tokenize(' ')
+    assert_equal ['hello world'], tokenize('hello world')
+  end
+
+  def test_tokenize_variables
+    assert_equal ['{{funk}}'], tokenize('{{funk}}')
+    assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ')
+    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ')
+    assert_equal [' ', '{{ funk }}', ' '], tokenize(' {{ funk }} ')
+  end
+
+  def test_tokenize_blocks
+    assert_equal ['{%comment%}'], tokenize('{%comment%}')
+    assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')
+
+    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
+    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ")
+  end
+
+  private
+
+  def tokenize(source)
+    tokenizer = Liquid::Tokenizer.new(source)
+    tokens = []
+    while token = tokenizer.next
+      tokens << token
+    end
+    tokens
+  end
+end