Compare commits


1 Commit

Author:  Dylan Thacker-Smith
SHA1:    d3e4e4c419
Message: Implement tokenization in a C extension.
Date:    2014-03-26 03:20:34 -04:00
13 changed files with 271 additions and 34 deletions

.gitignore (vendored): 4 changes

@@ -6,3 +6,7 @@ pkg
 .rvmrc
 .ruby-version
 Gemfile.lock
+/ext/liquid/Makefile
+*.o
+*.bundle
+/tmp

Rakefile

@@ -75,3 +75,11 @@ desc "Run example"
 task :example do
   ruby "-w -d -Ilib example/server/server.rb"
 end
+
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'rake/extensiontask'
+  Rake::ExtensionTask.new "liquid" do |ext|
+    ext.lib_dir = "lib/liquid"
+  end
+  Rake::Task[:test].prerequisites << :compile
+end
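
For orientation, a hand-rolled rough equivalent of what rake-compiler wires up here (a sketch only; the real :compile task also stages the build under tmp/ and handles platform-specific extension names):

task :compile do
  Dir.chdir("ext/liquid") do
    ruby "extconf.rb"  # generates the Makefile via mkmf
    sh "make"
    cp "liquid.bundle", "../../lib/liquid/"  # .bundle on macOS; .so elsewhere
  end
end
task :test => :compile  # mirrors Rake::Task[:test].prerequisites << :compile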

ext/liquid/extconf.rb (new file): 4 lines

@@ -0,0 +1,4 @@
require 'mkmf'
$CFLAGS << ' -Wall -Werror'
$warnflags.gsub!(/-Wdeclaration-after-statement/, "")
create_makefile("liquid/liquid")

ext/liquid/liquid.c (new file): 9 lines

@@ -0,0 +1,9 @@
#include "liquid.h"

VALUE mLiquid;

void Init_liquid(void)
{
	mLiquid = rb_define_module("Liquid");
	init_liquid_tokenizer();
}

ext/liquid/liquid.h (new file): 11 lines

@@ -0,0 +1,11 @@
#ifndef LIQUID_H
#define LIQUID_H

#include <ruby.h>
#include <stdbool.h>

#include "tokenizer.h"

extern VALUE mLiquid;

#endif

ext/liquid/tokenizer.c (new file): 137 lines

@@ -0,0 +1,137 @@
#include "liquid.h"

VALUE cLiquidTokenizer;

static void tokenizer_mark(void *ptr)
{
	tokenizer_t *tokenizer = ptr;
	rb_gc_mark(tokenizer->source);
}

static void tokenizer_free(void *ptr)
{
	tokenizer_t *tokenizer = ptr;
	xfree(tokenizer);
}

static size_t tokenizer_memsize(const void *ptr)
{
	return ptr ? sizeof(tokenizer_t) : 0;
}

const rb_data_type_t tokenizer_data_type = {
	"liquid_tokenizer",
	{tokenizer_mark, tokenizer_free, tokenizer_memsize,},
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
	NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
#endif
};

static VALUE tokenizer_allocate(VALUE klass)
{
	VALUE obj;
	tokenizer_t *tokenizer;

	obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
	tokenizer->source = Qnil;
	return obj;
}

static VALUE tokenizer_initialize_method(VALUE self, VALUE source)
{
	tokenizer_t *tokenizer;

	Check_Type(source, T_STRING);
	Tokenizer_Get_Struct(self, tokenizer);
	/* Work on a frozen copy so the buffer cannot be mutated out
	 * from under the cursor/length pointers below. */
	source = rb_str_dup_frozen(source);
	tokenizer->source = source;
	tokenizer->cursor = RSTRING_PTR(source);
	tokenizer->length = RSTRING_LEN(source);
	return Qnil;
}

void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
{
	if (tokenizer->length <= 0) {
		/* Source exhausted: zeroing yields TOKEN_NONE (enum value 0). */
		memset(token, 0, sizeof(*token));
		return;
	}

	const char *cursor = tokenizer->cursor;
	const char *last = cursor + tokenizer->length - 1;

	token->str = cursor;
	token->type = TOKEN_STRING;

	while (cursor < last) {
		if (*cursor++ != '{')
			continue;

		char c = *cursor++;
		if (c != '%' && c != '{')
			continue;
		if (cursor - tokenizer->cursor > 2) {
			/* Literal text precedes this "{%" or "{{": emit it as a
			 * string token first; the markup is rescanned next call. */
			token->type = TOKEN_STRING;
			cursor -= 2;
			goto found;
		}
		token->type = TOKEN_INVALID;
		if (c == '%') {
			/* Tag: scan for the closing "%}". */
			while (cursor < last) {
				if (*cursor++ != '%')
					continue;
				c = *cursor++;
				while (c == '%' && cursor <= last)
					c = *cursor++;
				if (c != '}')
					continue;
				token->type = TOKEN_TAG;
				goto found;
			}
			// unterminated tag
			cursor = tokenizer->cursor + 2;
			goto found;
		} else {
			/* Variable: scan for the closing "}}". */
			while (cursor < last) {
				if (*cursor++ != '}')
					continue;
				if (*cursor++ != '}') {
					// variable incomplete end, used to end raw tags
					cursor--;
					goto found;
				}
				token->type = TOKEN_VARIABLE;
				goto found;
			}
			// unterminated variable
			cursor = tokenizer->cursor + 2;
			goto found;
		}
	}
	cursor = last + 1;
found:
	token->length = cursor - tokenizer->cursor;
	tokenizer->cursor += token->length;
	tokenizer->length -= token->length;
}

static VALUE tokenizer_next_method(VALUE self)
{
	tokenizer_t *tokenizer;
	token_t token;

	Tokenizer_Get_Struct(self, tokenizer);
	tokenizer_next(tokenizer, &token);
	if (token.type == TOKEN_NONE)
		return Qnil;
	return rb_str_new(token.str, token.length);
}

void init_liquid_tokenizer()
{
	cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
	rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
	rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
	rb_define_method(cLiquidTokenizer, "next", tokenizer_next_method, 0);
	rb_define_alias(cLiquidTokenizer, "shift", "next");
}
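
To make the scanning behaviour concrete, a short usage sketch from Ruby (assuming the extension has been compiled and loaded; the expected output is inferred from the logic above, not shown in this diff):

require 'liquid/liquid'  # the compiled extension (MRI only)

t = Liquid::Tokenizer.new("Hello {{ name }}!{% assign a = 1 %}")
while token = t.next
  p token
end
# prints: "Hello ", "{{ name }}", "!", "{% assign a = 1 %}"
# #next returns nil once the source is exhausted (TOKEN_NONE).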

ext/liquid/tokenizer.h (new file): 31 lines

@@ -0,0 +1,31 @@
#ifndef LIQUID_TOKENIZER_H
#define LIQUID_TOKENIZER_H

enum token_type {
	TOKEN_NONE,
	TOKEN_INVALID,
	TOKEN_STRING,
	TOKEN_TAG,
	TOKEN_VARIABLE
};

typedef struct token {
	enum token_type type;
	const char *str;
	long length;
} token_t;

typedef struct tokenizer {
	VALUE source;
	const char *cursor;
	long length;
} tokenizer_t;

extern VALUE cLiquidTokenizer;
extern const rb_data_type_t tokenizer_data_type;

#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)

void init_liquid_tokenizer();
void tokenizer_next(tokenizer_t *tokenizer, token_t *token);

#endif
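
How inputs map onto the token_type enum, seen from Ruby (a hedged illustration; Ruby callers only ever receive the token string, the type stays internal to the C layer):

Liquid::Tokenizer.new("plain text").shift  # => "plain text"  (TOKEN_STRING)
Liquid::Tokenizer.new("{{ a }}").shift     # => "{{ a }}"     (TOKEN_VARIABLE)
Liquid::Tokenizer.new("{% if %}").shift    # => "{% if %}"    (TOKEN_TAG)
Liquid::Tokenizer.new("{{ a").shift        # => "{{"          (TOKEN_INVALID: unterminated)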

lib/liquid.rb

@@ -30,13 +30,9 @@ module Liquid
   VariableSegment = /[\w\-]/
   VariableStart = /\{\{/
   VariableEnd = /\}\}/
-  VariableIncompleteEnd = /\}\}?/
   QuotedString = /"[^"]*"|'[^']*'/
   QuotedFragment = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o
   TagAttributes = /(\w+)\s*\:\s*(#{QuotedFragment})/o
-  AnyStartingTag = /\{\{|\{\%/
-  PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
-  TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
   VariableParser = /\[[^\]]+\]|#{VariableSegment}+\??/o
 end
@@ -64,3 +60,9 @@ require 'liquid/utils'
 # Load all the tags of the standard library
 #
 Dir[File.dirname(__FILE__) + '/liquid/tags/*.rb'].each { |f| require f }
+
+if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+  require 'liquid/liquid'
+else
+  require 'liquid/tokenizer'
+end
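
The guard in plain terms (illustrative values; the defined? check keeps Ruby 1.8, which lacks RUBY_ENGINE, on the pure-Ruby path):

# MRI 1.9+  ->  RUBY_ENGINE == 'ruby'   ->  require 'liquid/liquid' (C ext)
# JRuby     ->  RUBY_ENGINE == 'jruby'  ->  require 'liquid/tokenizer'
# MRI 1.8   ->  RUBY_ENGINE undefined   ->  require 'liquid/tokenizer'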

lib/liquid/template.rb

@@ -162,16 +162,9 @@ module Liquid
   private

-    # Uses the <tt>Liquid::TemplateParser</tt> regexp to tokenize the passed source
     def tokenize(source)
       source = source.source if source.respond_to?(:source)
-      return [] if source.to_s.empty?
-      tokens = source.split(TemplateParser)
-
-      # removes the rogue empty element at the beginning of the array
-      tokens.shift if tokens[0] and tokens[0].empty?
-
-      tokens
+      Tokenizer.new(source.to_s)
     end
 end
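
The behavioural change in one console sketch (#tokenize is private, hence the send; either tokenizer class responds to shift):

tokens = Liquid::Template.new.send(:tokenize, "{{ a }} b")
tokens.shift  # => "{{ a }}"
tokens.shift  # => " b"
tokens.shift  # => nil, matching what Array#shift returned before

Note the dropped early return for empty sources: an empty string now yields a tokenizer whose first shift simply returns nil, so parsing loops keep working.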

lib/liquid/tokenizer.rb (new file): 20 lines

@@ -0,0 +1,20 @@
module Liquid
  class Tokenizer
    VariableIncompleteEnd = /\}\}?/
    AnyStartingTag = /\{\{|\{\%/
    PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
    TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om

    def initialize(source)
      @tokens = source.split(TemplateParser)
      # removes the rogue empty element at the beginning of the array
      @tokens.shift if @tokens[0] && @tokens[0].empty?
    end

    def next
      @tokens.shift
    end
    alias_method :shift, :next
  end
end
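
What the split-based fallback does, sketched at a console (assumes the pure-Ruby class is the one loaded, i.e. a non-MRI engine): the capture group wrapping TemplateParser is what makes String#split keep the delimiters as tokens.

"a {{ b }} c".split(Liquid::Tokenizer::TemplateParser)
# => ["a ", "{{ b }}", " c"]
"{{ b }} c".split(Liquid::Tokenizer::TemplateParser)
# => ["", "{{ b }}", " c"]   <- the rogue empty element stripped by #initialize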

liquid.gemspec

@@ -18,13 +18,17 @@ Gem::Specification.new do |s|
   s.required_rubygems_version = ">= 1.3.7"

   s.test_files = Dir.glob("{test}/**/*")
-  s.files = Dir.glob("{lib}/**/*") + %w(MIT-LICENSE README.md)
+  s.files = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
   s.extra_rdoc_files = ["History.md", "README.md"]
   s.require_path = "lib"

-  s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
   s.add_development_dependency 'rake'
   s.add_development_dependency 'activesupport'
+
+  if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
+    s.extensions = ['ext/liquid/extconf.rb']
+    s.add_development_dependency 'rake-compiler'
+    s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
+  end
 end

test/liquid/template_test.rb

@@ -25,26 +25,6 @@ end
 class TemplateTest < Test::Unit::TestCase
   include Liquid

-  def test_tokenize_strings
-    assert_equal [' '], Template.new.send(:tokenize, ' ')
-    assert_equal ['hello world'], Template.new.send(:tokenize, 'hello world')
-  end
-
-  def test_tokenize_variables
-    assert_equal ['{{funk}}'], Template.new.send(:tokenize, '{{funk}}')
-    assert_equal [' ', '{{funk}}', ' '], Template.new.send(:tokenize, ' {{funk}} ')
-    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], Template.new.send(:tokenize, ' {{funk}} {{so}} {{brother}} ')
-    assert_equal [' ', '{{ funk }}', ' '], Template.new.send(:tokenize, ' {{ funk }} ')
-  end
-
-  def test_tokenize_blocks
-    assert_equal ['{%comment%}'], Template.new.send(:tokenize, '{%comment%}')
-    assert_equal [' ', '{%comment%}', ' '], Template.new.send(:tokenize, ' {%comment%} ')
-    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], Template.new.send(:tokenize, ' {%comment%} {%endcomment%} ')
-    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], Template.new.send(:tokenize, " {% comment %} {% endcomment %} ")
-  end
-
   def test_instance_assigns_persist_on_same_template_object_between_parses
     t = Template.new
     assert_equal 'from instance assigns', t.parse("{% assign foo = 'from instance assigns' %}{{ foo }}").render!

test/liquid/tokenizer_test.rb (new file): 34 lines

@@ -0,0 +1,34 @@
require 'test_helper'

class TokenizerTest < Test::Unit::TestCase
  def test_tokenize_strings
    assert_equal [' '], tokenize(' ')
    assert_equal ['hello world'], tokenize('hello world')
  end

  def test_tokenize_variables
    assert_equal ['{{funk}}'], tokenize('{{funk}}')
    assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ')
    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ')
    assert_equal [' ', '{{ funk }}', ' '], tokenize(' {{ funk }} ')
  end

  def test_tokenize_blocks
    assert_equal ['{%comment%}'], tokenize('{%comment%}')
    assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')
    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ")
  end

  private

  def tokenize(source)
    tokenizer = Liquid::Tokenizer.new(source)
    tokens = []
    while token = tokenizer.next
      tokens << token
    end
    tokens
  end
end