Compare commits

...

9 Commits

Author SHA1 Message Date
Dylan Thacker-Smith
8a93a7ff55 Add convenience methods for getting a struct from a ruby object.
If we are trying to get the struct from something other than self, then we
should make sure to check the class of the object.  This util functions
make this easier.
2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
e2974ed95f Implement Block#parse_body in C. 2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
99f950c167 Rename Block#parse to parse_body since that is how it is being used. 2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
dc78e565ab Move the parse method out of Tag, only blocks need the body parsed.
The parse method should be renamed to something like parse_body,
since that is how it is used, and no non-block tags were using the
parse method.
2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
0fac50aea7 Use super rather than render_all in single block render classes. 2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
8e45b44b21 Avoid keeping track of two lists of nodes during parsing. 2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
c0832ce0d1 Return nil in Document#block_delimiter rather than an empty array.
The block delimiter is normally a string, so nil makes more sense when
there is no delimiter. We also don't want to allocate an array for no
reason.
2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
802a6671cb Remove unused Block#end_tag method.
Although the method is called, it is defined with an empty body and not
overridden to do anything else.
2014-03-25 16:16:38 -04:00
Dylan Thacker-Smith
87472e73b6 Implement tokenization in a C extension. 2014-03-25 16:15:02 -04:00
23 changed files with 480 additions and 110 deletions

2
.gitignore vendored
View File

@@ -6,3 +6,5 @@ pkg
.rvmrc
.ruby-version
Gemfile.lock
*.bundle
/tmp

View File

@@ -1,5 +1,6 @@
require 'rake'
require 'rake/testtask'
require 'rake/extensiontask'
$LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
require "liquid/version"
@@ -75,3 +76,8 @@ desc "Run example"
task :example do
ruby "-w -d -Ilib example/server/server.rb"
end
Rake::ExtensionTask.new "liquid" do |ext|
ext.lib_dir = "lib/liquid"
end
Rake::Task[:test].prerequisites << :compile

168
ext/liquid/block.c Normal file
View File

@@ -0,0 +1,168 @@
#include "liquid_ext.h"
VALUE cLiquidBlock;
ID intern_assert_missing_delimitation, intern_block_delimiter, intern_is_blank, intern_new,
intern_new_with_options, intern_tags, intern_unknown_tag, intern_unterminated_tag,
intern_unterminated_variable;
struct liquid_tag
{
char *name, *markup;
long name_length, markup_length;
};
static bool parse_tag(struct liquid_tag *tag, char *token, long token_length)
{
// Strip {{ and }} braces
token += 2;
token_length -= 4;
char *end = token + token_length;
while (token < end && isspace(*token))
token++;
tag->name = token;
char c = *token;
while (token < end && (isalnum(c) || c == '_'))
c = *(++token);
tag->name_length = token - tag->name;
if (!tag->name_length) {
memset(tag, 0, sizeof(*tag));
return false;
}
while (token < end && isspace(*token))
token++;
tag->markup = token;
char *last = end - 1;
while (token < last && isspace(*last))
last--;
end = last + 1;
tag->markup_length = end - token;
return true;
}
static VALUE rb_parse_body(VALUE self, VALUE tokenizerObj)
{
struct liquid_tokenizer *tokenizer = LIQUID_TOKENIZER_GET_STRUCT(tokenizerObj);
bool blank = true;
VALUE nodelist = rb_iv_get(self, "@nodelist");
if (nodelist == Qnil) {
nodelist = rb_ary_new();
rb_iv_set(self, "@nodelist", nodelist);
} else {
rb_ary_clear(nodelist);
}
struct token token;
while (true) {
liquid_tokenizer_next(tokenizer, &token);
switch (token.type) {
case TOKEN_NONE:
/*
* Make sure that it's ok to end parsing in the current block.
* Effectively this method will throw an exception unless the current block is
* of type Document
*/
rb_funcall(self, intern_assert_missing_delimitation, 0);
goto done;
case TOKEN_INVALID:
{
VALUE token_obj = rb_str_new(token.str, token.length);
if (token.str[1] == '%')
rb_funcall(self, intern_unterminated_tag, 1, token_obj);
else
rb_funcall(self, intern_unterminated_variable, 1, token_obj);
break;
}
case TOKEN_TAG:
{
struct liquid_tag tag;
if (!parse_tag(&tag, token.str, token.length)) {
// FIXME: provide more appropriate error message
rb_funcall(self, intern_unterminated_tag, 1, rb_str_new(token.str, token.length));
} else {
if (tag.name_length >= 3 && !memcmp(tag.name, "end", 3)) {
VALUE block_delimiter = rb_funcall(self, intern_block_delimiter, 0);
if (TYPE(block_delimiter) == T_STRING &&
tag.name_length == RSTRING_LEN(block_delimiter) &&
!memcmp(tag.name, RSTRING_PTR(block_delimiter), tag.name_length))
{
goto done;
}
}
VALUE tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
Check_Type(tags, T_HASH);
VALUE tag_name = rb_str_new(tag.name, tag.name_length);
VALUE tag_class = rb_hash_lookup(tags, tag_name);
VALUE markup = rb_str_new(tag.markup, tag.markup_length);
if (tag_class != Qnil) {
VALUE options = rb_iv_get(self, "@options");
if (options == Qnil)
options = rb_hash_new();
VALUE new_tag = rb_funcall(tag_class, intern_new_with_options, 4,
tag_name, markup, tokenizerObj, options);
if (blank) {
VALUE blank_block = rb_funcall(new_tag, intern_is_blank, 0);
if (blank_block == Qnil || blank_block == Qfalse)
blank = false;
}
rb_ary_push(nodelist, new_tag);
} else {
rb_funcall(self, intern_unknown_tag, 3, tag_name, markup, tokenizerObj);
/*
* multi-block tags may store the nodelist in a block array on unknown_tag
* then replace @nodelist with a new array. We need to use the new array
* for the block following the tag token.
*/
nodelist = rb_iv_get(self, "@nodelist");
}
}
break;
}
case TOKEN_VARIABLE:
{
VALUE markup = rb_str_new(token.str + 2, token.length - 4);
VALUE options = rb_iv_get(self, "@options");
VALUE new_var = rb_funcall(cLiquidVariable, intern_new, 2, markup, options);
rb_ary_push(nodelist, new_var);
blank = false;
break;
}
case TOKEN_STRING:
rb_ary_push(nodelist, rb_str_new(token.str, token.length));
if (blank) {
int i;
for (i = 0; i < token.length; i++) {
if (!isspace(token.str[i])) {
blank = false;
break;
}
}
}
break;
}
}
done:
rb_iv_set(self, "@blank", blank ? Qtrue : Qfalse);
return Qnil;
}
void init_liquid_block()
{
intern_assert_missing_delimitation = rb_intern("assert_missing_delimitation!");
intern_block_delimiter = rb_intern("block_delimiter");
intern_is_blank = rb_intern("blank?");
intern_new = rb_intern("new");
intern_new_with_options = rb_intern("new_with_options");
intern_tags = rb_intern("tags");
intern_unknown_tag = rb_intern("unknown_tag");
intern_unterminated_tag = rb_intern("unterminated_tag");
intern_unterminated_variable = rb_intern("unterminated_variable");
cLiquidBlock = rb_define_class_under(mLiquid, "Block", cLiquidTag);
rb_define_method(cLiquidBlock, "parse_body", rb_parse_body, 1);
}

8
ext/liquid/block.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef LIQUID_BLOCK_H
#define LIQUID_BLOCK_H
void init_liquid_block();
extern VALUE cLiquidBlock;
#endif

3
ext/liquid/extconf.rb Normal file
View File

@@ -0,0 +1,3 @@
require 'mkmf'
$CFLAGS << ' -Wall'
create_makefile("liquid/liquid")

15
ext/liquid/liquid_ext.c Normal file
View File

@@ -0,0 +1,15 @@
#include "liquid_ext.h"
VALUE mLiquid;
VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
void Init_liquid(void)
{
mLiquid = rb_define_module("Liquid");
cLiquidTemplate = rb_define_class_under(mLiquid, "Template", rb_cObject);
cLiquidTag = rb_define_class_under(mLiquid, "Tag", rb_cObject);
cLiquidVariable = rb_define_class_under(mLiquid, "Variable", rb_cObject);
init_liquid_tokenizer();
init_liquid_block();
}

15
ext/liquid/liquid_ext.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef LIQUID_EXT_H
#define LIQUID_EXT_H
#include <stdbool.h>
#include <ctype.h>
#include <ruby.h>
#include "tokenizer.h"
#include "block.h"
#include "utils.h"
extern VALUE mLiquid;
extern VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
#endif

113
ext/liquid/tokenizer.c Normal file
View File

@@ -0,0 +1,113 @@
#include "liquid_ext.h"
VALUE cLiquidTokenizer;
static void free_tokenizer(void *ptr)
{
struct liquid_tokenizer *tokenizer = ptr;
xfree(tokenizer);
}
static VALUE rb_allocate(VALUE klass)
{
VALUE obj;
struct liquid_tokenizer *tokenizer;
obj = Data_Make_Struct(klass, struct liquid_tokenizer, NULL, free_tokenizer, tokenizer);
return obj;
}
static VALUE rb_initialize(VALUE self, VALUE source)
{
struct liquid_tokenizer *tokenizer;
Check_Type(source, T_STRING);
Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
tokenizer->cursor = RSTRING_PTR(source);
tokenizer->length = RSTRING_LEN(source);
return Qnil;
}
void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token)
{
if (tokenizer->length <= 0) {
memset(token, 0, sizeof(*token));
return;
}
token->type = TOKEN_STRING;
char *cursor = tokenizer->cursor;
char *last = tokenizer->cursor + tokenizer->length - 1;
while (cursor < last) {
if (*cursor++ != '{')
continue;
char c = *cursor++;
if (c != '%' && c != '{')
continue;
if (cursor - tokenizer->cursor > 2) {
token->type = TOKEN_STRING;
cursor -= 2;
goto found;
}
char *incomplete_end = cursor;
token->type = TOKEN_INVALID;
if (c == '%') {
while (cursor < last) {
if (*cursor++ != '%')
continue;
c = *cursor++;
while (c == '%' && cursor <= last)
c = *cursor++;
if (c != '}')
continue;
token->type = TOKEN_TAG;
goto found;
}
cursor = incomplete_end;
goto found;
} else {
while (cursor < last) {
if (*cursor++ != '}')
continue;
if (*cursor++ != '}') {
incomplete_end = cursor - 1;
continue;
}
token->type = TOKEN_VARIABLE;
goto found;
}
cursor = incomplete_end;
goto found;
}
}
cursor = last + 1;
found:
token->str = tokenizer->cursor;
token->length = cursor - tokenizer->cursor;
tokenizer->cursor += token->length;
tokenizer->length -= token->length;
}
static VALUE rb_next(VALUE self)
{
struct liquid_tokenizer *tokenizer;
Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
struct token token;
liquid_tokenizer_next(tokenizer, &token);
if (token.type == TOKEN_NONE)
return Qnil;
return rb_str_new(token.str, token.length);
}
void init_liquid_tokenizer()
{
cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
rb_define_alloc_func(cLiquidTokenizer, rb_allocate);
rb_define_method(cLiquidTokenizer, "initialize", rb_initialize, 1);
rb_define_method(cLiquidTokenizer, "next", rb_next, 0);
rb_define_alias(cLiquidTokenizer, "shift", "next");
}

30
ext/liquid/tokenizer.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef LIQUID_TOKENIZER_H
#define LIQUID_TOKENIZER_H
extern VALUE cLiquidTokenizer;
enum token_type {
TOKEN_NONE,
TOKEN_INVALID,
TOKEN_STRING,
TOKEN_TAG,
TOKEN_VARIABLE
};
struct token {
enum token_type type;
char *str;
int length;
};
struct liquid_tokenizer {
char *cursor;
int length;
};
void init_liquid_tokenizer();
void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token);
#define LIQUID_TOKENIZER_GET_STRUCT(obj) ((struct liquid_tokenizer *)obj_get_data_ptr(obj, cLiquidTokenizer))
#endif

21
ext/liquid/utils.c Normal file
View File

@@ -0,0 +1,21 @@
#include <ruby.h>
void raise_type_error(VALUE expected, VALUE got)
{
rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)",
rb_class2name(got), rb_class2name(expected));
}
void check_class(VALUE obj, int type, VALUE klass)
{
Check_Type(obj, type);
VALUE obj_klass = RBASIC_CLASS(obj);
if (obj_klass != klass)
raise_type_error(klass, obj_klass);
}
void *obj_get_data_ptr(VALUE obj, VALUE klass)
{
check_class(obj, T_DATA, klass);
return DATA_PTR(obj);
}

8
ext/liquid/utils.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef LIQUID_UTILS_H
#define LIQUID_UTILS_H
void raise_type_error(VALUE expected, VALUE got);
void check_class(VALUE klass);
void *obj_get_data_ptr(VALUE obj, VALUE klass);
#endif

View File

@@ -30,16 +30,13 @@ module Liquid
VariableSegment = /[\w\-]/
VariableStart = /\{\{/
VariableEnd = /\}\}/
VariableIncompleteEnd = /\}\}?/
QuotedString = /"[^"]*"|'[^']*'/
QuotedFragment = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o
TagAttributes = /(\w+)\s*\:\s*(#{QuotedFragment})/o
AnyStartingTag = /\{\{|\{\%/
PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
VariableParser = /\[[^\]]+\]|#{VariableSegment}+\??/o
end
require 'liquid/liquid'
require "liquid/version"
require 'liquid/lexer'
require 'liquid/parser'

View File

@@ -1,82 +1,26 @@
module Liquid
class Block < Tag
IsTag = /\A#{TagStart}/o
IsVariable = /\A#{VariableStart}/o
FullToken = /\A#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
ContentOfVariable = /\A#{VariableStart}(.*)#{VariableEnd}\z/om
def initialize(tag_name, markup, tokens)
super
parse_body(tokens)
end
def blank?
@blank || false
end
def parse(tokens)
@blank = true
@nodelist ||= []
@nodelist.clear
# All child tags of the current block.
@children = []
while token = tokens.shift
case token
when IsTag
if token =~ FullToken
# if we found the proper block delimiter just end parsing here and let the outer block
# proceed
if block_delimiter == $1
end_tag
return
end
# fetch the tag from registered blocks
if tag = Template.tags[$1]
new_tag = tag.parse($1, $2, tokens, @options)
@blank &&= new_tag.blank?
@nodelist << new_tag
@children << new_tag
else
# this tag is not registered with the system
# pass it to the current block for special handling or error reporting
unknown_tag($1, $2, tokens)
end
else
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
end
when IsVariable
new_var = create_variable(token)
@nodelist << new_var
@children << new_var
@blank = false
when ''.freeze
# pass
else
@nodelist << token
@blank &&= (token =~ /\A\s*\z/)
end
end
# Make sure that it's ok to end parsing in the current block.
# Effectively this method will throw an exception unless the current block is
# of type Document
assert_missing_delimitation!
end
# warnings of this block and all sub-tags
def warnings
all_warnings = []
all_warnings.concat(@warnings) if @warnings
(@children || []).each do |node|
all_warnings.concat(node.warnings || [])
(nodelist || []).each do |node|
all_warnings.concat(node.warnings || []) if node.respond_to?(:warnings)
end
all_warnings
end
def end_tag
end
def unknown_tag(tag, params, tokens)
case tag
when 'else'.freeze
@@ -112,6 +56,14 @@ module Liquid
protected
def unterminated_variable(token)
raise SyntaxError.new(options[:locale].t("errors.syntax.variable_termination".freeze, :token => token, :tag_end => VariableEnd.inspect))
end
def unterminated_tag(token)
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
end
def assert_missing_delimitation!
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_never_closed".freeze, :block_name => block_name))
end

View File

@@ -7,7 +7,7 @@ module Liquid
# There isn't a real delimiter
def block_delimiter
[]
nil
end
# Document blocks don't need to be terminated since they are not actually opened

View File

@@ -19,9 +19,6 @@ module Liquid
@options = options
end
def parse(tokens)
end
def name
self.class.name.downcase
end

View File

@@ -4,7 +4,7 @@ module Liquid
def render(context)
context.stack do
output = render_all(@nodelist, context)
output = super
if output != context.registers[:ifchanged]
context.registers[:ifchanged] = output

View File

@@ -35,9 +35,6 @@ module Liquid
end
end
def parse(tokens)
end
def blank?
false
end

View File

@@ -2,16 +2,13 @@ module Liquid
class Raw < Block
FullTokenPossiblyInvalid = /\A(.*)#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
def parse(tokens)
def parse_body(tokens)
@nodelist ||= []
@nodelist.clear
while token = tokens.shift
if token =~ FullTokenPossiblyInvalid
@nodelist << $1 if $1 != "".freeze
if block_delimiter == $2
end_tag
return
end
return if block_delimiter == $2
end
@nodelist << token if not token.empty?
end

View File

@@ -54,7 +54,7 @@ module Liquid
col += 1
result << "<td class=\"col#{col}\">" << render_all(@nodelist, context) << '</td>'
result << "<td class=\"col#{col}\">" << super << '</td>'
if col == cols and (index != length - 1)
col = 0

View File

@@ -162,16 +162,9 @@ module Liquid
private
# Uses the <tt>Liquid::TemplateParser</tt> regexp to tokenize the passed source
def tokenize(source)
source = source.source if source.respond_to?(:source)
return [] if source.to_s.empty?
tokens = source.split(TemplateParser)
# removes the rogue empty element at the beginning of the array
tokens.shift if tokens[0] and tokens[0].empty?
tokens
Tokenizer.new(source.to_s)
end
end

View File

@@ -18,13 +18,17 @@ Gem::Specification.new do |s|
s.required_rubygems_version = ">= 1.3.7"
s.test_files = Dir.glob("{test}/**/*")
s.files = Dir.glob("{lib}/**/*") + %w(MIT-LICENSE README.md)
s.files = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
s.extensions = ['ext/liquid/extconf.rb']
s.extra_rdoc_files = ["History.md", "README.md"]
s.require_path = "lib"
s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
s.add_development_dependency 'rake'
s.add_development_dependency 'activesupport'
if RUBY_ENGINE == 'ruby'
s.add_development_dependency 'rake-compiler'
s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
end
end

View File

@@ -25,26 +25,6 @@ end
class TemplateTest < Test::Unit::TestCase
include Liquid
def test_tokenize_strings
assert_equal [' '], Template.new.send(:tokenize, ' ')
assert_equal ['hello world'], Template.new.send(:tokenize, 'hello world')
end
def test_tokenize_variables
assert_equal ['{{funk}}'], Template.new.send(:tokenize, '{{funk}}')
assert_equal [' ', '{{funk}}', ' '], Template.new.send(:tokenize, ' {{funk}} ')
assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], Template.new.send(:tokenize, ' {{funk}} {{so}} {{brother}} ')
assert_equal [' ', '{{ funk }}', ' '], Template.new.send(:tokenize, ' {{ funk }} ')
end
def test_tokenize_blocks
assert_equal ['{%comment%}'], Template.new.send(:tokenize, '{%comment%}')
assert_equal [' ', '{%comment%}', ' '], Template.new.send(:tokenize, ' {%comment%} ')
assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], Template.new.send(:tokenize, ' {%comment%} {%endcomment%} ')
assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], Template.new.send(:tokenize, " {% comment %} {% endcomment %} ")
end
def test_instance_assigns_persist_on_same_template_object_between_parses
t = Template.new
assert_equal 'from instance assigns', t.parse("{% assign foo = 'from instance assigns' %}{{ foo }}").render!

View File

@@ -0,0 +1,64 @@
require 'test_helper'
class TokenizerTest < Test::Unit::TestCase
def test_tokenize_strings
assert_equal [' '], tokenize(' ')
assert_equal ['hello world'], tokenize('hello world')
end
def test_tokenize_variables
assert_equal ['{{funk}}'], tokenize('{{funk}}')
assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ')
assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ')
assert_equal [' ', '{{ funk }}', ' '], tokenize(' {{ funk }} ')
end
def test_tokenize_blocks
assert_equal ['{%comment%}'], tokenize('{%comment%}')
assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')
assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ")
end
def test_tokenize_incomplete_end
assert_tokens 'before{{ incomplete }after', ['before', '{{ incomplete }', 'after']
assert_tokens 'before{% incomplete %after', ['before', '{%', ' incomplete %after']
end
def test_tokenize_no_end
assert_tokens 'before{{ unterminated ', ['before', '{{', ' unterminated ']
assert_tokens 'before{% unterminated ', ['before', '{%', ' unterminated ']
end
private
def assert_tokens(source, expected)
assert_equal expected, tokenize(source)
assert_equal expected, old_tokenize(source)
end
def tokenize(source)
tokenizer = Liquid::Tokenizer.new(source)
tokens = []
while token = tokenizer.next
tokens << token
end
tokens
end
AnyStartingTag = /\{\{|\{\%/
VariableIncompleteEnd = /\}\}?/
PartialTemplateParser = /#{Liquid::TagStart}.*?#{Liquid::TagEnd}|#{Liquid::VariableStart}.*?#{VariableIncompleteEnd}/o
TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
def old_tokenize(source)
return [] if source.to_s.empty?
tokens = source.split(TemplateParser)
# removes the rogue empty element at the beginning of the array
tokens.shift if tokens[0] and tokens[0].empty?
tokens
end
end