Compare commits

..

1 Commits

Author SHA1 Message Date
Dylan Thacker-Smith
d3e4e4c419 Implement tokenization in a C extension. 2014-03-26 03:20:34 -04:00
24 changed files with 199 additions and 332 deletions

2
.gitignore vendored
View File

@@ -6,5 +6,7 @@ pkg
.rvmrc
.ruby-version
Gemfile.lock
/ext/liquid/Makefile
*.o
*.bundle
/tmp

View File

@@ -1,6 +1,5 @@
require 'rake'
require 'rake/testtask'
require 'rake/extensiontask'
$LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
require "liquid/version"
@@ -77,7 +76,10 @@ task :example do
ruby "-w -d -Ilib example/server/server.rb"
end
Rake::ExtensionTask.new "liquid" do |ext|
ext.lib_dir = "lib/liquid"
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
require 'rake/extensiontask'
Rake::ExtensionTask.new "liquid" do |ext|
ext.lib_dir = "lib/liquid"
end
Rake::Task[:test].prerequisites << :compile
end
Rake::Task[:test].prerequisites << :compile

View File

@@ -1,168 +0,0 @@
#include "liquid_ext.h"
VALUE cLiquidBlock;
ID intern_assert_missing_delimitation, intern_block_delimiter, intern_is_blank, intern_new,
intern_new_with_options, intern_tags, intern_unknown_tag, intern_unterminated_tag,
intern_unterminated_variable;
struct liquid_tag
{
char *name, *markup;
long name_length, markup_length;
};
static bool parse_tag(struct liquid_tag *tag, char *token, long token_length)
{
// Strip {{ and }} braces
token += 2;
token_length -= 4;
char *end = token + token_length;
while (token < end && isspace(*token))
token++;
tag->name = token;
char c = *token;
while (token < end && (isalnum(c) || c == '_'))
c = *(++token);
tag->name_length = token - tag->name;
if (!tag->name_length) {
memset(tag, 0, sizeof(*tag));
return false;
}
while (token < end && isspace(*token))
token++;
tag->markup = token;
char *last = end - 1;
while (token < last && isspace(*last))
last--;
end = last + 1;
tag->markup_length = end - token;
return true;
}
static VALUE rb_parse_body(VALUE self, VALUE tokenizerObj)
{
struct liquid_tokenizer *tokenizer = LIQUID_TOKENIZER_GET_STRUCT(tokenizerObj);
bool blank = true;
VALUE nodelist = rb_iv_get(self, "@nodelist");
if (nodelist == Qnil) {
nodelist = rb_ary_new();
rb_iv_set(self, "@nodelist", nodelist);
} else {
rb_ary_clear(nodelist);
}
struct token token;
while (true) {
liquid_tokenizer_next(tokenizer, &token);
switch (token.type) {
case TOKEN_NONE:
/*
* Make sure that it's ok to end parsing in the current block.
* Effectively this method will throw an exception unless the current block is
* of type Document
*/
rb_funcall(self, intern_assert_missing_delimitation, 0);
goto done;
case TOKEN_INVALID:
{
VALUE token_obj = rb_str_new(token.str, token.length);
if (token.str[1] == '%')
rb_funcall(self, intern_unterminated_tag, 1, token_obj);
else
rb_funcall(self, intern_unterminated_variable, 1, token_obj);
break;
}
case TOKEN_TAG:
{
struct liquid_tag tag;
if (!parse_tag(&tag, token.str, token.length)) {
// FIXME: provide more appropriate error message
rb_funcall(self, intern_unterminated_tag, 1, rb_str_new(token.str, token.length));
} else {
if (tag.name_length >= 3 && !memcmp(tag.name, "end", 3)) {
VALUE block_delimiter = rb_funcall(self, intern_block_delimiter, 0);
if (TYPE(block_delimiter) == T_STRING &&
tag.name_length == RSTRING_LEN(block_delimiter) &&
!memcmp(tag.name, RSTRING_PTR(block_delimiter), tag.name_length))
{
goto done;
}
}
VALUE tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
Check_Type(tags, T_HASH);
VALUE tag_name = rb_str_new(tag.name, tag.name_length);
VALUE tag_class = rb_hash_lookup(tags, tag_name);
VALUE markup = rb_str_new(tag.markup, tag.markup_length);
if (tag_class != Qnil) {
VALUE options = rb_iv_get(self, "@options");
if (options == Qnil)
options = rb_hash_new();
VALUE new_tag = rb_funcall(tag_class, intern_new_with_options, 4,
tag_name, markup, tokenizerObj, options);
if (blank) {
VALUE blank_block = rb_funcall(new_tag, intern_is_blank, 0);
if (blank_block == Qnil || blank_block == Qfalse)
blank = false;
}
rb_ary_push(nodelist, new_tag);
} else {
rb_funcall(self, intern_unknown_tag, 3, tag_name, markup, tokenizerObj);
/*
* multi-block tags may store the nodelist in a block array on unknown_tag
* then replace @nodelist with a new array. We need to use the new array
* for the block following the tag token.
*/
nodelist = rb_iv_get(self, "@nodelist");
}
}
break;
}
case TOKEN_VARIABLE:
{
VALUE markup = rb_str_new(token.str + 2, token.length - 4);
VALUE options = rb_iv_get(self, "@options");
VALUE new_var = rb_funcall(cLiquidVariable, intern_new, 2, markup, options);
rb_ary_push(nodelist, new_var);
blank = false;
break;
}
case TOKEN_STRING:
rb_ary_push(nodelist, rb_str_new(token.str, token.length));
if (blank) {
int i;
for (i = 0; i < token.length; i++) {
if (!isspace(token.str[i])) {
blank = false;
break;
}
}
}
break;
}
}
done:
rb_iv_set(self, "@blank", blank ? Qtrue : Qfalse);
return Qnil;
}
void init_liquid_block()
{
intern_assert_missing_delimitation = rb_intern("assert_missing_delimitation!");
intern_block_delimiter = rb_intern("block_delimiter");
intern_is_blank = rb_intern("blank?");
intern_new = rb_intern("new");
intern_new_with_options = rb_intern("new_with_options");
intern_tags = rb_intern("tags");
intern_unknown_tag = rb_intern("unknown_tag");
intern_unterminated_tag = rb_intern("unterminated_tag");
intern_unterminated_variable = rb_intern("unterminated_variable");
cLiquidBlock = rb_define_class_under(mLiquid, "Block", cLiquidTag);
rb_define_method(cLiquidBlock, "parse_body", rb_parse_body, 1);
}

View File

@@ -1,8 +0,0 @@
#ifndef LIQUID_BLOCK_H
#define LIQUID_BLOCK_H
void init_liquid_block();
extern VALUE cLiquidBlock;
#endif

View File

@@ -1,3 +1,4 @@
require 'mkmf'
$CFLAGS << ' -Wall'
$CFLAGS << ' -Wall -Werror'
$warnflags.gsub!(/-Wdeclaration-after-statement/, "")
create_makefile("liquid/liquid")

9
ext/liquid/liquid.c Normal file
View File

@@ -0,0 +1,9 @@
#include "liquid.h"
VALUE mLiquid;
void Init_liquid(void)
{
mLiquid = rb_define_module("Liquid");
init_liquid_tokenizer();
}

11
ext/liquid/liquid.h Normal file
View File

@@ -0,0 +1,11 @@
#ifndef LIQUID_H
#define LIQUID_H
#include <ruby.h>
#include <stdbool.h>
#include "tokenizer.h"
extern VALUE mLiquid;
#endif

View File

@@ -1,15 +0,0 @@
#include "liquid_ext.h"
VALUE mLiquid;
VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
void Init_liquid(void)
{
mLiquid = rb_define_module("Liquid");
cLiquidTemplate = rb_define_class_under(mLiquid, "Template", rb_cObject);
cLiquidTag = rb_define_class_under(mLiquid, "Tag", rb_cObject);
cLiquidVariable = rb_define_class_under(mLiquid, "Variable", rb_cObject);
init_liquid_tokenizer();
init_liquid_block();
}

View File

@@ -1,15 +0,0 @@
#ifndef LIQUID_EXT_H
#define LIQUID_EXT_H
#include <stdbool.h>
#include <ctype.h>
#include <ruby.h>
#include "tokenizer.h"
#include "block.h"
#include "utils.h"
extern VALUE mLiquid;
extern VALUE cLiquidTemplate, cLiquidTag, cLiquidVariable;
#endif

View File

@@ -1,43 +1,66 @@
#include "liquid_ext.h"
#include "liquid.h"
VALUE cLiquidTokenizer;
static void free_tokenizer(void *ptr)
static void tokenizer_mark(void *ptr) {
tokenizer_t *tokenizer = ptr;
rb_gc_mark(tokenizer->source);
}
static void tokenizer_free(void *ptr)
{
struct liquid_tokenizer *tokenizer = ptr;
tokenizer_t *tokenizer = ptr;
xfree(tokenizer);
}
static VALUE rb_allocate(VALUE klass)
static size_t tokenizer_memsize(const void *ptr)
{
return ptr ? sizeof(tokenizer_t) : 0;
}
const rb_data_type_t tokenizer_data_type = {
"liquid_tokenizer",
{tokenizer_mark, tokenizer_free, tokenizer_memsize,},
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
#endif
};
static VALUE tokenizer_allocate(VALUE klass)
{
VALUE obj;
struct liquid_tokenizer *tokenizer;
tokenizer_t *tokenizer;
obj = Data_Make_Struct(klass, struct liquid_tokenizer, NULL, free_tokenizer, tokenizer);
obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
tokenizer->source = Qnil;
return obj;
}
static VALUE rb_initialize(VALUE self, VALUE source)
static VALUE tokenizer_initialize_method(VALUE self, VALUE source)
{
struct liquid_tokenizer *tokenizer;
tokenizer_t *tokenizer;
Check_Type(source, T_STRING);
Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
Tokenizer_Get_Struct(self, tokenizer);
source = rb_str_dup_frozen(source);
tokenizer->source = source;
tokenizer->cursor = RSTRING_PTR(source);
tokenizer->length = RSTRING_LEN(source);
return Qnil;
}
void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token)
void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
{
if (tokenizer->length <= 0) {
memset(token, 0, sizeof(*token));
return;
}
token->type = TOKEN_STRING;
char *cursor = tokenizer->cursor;
char *last = tokenizer->cursor + tokenizer->length - 1;
const char *cursor = tokenizer->cursor;
const char *last = cursor + tokenizer->length - 1;
token->str = cursor;
token->type = TOKEN_STRING;
while (cursor < last) {
if (*cursor++ != '{')
@@ -51,7 +74,6 @@ void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *tok
cursor -= 2;
goto found;
}
char *incomplete_end = cursor;
token->type = TOKEN_INVALID;
if (c == '%') {
while (cursor < last) {
@@ -65,38 +87,40 @@ void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *tok
token->type = TOKEN_TAG;
goto found;
}
cursor = incomplete_end;
// unterminated tag
cursor = tokenizer->cursor + 2;
goto found;
} else {
while (cursor < last) {
if (*cursor++ != '}')
continue;
if (*cursor++ != '}') {
incomplete_end = cursor - 1;
continue;
// variable incomplete end, used to end raw tags
cursor--;
goto found;
}
token->type = TOKEN_VARIABLE;
goto found;
}
cursor = incomplete_end;
// unterminated variable
cursor = tokenizer->cursor + 2;
goto found;
}
}
cursor = last + 1;
found:
token->str = tokenizer->cursor;
token->length = cursor - tokenizer->cursor;
tokenizer->cursor += token->length;
tokenizer->length -= token->length;
}
static VALUE rb_next(VALUE self)
static VALUE tokenizer_next_method(VALUE self)
{
struct liquid_tokenizer *tokenizer;
Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
tokenizer_t *tokenizer;
Tokenizer_Get_Struct(self, tokenizer);
struct token token;
liquid_tokenizer_next(tokenizer, &token);
token_t token;
tokenizer_next(tokenizer, &token);
if (token.type == TOKEN_NONE)
return Qnil;
@@ -106,8 +130,8 @@ static VALUE rb_next(VALUE self)
void init_liquid_tokenizer()
{
cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
rb_define_alloc_func(cLiquidTokenizer, rb_allocate);
rb_define_method(cLiquidTokenizer, "initialize", rb_initialize, 1);
rb_define_method(cLiquidTokenizer, "next", rb_next, 0);
rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
rb_define_method(cLiquidTokenizer, "next", tokenizer_next_method, 0);
rb_define_alias(cLiquidTokenizer, "shift", "next");
}

View File

@@ -1,8 +1,6 @@
#ifndef LIQUID_TOKENIZER_H
#define LIQUID_TOKENIZER_H
extern VALUE cLiquidTokenizer;
enum token_type {
TOKEN_NONE,
TOKEN_INVALID,
@@ -11,20 +9,23 @@ enum token_type {
TOKEN_VARIABLE
};
struct token {
typedef struct token {
enum token_type type;
char *str;
int length;
};
const char *str;
long length;
} token_t;
struct liquid_tokenizer {
char *cursor;
int length;
};
typedef struct tokenizer {
VALUE source;
const char *cursor;
long length;
} tokenizer_t;
extern VALUE cLiquidTokenizer;
extern const rb_data_type_t tokenizer_data_type;
#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
void init_liquid_tokenizer();
void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token);
#define LIQUID_TOKENIZER_GET_STRUCT(obj) ((struct liquid_tokenizer *)obj_get_data_ptr(obj, cLiquidTokenizer))
void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
#endif

View File

@@ -1,21 +0,0 @@
#include <ruby.h>
void raise_type_error(VALUE expected, VALUE got)
{
rb_raise(rb_eTypeError, "wrong argument type %s (expected %s)",
rb_class2name(got), rb_class2name(expected));
}
void check_class(VALUE obj, int type, VALUE klass)
{
Check_Type(obj, type);
VALUE obj_klass = RBASIC_CLASS(obj);
if (obj_klass != klass)
raise_type_error(klass, obj_klass);
}
void *obj_get_data_ptr(VALUE obj, VALUE klass)
{
check_class(obj, T_DATA, klass);
return DATA_PTR(obj);
}

View File

@@ -1,8 +0,0 @@
#ifndef LIQUID_UTILS_H
#define LIQUID_UTILS_H
void raise_type_error(VALUE expected, VALUE got);
void check_class(VALUE klass);
void *obj_get_data_ptr(VALUE obj, VALUE klass);
#endif

View File

@@ -36,7 +36,6 @@ module Liquid
VariableParser = /\[[^\]]+\]|#{VariableSegment}+\??/o
end
require 'liquid/liquid'
require "liquid/version"
require 'liquid/lexer'
require 'liquid/parser'
@@ -61,3 +60,9 @@ require 'liquid/utils'
# Load all the tags of the standard library
#
Dir[File.dirname(__FILE__) + '/liquid/tags/*.rb'].each { |f| require f }
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
require 'liquid/liquid'
else
require 'liquid/tokenizer'
end

View File

@@ -1,26 +1,82 @@
module Liquid
class Block < Tag
def initialize(tag_name, markup, tokens)
super
parse_body(tokens)
end
IsTag = /\A#{TagStart}/o
IsVariable = /\A#{VariableStart}/o
FullToken = /\A#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
ContentOfVariable = /\A#{VariableStart}(.*)#{VariableEnd}\z/om
def blank?
@blank || false
end
def parse(tokens)
@blank = true
@nodelist ||= []
@nodelist.clear
# All child tags of the current block.
@children = []
while token = tokens.shift
case token
when IsTag
if token =~ FullToken
# if we found the proper block delimiter just end parsing here and let the outer block
# proceed
if block_delimiter == $1
end_tag
return
end
# fetch the tag from registered blocks
if tag = Template.tags[$1]
new_tag = tag.parse($1, $2, tokens, @options)
@blank &&= new_tag.blank?
@nodelist << new_tag
@children << new_tag
else
# this tag is not registered with the system
# pass it to the current block for special handling or error reporting
unknown_tag($1, $2, tokens)
end
else
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
end
when IsVariable
new_var = create_variable(token)
@nodelist << new_var
@children << new_var
@blank = false
when ''.freeze
# pass
else
@nodelist << token
@blank &&= (token =~ /\A\s*\z/)
end
end
# Make sure that it's ok to end parsing in the current block.
# Effectively this method will throw an exception unless the current block is
# of type Document
assert_missing_delimitation!
end
# warnings of this block and all sub-tags
def warnings
all_warnings = []
all_warnings.concat(@warnings) if @warnings
(nodelist || []).each do |node|
all_warnings.concat(node.warnings || []) if node.respond_to?(:warnings)
(@children || []).each do |node|
all_warnings.concat(node.warnings || [])
end
all_warnings
end
def end_tag
end
def unknown_tag(tag, params, tokens)
case tag
when 'else'.freeze
@@ -56,14 +112,6 @@ module Liquid
protected
def unterminated_variable(token)
raise SyntaxError.new(options[:locale].t("errors.syntax.variable_termination".freeze, :token => token, :tag_end => VariableEnd.inspect))
end
def unterminated_tag(token)
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_termination".freeze, :token => token, :tag_end => TagEnd.inspect))
end
def assert_missing_delimitation!
raise SyntaxError.new(options[:locale].t("errors.syntax.tag_never_closed".freeze, :block_name => block_name))
end

View File

@@ -7,7 +7,7 @@ module Liquid
# There isn't a real delimiter
def block_delimiter
nil
[]
end
# Document blocks don't need to be terminated since they are not actually opened

View File

@@ -19,6 +19,9 @@ module Liquid
@options = options
end
def parse(tokens)
end
def name
self.class.name.downcase
end

View File

@@ -4,7 +4,7 @@ module Liquid
def render(context)
context.stack do
output = super
output = render_all(@nodelist, context)
if output != context.registers[:ifchanged]
context.registers[:ifchanged] = output

View File

@@ -35,6 +35,9 @@ module Liquid
end
end
def parse(tokens)
end
def blank?
false
end

View File

@@ -2,13 +2,16 @@ module Liquid
class Raw < Block
FullTokenPossiblyInvalid = /\A(.*)#{TagStart}\s*(\w+)\s*(.*)?#{TagEnd}\z/om
def parse_body(tokens)
def parse(tokens)
@nodelist ||= []
@nodelist.clear
while token = tokens.shift
if token =~ FullTokenPossiblyInvalid
@nodelist << $1 if $1 != "".freeze
return if block_delimiter == $2
if block_delimiter == $2
end_tag
return
end
end
@nodelist << token if not token.empty?
end

View File

@@ -54,7 +54,7 @@ module Liquid
col += 1
result << "<td class=\"col#{col}\">" << super << '</td>'
result << "<td class=\"col#{col}\">" << render_all(@nodelist, context) << '</td>'
if col == cols and (index != length - 1)
col = 0

20
lib/liquid/tokenizer.rb Normal file
View File

@@ -0,0 +1,20 @@
module Liquid
class Tokenizer
VariableIncompleteEnd = /\}\}?/
AnyStartingTag = /\{\{|\{\%/
PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/om
TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/om
def initialize(source)
@tokens = source.split(TemplateParser)
# removes the rogue empty element at the beginning of the array
@tokens.shift if @tokens[0] && @tokens[0].empty?
end
def next
@tokens.shift
end
alias_method :shift, :next
end
end

View File

@@ -19,7 +19,6 @@ Gem::Specification.new do |s|
s.test_files = Dir.glob("{test}/**/*")
s.files = Dir.glob("{lib,ext}/**/*") + %w(MIT-LICENSE README.md)
s.extensions = ['ext/liquid/extconf.rb']
s.extra_rdoc_files = ["History.md", "README.md"]
@@ -27,7 +26,8 @@ Gem::Specification.new do |s|
s.add_development_dependency 'rake'
s.add_development_dependency 'activesupport'
if RUBY_ENGINE == 'ruby'
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'ruby'
s.extensions = ['ext/liquid/extconf.rb']
s.add_development_dependency 'rake-compiler'
s.add_development_dependency 'stackprof' if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.1.0")
end

View File

@@ -21,23 +21,8 @@ class TokenizerTest < Test::Unit::TestCase
assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ")
end
def test_tokenize_incomplete_end
assert_tokens 'before{{ incomplete }after', ['before', '{{ incomplete }', 'after']
assert_tokens 'before{% incomplete %after', ['before', '{%', ' incomplete %after']
end
def test_tokenize_no_end
assert_tokens 'before{{ unterminated ', ['before', '{{', ' unterminated ']
assert_tokens 'before{% unterminated ', ['before', '{%', ' unterminated ']
end
private
def assert_tokens(source, expected)
assert_equal expected, tokenize(source)
assert_equal expected, old_tokenize(source)
end
def tokenize(source)
tokenizer = Liquid::Tokenizer.new(source)
tokens = []
@@ -46,19 +31,4 @@ class TokenizerTest < Test::Unit::TestCase
end
tokens
end
AnyStartingTag = /\{\{|\{\%/
VariableIncompleteEnd = /\}\}?/
PartialTemplateParser = /#{Liquid::TagStart}.*?#{Liquid::TagEnd}|#{Liquid::VariableStart}.*?#{VariableIncompleteEnd}/o
TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
def old_tokenize(source)
return [] if source.to_s.empty?
tokens = source.split(TemplateParser)
# removes the rogue empty element at the beginning of the array
tokens.shift if tokens[0] and tokens[0].empty?
tokens
end
end