mirror of https://github.com/kemko/liquid.git (synced 2026-01-06 10:15:40 +03:00)
Create a Liquid::Tokenizer class in the C extension.
Rakefile (+1 line)
@@ -84,3 +84,4 @@ end
 Rake::ExtensionTask.new "liquid" do |ext|
   ext.lib_dir = "lib/liquid"
 end
+Rake::Task[:test].prerequisites << :compile
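Note that Rake::ExtensionTask expects an extconf.rb alongside the C sources to drive the actual build. The diff does not show one, so the following is only a minimal sketch of what ext/liquid/extconf.rb would conventionally contain (path and contents are assumptions, not part of this commit):

    # ext/liquid/extconf.rb -- hypothetical; this commit does not include it.
    require 'mkmf'

    # Generate the Makefile for the "liquid" extension; rake-compiler then
    # copies the built library into lib/liquid/ per ext.lib_dir above.
    create_makefile('liquid/liquid')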
@@ -1,6 +1,9 @@
 #include "ruby.h"
+#include "liquid_ext.h"
 
+VALUE mLiquid;
+
 void Init_liquid(void)
 {
-    VALUE mLiquid = rb_define_module("Liquid");
+    mLiquid = rb_define_module("Liquid");
+    init_liquid_tokenizer();
 }
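For orientation, Init_liquid is the function Ruby calls when the compiled extension is loaded. A sketch of loading it, assuming the lib/liquid lib_dir configured in the Rakefile (the require path is an inference, not shown in the diff):

    require 'liquid/liquid'   # Ruby runs Init_liquid on load
    Liquid::Tokenizer         # => the class registered by init_liquid_tokenizer()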
ext/liquid/liquid_ext.h (new file, 8 lines)
@@ -0,0 +1,8 @@
+#ifndef LIQUID_EXT_H
+#define LIQUID_EXT_H
+
+#include <ruby.h>
+#include <stdbool.h>
+#include "tokenizer.h"
+
+#endif
ext/liquid/tokenizer.c (new file, 115 lines)
@@ -0,0 +1,115 @@
+#include "liquid_ext.h"
+
+VALUE cLiquidTokenizer;
+extern VALUE mLiquid;
+
+static void free_tokenizer(void *ptr)
+{
+    /* Release the struct allocated by Data_Make_Struct. */
+    struct liquid_tokenizer *tokenizer = ptr;
+    xfree(tokenizer);
+}
+
+static VALUE rb_allocate(VALUE klass)
+{
+    VALUE obj;
+    struct liquid_tokenizer *tokenizer;
+
+    obj = Data_Make_Struct(klass, struct liquid_tokenizer, NULL, free_tokenizer, tokenizer);
+    return obj;
+}
+
+static VALUE rb_initialize(VALUE self, VALUE source)
+{
+    struct liquid_tokenizer *tokenizer;
+
+    Check_Type(source, T_STRING);
+    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+    /* Borrows source's buffer: the string must stay alive while tokenizing. */
+    tokenizer->cursor = RSTRING_PTR(source);
+    tokenizer->length = RSTRING_LEN(source);
+    return Qnil;
+}
+
+/* Scan for the next token: a run of plain text, a {% tag %}, or a
+ * {{ variable }}. Emits TOKEN_NONE when the source is exhausted. */
+void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token)
+{
+    if (tokenizer->length <= 0) {
+        memset(token, 0, sizeof(*token));
+        return;
+    }
+    token->type = TOKEN_STRING;
+
+    char *cursor = tokenizer->cursor;
+    char *last = tokenizer->cursor + tokenizer->length - 1;
+
+    while (cursor < last) {
+        if (*cursor++ != '{')
+            continue;
+
+        char c = *cursor++;
+        if (c != '%' && c != '{')
+            continue;
+        if (cursor - tokenizer->cursor > 2) {
+            /* Plain text precedes the markup: emit it as a string token first. */
+            token->type = TOKEN_STRING;
+            cursor -= 2;
+            goto found;
+        }
+        char *incomplete_end = cursor;
+        token->type = TOKEN_INVALID;
+        if (c == '%') {
+            /* Look for the closing %} of a tag. */
+            while (cursor < last) {
+                if (*cursor++ != '%')
+                    continue;
+                c = *cursor++;
+                while (c == '%' && cursor <= last)
+                    c = *cursor++;
+                if (c != '}')
+                    continue;
+                token->type = TOKEN_TAG;
+                goto found;
+            }
+            // FIXME: Handle syntax error for strict mode
+            cursor = incomplete_end;
+            goto found;
+        } else {
+            /* Look for the closing }} of a variable. */
+            while (cursor < last) {
+                if (*cursor++ != '}')
+                    continue;
+                if (*cursor++ != '}') {
+                    incomplete_end = cursor - 1;
+                    continue;
+                }
+                token->type = TOKEN_VARIABLE;
+                goto found;
+            }
+            // FIXME: Handle syntax error for strict mode
+            cursor = incomplete_end;
+            goto found;
+        }
+    }
+    cursor = last + 1;
+found:
+    token->str = tokenizer->cursor;
+    token->length = cursor - tokenizer->cursor;
+    tokenizer->cursor += token->length;
+    tokenizer->length -= token->length;
+}
+
+static VALUE rb_next(VALUE self)
+{
+    struct liquid_tokenizer *tokenizer;
+    Data_Get_Struct(self, struct liquid_tokenizer, tokenizer);
+
+    struct token token;
+    liquid_tokenizer_next(tokenizer, &token);
+    if (token.type == TOKEN_NONE)
+        return Qnil;
+
+    return rb_str_new(token.str, token.length);
+}
+
+void init_liquid_tokenizer()
+{
+    cLiquidTokenizer = rb_define_class_under(mLiquid, "Tokenizer", rb_cObject);
+    rb_define_alloc_func(cLiquidTokenizer, rb_allocate);
+    rb_define_method(cLiquidTokenizer, "initialize", rb_initialize, 1);
+    rb_define_method(cLiquidTokenizer, "next", rb_next, 0);
+    rb_define_alias(cLiquidTokenizer, "shift", "next");
+}
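From the Ruby side, the class defined above behaves roughly as follows; a sketch based on the scanner logic in liquid_tokenizer_next, with next returning nil once the source is exhausted:

    t = Liquid::Tokenizer.new("Hello {{ name }}!")
    t.next   # => "Hello "
    t.next   # => "{{ name }}"
    t.next   # => "!"
    t.next   # => nil

The shift alias means the object can stand in wherever the old regexp tokenizer's token Array was consumed with Array#shift.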
ext/liquid/tokenizer.h (new file, 26 lines)
@@ -0,0 +1,26 @@
+#ifndef LIQUID_TOKENIZER_H
+#define LIQUID_TOKENIZER_H
+
+enum token_type {
+    TOKEN_NONE,
+    TOKEN_INVALID,
+    TOKEN_STRING,
+    TOKEN_TAG,
+    TOKEN_VARIABLE
+};
+
+struct token {
+    enum token_type type;
+    char *str;
+    int length;
+};
+
+struct liquid_tokenizer {
+    char *cursor;
+    int length;
+};
+
+void init_liquid_tokenizer();
+void liquid_tokenizer_next(struct liquid_tokenizer *tokenizer, struct token *token);
+
+#endif
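The enum above is internal bookkeeping: rb_next discards the type and hands back only the token text, so on the Ruby side every kind of token arrives as a String. A sketch of the correspondence (expected values follow the scanner logic):

    t = Liquid::Tokenizer.new("a {%raw%} b {{x}}")
    t.next  # TOKEN_STRING   -> "a "
    t.next  # TOKEN_TAG      -> "{%raw%}"
    t.next  # TOKEN_STRING   -> " b "
    t.next  # TOKEN_VARIABLE -> "{{x}}"
    t.next  # TOKEN_NONE     -> nil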
@@ -30,7 +30,6 @@ module Liquid
   VariableSegment = /[\w\-]/
   VariableStart = /\{\{/
   VariableEnd = /\}\}/
-  VariableIncompleteEnd = /\}\}?/
   QuotedString = /"[^"]*"|'[^']*'/
   QuotedFragment = /#{QuotedString}|(?:[^\s,\|'"]|#{QuotedString})+/o
   StrictQuotedFragment = /"[^"]+"|'[^']+'|[^\s|:,]+/
@@ -39,9 +38,6 @@ module Liquid
   SpacelessFilter = /\A(?:'[^']+'|"[^"]+"|[^'"])*#{FilterSeparator}(?:#{StrictQuotedFragment})(?:#{FirstFilterArgument}(?:#{OtherFilterArgument})*)?/o
   Expression = /(?:#{QuotedFragment}(?:#{SpacelessFilter})*)/o
   TagAttributes = /(\w+)\s*\:\s*(#{QuotedFragment})/o
-  AnyStartingTag = /\{\{|\{\%/
-  PartialTemplateParser = /#{TagStart}.*?#{TagEnd}|#{VariableStart}.*?#{VariableIncompleteEnd}/o
-  TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
   VariableParser = /\[[^\]]+\]|#{VariableSegment}+\??/o
 end
@@ -162,16 +162,9 @@ module Liquid

   private

-  # Uses the <tt>Liquid::TemplateParser</tt> regexp to tokenize the passed source
   def tokenize(source)
     source = source.source if source.respond_to?(:source)
-    return [] if source.to_s.empty?
-    tokens = source.split(TemplateParser)
-
-    # removes the rogue empty element at the beginning of the array
-    tokens.shift if tokens[0] and tokens[0].empty?
-
-    tokens
+    Tokenizer.new(source.to_s)
   end

 end
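tokenize now returns a Tokenizer rather than an Array, but because Tokenizer aliases shift to next, callers that drain tokens with shift are unaffected. A sketch of that consumer pattern (a hypothetical caller, not part of this diff):

    tokens = Liquid::Template.new.send(:tokenize, "{% if user %}Hi{% endif %}")
    while (token = tokens.shift)
      # each token is a String: plain text, a {% tag %}, or a {{ variable }}
    end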
@@ -25,26 +25,6 @@ end
 class TemplateTest < Test::Unit::TestCase
   include Liquid

-  def test_tokenize_strings
-    assert_equal [' '], Template.new.send(:tokenize, ' ')
-    assert_equal ['hello world'], Template.new.send(:tokenize, 'hello world')
-  end
-
-  def test_tokenize_variables
-    assert_equal ['{{funk}}'], Template.new.send(:tokenize, '{{funk}}')
-    assert_equal [' ', '{{funk}}', ' '], Template.new.send(:tokenize, ' {{funk}} ')
-    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], Template.new.send(:tokenize, ' {{funk}} {{so}} {{brother}} ')
-    assert_equal [' ', '{{ funk }}', ' '], Template.new.send(:tokenize, ' {{ funk }} ')
-  end
-
-  def test_tokenize_blocks
-    assert_equal ['{%comment%}'], Template.new.send(:tokenize, '{%comment%}')
-    assert_equal [' ', '{%comment%}', ' '], Template.new.send(:tokenize, ' {%comment%} ')
-
-    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], Template.new.send(:tokenize, ' {%comment%} {%endcomment%} ')
-    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], Template.new.send(:tokenize, " {% comment %} {% endcomment %} ")
-  end
-
   def test_instance_assigns_persist_on_same_template_object_between_parses
     t = Template.new
     assert_equal 'from instance assigns', t.parse("{% assign foo = 'from instance assigns' %}{{ foo }}").render
test/liquid/tokenizer_test.rb (new file, 64 lines)
@@ -0,0 +1,64 @@
+require 'test_helper'
+
+class TokenizerTest < Test::Unit::TestCase
+  def test_tokenize_strings
+    assert_equal [' '], tokenize(' ')
+    assert_equal ['hello world'], tokenize('hello world')
+  end
+
+  def test_tokenize_variables
+    assert_equal ['{{funk}}'], tokenize('{{funk}}')
+    assert_equal [' ', '{{funk}}', ' '], tokenize(' {{funk}} ')
+    assert_equal [' ', '{{funk}}', ' ', '{{so}}', ' ', '{{brother}}', ' '], tokenize(' {{funk}} {{so}} {{brother}} ')
+    assert_equal [' ', '{{ funk }}', ' '], tokenize(' {{ funk }} ')
+  end
+
+  def test_tokenize_blocks
+    assert_equal ['{%comment%}'], tokenize('{%comment%}')
+    assert_equal [' ', '{%comment%}', ' '], tokenize(' {%comment%} ')
+
+    assert_equal [' ', '{%comment%}', ' ', '{%endcomment%}', ' '], tokenize(' {%comment%} {%endcomment%} ')
+    assert_equal [' ', '{% comment %}', ' ', '{% endcomment %}', ' '], tokenize(" {% comment %} {% endcomment %} ")
+  end
+
+  def test_tokenize_incomplete_end
+    assert_tokens 'before{{ incomplete }after', ['before', '{{ incomplete }', 'after']
+    assert_tokens 'before{% incomplete %after', ['before', '{%', ' incomplete %after']
+  end
+
+  def test_tokenize_no_end
+    assert_tokens 'before{{ unterminated ', ['before', '{{', ' unterminated ']
+    assert_tokens 'before{% unterminated ', ['before', '{%', ' unterminated ']
+  end
+
+  private
+
+  def assert_tokens(source, expected)
+    assert_equal expected, tokenize(source)
+    assert_equal expected, old_tokenize(source)
+  end
+
+  def tokenize(source)
+    tokenizer = Liquid::Tokenizer.new(source)
+    tokens = []
+    while token = tokenizer.next
+      tokens << token
+    end
+    tokens
+  end
+
+  AnyStartingTag = /\{\{|\{\%/
+  VariableIncompleteEnd = /\}\}?/
+  PartialTemplateParser = /#{Liquid::TagStart}.*?#{Liquid::TagEnd}|#{Liquid::VariableStart}.*?#{VariableIncompleteEnd}/o
+  TemplateParser = /(#{PartialTemplateParser}|#{AnyStartingTag})/o
+
+  def old_tokenize(source)
+    return [] if source.to_s.empty?
+    tokens = source.split(TemplateParser)
+
+    # removes the rogue empty element at the beginning of the array
+    tokens.shift if tokens[0] and tokens[0].empty?
+
+    tokens
+  end
+end