From fa44e594de51132bdb187e50f1039a5f074133c5 Mon Sep 17 00:00:00 2001 From: Max Melentiev Date: Mon, 10 Dec 2018 22:26:13 +0530 Subject: [PATCH] Cached 2-step storage (similar to shrine gem) --- .gitignore | 1 + Appraisals | 1 + Gemfile | 9 +- gemfiles/rails_42.gemfile | 5 +- gemfiles/rails_5.gemfile | 4 +- lib/paperclip/storage.rb | 1 + lib/paperclip/storage/cached.rb | 229 ++++++++++++++++++++++++++++++++ test/storage/cached_test.rb | 80 +++++++++++ 8 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 lib/paperclip/storage/cached.rb create mode 100644 test/storage/cached_test.rb diff --git a/.gitignore b/.gitignore index 2650159..ef6a658 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .ruby-version Gemfile.lock gemfiles/*.lock +gemfiles/.bundle/ tmp/ diff --git a/Appraisals b/Appraisals index fd7e74f..2906dd0 100644 --- a/Appraisals +++ b/Appraisals @@ -1,5 +1,6 @@ appraise 'rails_42' do gem 'rails', '~> 4.2.0' + gem 'test_after_commit' end appraise 'rails_5' do diff --git a/Gemfile b/Gemfile index 33c3cec..a2236ba 100644 --- a/Gemfile +++ b/Gemfile @@ -1,12 +1,17 @@ -# coding: utf-8 -source 'http://rubygems.org' +source 'https://rubygems.org' +git_source(:github) { |repo_name| "https://github.com/#{repo_name}.git" } + gemspec gem 'appraisal' gem 'fastimage' gem 'sqlite3' + gem 'aws-sdk-s3' +gem 'fog-local' + +gem 'delayed_paperclip', github: 'insales/delayed_paperclip' gem 'sidekiq', '~> 2.0' gem 'test-unit' diff --git a/gemfiles/rails_42.gemfile b/gemfiles/rails_42.gemfile index 569d1d6..a162a92 100644 --- a/gemfiles/rails_42.gemfile +++ b/gemfiles/rails_42.gemfile @@ -1,11 +1,13 @@ # This file was generated by Appraisal -source "http://rubygems.org" +source "https://rubygems.org" gem "appraisal" gem "fastimage" gem "sqlite3" gem "aws-sdk-s3" +gem "fog-local" +gem "delayed_paperclip", git: "https://github.com/insales/delayed_paperclip.git" gem "sidekiq", "~> 2.0" gem "test-unit" gem "thoughtbot-shoulda", ">= 2.9.0" @@ -13,5 +15,6 @@ 
gem "mocha" gem "pry" gem "pry-byebug" gem "rails", "~> 4.2.0" +gem "test_after_commit" gemspec path: "../" diff --git a/gemfiles/rails_5.gemfile b/gemfiles/rails_5.gemfile index e3f2ff6..00f7914 100644 --- a/gemfiles/rails_5.gemfile +++ b/gemfiles/rails_5.gemfile @@ -1,11 +1,13 @@ # This file was generated by Appraisal -source "http://rubygems.org" +source "https://rubygems.org" gem "appraisal" gem "fastimage" gem "sqlite3" gem "aws-sdk-s3" +gem "fog-local" +gem "delayed_paperclip", git: "https://github.com/insales/delayed_paperclip.git" gem "sidekiq", "~> 2.0" gem "test-unit" gem "thoughtbot-shoulda", ">= 2.9.0" diff --git a/lib/paperclip/storage.rb b/lib/paperclip/storage.rb index 170d3dd..800f6f7 100644 --- a/lib/paperclip/storage.rb +++ b/lib/paperclip/storage.rb @@ -2,5 +2,6 @@ module Paperclip module Storage autoload :Filesystem, 'paperclip/storage/filesystem' autoload :Delayeds3, 'paperclip/storage/delayeds3' + autoload :Cached, 'paperclip/storage/cached' end end diff --git a/lib/paperclip/storage/cached.rb b/lib/paperclip/storage/cached.rb new file mode 100644 index 0000000..ff65a91 --- /dev/null +++ b/lib/paperclip/storage/cached.rb @@ -0,0 +1,229 @@ +require "sidekiq" + +module Paperclip + module Storage + # Saves file to `:cache` store, and runs jobs to copy files to one or more `:store` stores. + # All stores are Fog::Storage::Directory instances (it has S3 and filesystem adapters). + # + # Options: + # - `:cache` - temporary storage, + # - `:stores` - one or more permanent storages (hash of {id => fog_directory}), + # first one is main, others are mirrors, + # - `:key` - identifier template. + # - `:url` - hash of templates {cache: t1, store: t2}. + # Values support :key interpolation which is performed at configuration-time. + # - `:download_by_url` - makes #to_file download the file by url via usual HTTP request. + # If disabled, fog interface is used to fetch the file from store.
+ # + # It uses `#{attachment_name}_synced_to_#{store_id}` field to mark that file + # is uploaded to particular storage. + module Cached + class << self + def included(base) + base.extend(ClassMethods) + end + end + + module ClassMethods + attr_reader :key_template, + :url_templates, + :directories, + :store_ids, + :main_store_id, + :download_by_url + + def setup(*) + super + + @key_template = options.fetch(:key) + @key_template = key_template[1..-1] if key_template.start_with?('/') + + @url_templates = options.fetch(:url).map { |k, v| [k, v.gsub(':key', key_template)] }.to_h + + @directories = options.fetch(:stores).symbolize_keys + @directories[:cache] = @options.fetch(:cache) + + @store_ids = options[:stores].keys.map(&:to_sym) + @main_store_id = store_ids.first + + @download_by_url = options[:download_by_url] + end + + def directory_for(store_id) + directories.fetch(store_id.to_sym) + end + + def synced_field_name(store_id) + @synced_field_names ||= store_ids.each_with_object({}) do |key, result| + result[key] = :"#{attachment_name}_synced_to_#{key}" + end + @synced_field_names[store_id.to_sym] + end + end + + class UploadWorker + include ::Sidekiq::Worker + sidekiq_options queue: :paperclip + + def perform(class_name, id, attachment_name, store_id) + instance = class_name.constantize.find_by_id(id) + return unless instance + attachment = instance.public_send(attachment_name) + attachment.sync_to(store_id) + attachment.clear_cache + end + end + + def initialize(*) + super + @queued_jobs = [] + end + + def key(style = default_style) + interpolate(self.class.key_template, style) + end + + def storage_url(style = default_style) + current_store = synced_to?(self.class.main_store_id) ? :store : :cache + interpolate(self.class.url_templates.fetch(current_store), style) + end + + def reprocess! + super + flush_jobs + end + + # If store_id is given, it forces download from specific store using fog interface.
+ # Otherwise it tries to download from cache store and finally uses url to download file + # via HTTP. This is the way most compatible with delayeds3. + def to_file(style = default_style, store_id = nil) + style_key = key(style) + return download_from_fog(store_id, style_key) if store_id + result = super(style) || download_from_fog(:cache, style_key) + return result if result + # Download by URL only if file is synced to main store. Similar to delayeds3. + return unless synced_to?(self.class.main_store_id) + if self.class.download_by_url + uri = URI(URI.encode(storage_url(style))) + response = Net::HTTP.get_response(uri) + create_tempfile(response.body) if response.is_a?(Net::HTTPOK) + else + download_from_fog(self.class.main_store_id, style_key) + end + end + + def path(*) + raise '#path is not available for this type of storage, use #to_file instead' + end + + # Checking only cache for backward compatibility with delayeds3 + def exists?(style = default_style, store_id = :cache) + !self.class.directory_for(store_id).files.head(key(style)).nil? + end + + def flush_writes #:nodoc: + return if queued_for_write.empty? + write_to_directory(:cache, queued_for_write) + unless delay_processing? && dirty? + self.class.store_ids.each { |store_id| enqueue_sync_job(store_id) } + end + queued_for_write.clear + end + + # Important: It does not delete files from permanent stores. + def flush_deletes #:nodoc: + # если мы картинку заливали в облака, значит мы скорее всего ее уже удалили + # и можно не нагружать хранилище проверками + clear_directory(:cache, queued_for_delete) unless all_synced? + queued_for_delete.clear + end + + # Enqueues all pending jobs. First, jobs are placed to internal queue in flush_writes + # (in after_save) and this method pushes them for execution (in after_commit). + def flush_jobs + queued_jobs&.each(&:call).clear + end + + # Writes files from cache to permanent store.
+ def sync_to(store_id) + synced_field_name = self.class.synced_field_name(store_id) + return unless instance.respond_to?(synced_field_name) + return true if instance.public_send(synced_field_name) + files = self.class.all_styles.each_with_object({}) do |style, result| + file = to_file(style, :cache) + # For easier monitoring + unless file + raise "Missing cached files for #{instance.class.name}:#{instance.id}:#{style}" + end + result[style] = file + end + write_to_directory(store_id, files) + # ignore deleted objects and skip callbacks + if instance.class.unscoped.where(id: instance.id).update_all(synced_field_name => true) == 1 + instance.touch + instance[synced_field_name] = true + end + end + + def clear_cache + clear_directory(:cache) if all_synced? + end + + private + + def synced_to?(store_id) + instance.try(self.class.synced_field_name(store_id)) + end + + def all_synced? + self.class.store_ids.all? do |store_id| + synced_field_name = self.class.synced_field_name(store_id) + !instance.respond_to?(synced_field_name) || instance[synced_field_name] + end + end + + attr_reader :queued_jobs + + def enqueue_sync_job(store_id) + synced_field_name = self.class.synced_field_name(store_id) + return unless instance.respond_to?(synced_field_name) + instance.update_column(synced_field_name, false) if instance[synced_field_name] + queued_jobs.push -> { + UploadWorker.perform_async(instance.class.name, instance.id, name, store_id) + } + end + + def download_from_fog(store_id, key) + body = self.class.directory_for(store_id).files.get(key)&.body + create_tempfile(body) if body + end + + def write_to_directory(store_id, files) + directory = self.class.directory_for(store_id) + common_options = { + content_type: instance_read(:content_type), + cache_control: "max-age=#{10.years.to_i}", + } + files.each do |style, file| + path = key(style) + log "Saving to #{store_id}:#{path}" + directory.files.create( + key: path, + public: true, + body: file, + **common_options, + ) + end + 
end + + def clear_directory(store_id, styles = self.class.all_styles) + directory = self.class.directory_for(store_id) + styles.each do |style| + path = key(style) + log("Deleting #{store_id}:#{path}") + directory.files.head(path)&.destroy + end + end + end + end +end diff --git a/test/storage/cached_test.rb b/test/storage/cached_test.rb new file mode 100644 index 0000000..ad52c27 --- /dev/null +++ b/test/storage/cached_test.rb @@ -0,0 +1,80 @@ +require 'test_helper' +require 'fog/local' +require 'sidekiq' +require 'sidekiq/testing' + +require 'delayed_paperclip' +DelayedPaperclip::Railtie.insert + +class FakeModel + attr_accessor :synced_to_store_1, :synced_to_store_2 +end + +class CachedStorageTest < Test::Unit::TestCase + TEST_ROOT = Pathname(__dir__).join('test') + + def fog_directory(suffix) + Fog::Storage.new(provider: 'Local', local_root: TEST_ROOT.join(suffix.to_s)) + .directories.new(key: '', public: true) + end + + def stub_file(name, content) + StringIO.new(content).tap { |x| x.stubs(:original_filename).returns(name) } + end + + setup do + rebuild_model( + storage: :cached, + key: ':filename', + url: { + cache: 'http://cache.local/:key', + store: 'http://store.local/:key' + }, + cache: fog_directory(:cache), + stores: { + store_1: fog_directory(:store_1), + store_2: fog_directory(:store_2), + } + ) + modify_table(:dummies) do |t| + t.boolean :avatar_synced_to_store_1, null: false, default: false + t.boolean :avatar_synced_to_store_2, null: false, default: false + end + @instance = Dummy.create + end + + teardown { TEST_ROOT.rmtree if TEST_ROOT.exist? } + + context 'assigning file' do + setup { Sidekiq::Testing.fake! } + + should 'write to cache and enqueue jobs' do + @instance.update!(avatar: stub_file('test.txt', 'qwe')) + @instance.reload + attachment = @instance.avatar + key = attachment.key + assert_equal true, attachment.exists? + assert_equal false, attachment.class.directory_for(:cache).files.head(key).nil? 
+ assert_equal true, attachment.class.directory_for(:store_1).files.head(key).nil? + assert_equal true, attachment.class.directory_for(:store_2).files.head(key).nil? + assert_equal 'http://cache.local/test.txt', attachment.url(:original, false) + end + + context 'with inline jobs' do + setup { Sidekiq::Testing.inline! } + teardown { Sidekiq::Testing.fake! } + + should 'write to permanent stores and clear cache' do + @instance.update!(avatar: stub_file('test.txt', 'qwe')) + @instance.reload + attachment = @instance.avatar + key = attachment.key + assert_equal false, attachment.exists? + assert_equal true, attachment.class.directory_for(:cache).files.head(key).nil? + assert_equal false, attachment.class.directory_for(:store_1).files.head(key).nil? + assert_equal false, attachment.class.directory_for(:store_2).files.head(key).nil? + assert_equal 'http://store.local/test.txt', attachment.url(:original, false) + end + end + end end