diff --git a/.gitignore b/.gitignore
index c75412598..c0f9f219b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,9 @@ ehthumbs.db
 # Ignore temporary files
 public/system/*
 
+# Ignore ActiveStorage Disk service storage directory
+storage/
+
 # Ignore robots.txt
 public/robots.txt
 
diff --git a/Dockerfile b/Dockerfile
index a653a486e..4b13d676d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ruby:2.5.5
+FROM ruby:2.6.3
 MAINTAINER BioSistemika
 
 # Get version of Debian (lsb_release substitute) and save it to /tmp/lsb_release for further commands
diff --git a/Dockerfile.production b/Dockerfile.production
index ca922f441..533c1e64f 100644
--- a/Dockerfile.production
+++ b/Dockerfile.production
@@ -1,4 +1,4 @@
-FROM ruby:2.5.5
+FROM ruby:2.6.3
 MAINTAINER BioSistemika
 
 RUN echo deb "http://http.debian.net/debian stretch-backports main" >> /etc/apt/sources.list
diff --git a/Gemfile b/Gemfile
index 4b7c719bd..4946767cd 100644
--- a/Gemfile
+++ b/Gemfile
@@ -2,7 +2,7 @@
 
 source 'http://rubygems.org'
 
-ruby '2.5.5'
+ruby '2.6.3'
 
 gem 'bootsnap', require: false
 
@@ -80,7 +80,9 @@ gem 'underscore-rails'
 gem 'wicked_pdf', '~> 1.1.0'
 gem 'wkhtmltopdf-heroku'
 
+gem 'aws-sdk-rails'
 gem 'aws-sdk-s3'
+gem 'mini_magick'
 gem 'paperclip', '~> 6.1' # File attachment, image attachment library
 gem 'delayed_job_active_record'
 gem 'devise-async',
diff --git a/Gemfile.lock b/Gemfile.lock
index ac5cda6f2..e2e5e6687 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -115,10 +115,16 @@ GEM
     aws-sdk-kms (1.21.0)
       aws-sdk-core (~> 3, >= 3.53.0)
       aws-sigv4 (~> 1.1)
+    aws-sdk-rails (2.1.0)
+      aws-sdk-ses (~> 1)
+      railties (>= 3)
     aws-sdk-s3 (1.42.0)
       aws-sdk-core (~> 3, >= 3.53.0)
       aws-sdk-kms (~> 1)
       aws-sigv4 (~> 1.1)
+    aws-sdk-ses (1.22.0)
+      aws-sdk-core (~> 3, >= 3.53.0)
+      aws-sigv4 (~> 1.1)
     aws-sigv4 (1.1.0)
       aws-eventstream (~> 1.0, >= 1.0.2)
     backports (3.15.0)
@@ -310,6 +316,7 @@ GEM
     method_source (0.9.2)
     mime-types (1.25.1)
     mimemagic (0.3.3)
+    mini_magick (4.9.3)
     mini_mime (1.0.1)
     mini_portile2 (2.3.0)
     minitest (5.11.3)
@@ -567,6 +574,7 @@ DEPENDENCIES
   auto_strip_attributes (~> 2.1)
   autosize-rails
   awesome_print
+  aws-sdk-rails
   aws-sdk-s3
   base62
   bcrypt (~> 3.1.10)
@@ -610,6 +618,7 @@ DEPENDENCIES
   kaminari
   listen (~> 3.0)
   logging (~> 2.0.0)
+  mini_magick
   momentjs-rails (~> 2.17.1)
   nested_form_fields
   newrelic_rpm
@@ -664,7 +673,7 @@ DEPENDENCIES
   yomu
 
 RUBY VERSION
-   ruby 2.5.5p157
+   ruby 2.6.3p62
 
 BUNDLED WITH
   1.17.3
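The Gemfile additions set up the new stack: `mini_magick` backs ActiveStorage image variants, while `aws-sdk-rails` (which, per the lockfile, pulls in `aws-sdk-ses`) presumably wires ActionMailer to SES. The model change below then swaps Paperclip's `has_attached_file` for `has_one_attached`. A minimal sketch of what call sites look like after the switch (file name and console session hypothetical):

```ruby
# Attaching and inspecting a file through the ActiveStorage API
asset = Asset.new
asset.file.attach(io: File.open('/tmp/report.pdf'),
                  filename: 'report.pdf',
                  content_type: 'application/pdf')
asset.file.attached?       # => true
asset.file.blob.byte_size  # size now lives on the blob, not on *_file_size columns
```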
diff --git a/app/models/asset.rb b/app/models/asset.rb
index 6d7b332e6..ff78b324e 100644
--- a/app/models/asset.rb
+++ b/app/models/asset.rb
@@ -6,48 +6,51 @@ class Asset < ApplicationRecord
   require 'tempfile'
 
   # Lock duration set to 30 minutes
-  LOCK_DURATION = 60*30
+  LOCK_DURATION = 60 * 30
+
+  # ActiveStorage configuration
+  has_one_attached :file
 
   # Paperclip validation
-  has_attached_file :file,
-                    styles: lambda { |a|
-                      if a.previewable_document?
-                        {
-                          large: { processors: [:custom_file_preview],
-                                   geometry: Constants::LARGE_PIC_FORMAT,
-                                   format: :jpg },
-                          medium: { processors: [:custom_file_preview],
-                                    geometry: Constants::MEDIUM_PIC_FORMAT,
-                                    format: :jpg }
-                        }
-                      else
-                        {
-                          large: [Constants::LARGE_PIC_FORMAT, :jpg],
-                          medium: [Constants::MEDIUM_PIC_FORMAT, :jpg]
-                        }
-                      end
-                    },
-                    convert_options: {
-                      medium: '-quality 70 -strip',
-                      all: '-background "#d2d2d2" -flatten +matte'
-                    }
+  # has_attached_file :file,
+  #                   styles: lambda { |a|
+  #                     if a.previewable_document?
+  #                       {
+  #                         large: { processors: [:custom_file_preview],
+  #                                  geometry: Constants::LARGE_PIC_FORMAT,
+  #                                  format: :jpg },
+  #                         medium: { processors: [:custom_file_preview],
+  #                                   geometry: Constants::MEDIUM_PIC_FORMAT,
+  #                                   format: :jpg }
+  #                       }
+  #                     else
+  #                       {
+  #                         large: [Constants::LARGE_PIC_FORMAT, :jpg],
+  #                         medium: [Constants::MEDIUM_PIC_FORMAT, :jpg]
+  #                       }
+  #                     end
+  #                   },
+  #                   convert_options: {
+  #                     medium: '-quality 70 -strip',
+  #                     all: '-background "#d2d2d2" -flatten +matte'
+  #                   }
 
-  before_post_process :previewable?
-  before_post_process :extract_image_quality
+  # before_post_process :previewable?
+  # before_post_process :extract_image_quality
 
   # adds image processing in background job
-  process_in_background :file, processing_image_url: '/images/:style/processing.gif'
+  # process_in_background :file, processing_image_url: '/images/:style/processing.gif'
 
-  validates_attachment :file,
-                       presence: true,
-                       size: {
-                         less_than: Rails.configuration.x.file_max_size_mb.megabytes
-                       }
-  validates :estimated_size, presence: true
-  validates :file_present, inclusion: { in: [true, false] }
+  # validates_attachment :file,
+  #                      presence: true,
+  #                      size: {
+  #                        less_than: Rails.configuration.x.file_max_size_mb.megabytes
+  #                      }
+  # validates :estimated_size, presence: true
+  # validates :file_present, inclusion: { in: [true, false] }
 
   # Should be checked for any security leaks
-  do_not_validate_attachment_file_type :file
+  # do_not_validate_attachment_file_type :file
 
   # Asset validation
   # This could cause some problems if you create empty asset and want to
@@ -200,6 +203,18 @@ class Asset < ApplicationRecord
     end
   end
 
+  def medium_preview
+    file.variant(resize: Constants::MEDIUM_PIC_FORMAT)
+  end
+
+  def large_preview
+    file.variant(resize: Constants::LARGE_PIC_FORMAT)
+  end
+
+  def file_size
+    file.blob.byte_size
+  end
+
   def extract_image_quality
     return unless ['image/jpeg', 'image/pjpeg'].include? file_content_type
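Unlike Paperclip styles, which were rendered at upload time, the new `medium_preview`/`large_preview` helpers return lazy `ActiveStorage::Variant` objects that mini_magick processes on first access; `Constants::MEDIUM_PIC_FORMAT` is assumed to be an ImageMagick geometry string such as `"300x300>"`. Note that variants only cover images, so the commented-out `custom_file_preview` document previews have no direct equivalent here. A usage sketch:

```ruby
variant = asset.medium_preview  # ActiveStorage::Variant; nothing resized yet
variant.processed               # runs mini_magick and stores the resized copy
variant.processed.service_url   # short-lived URL for the resized image
```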
diff --git a/config/storage.yml b/config/storage.yml
index d32f76e8f..fa3150876 100644
--- a/config/storage.yml
+++ b/config/storage.yml
@@ -6,13 +6,13 @@ local:
   service: Disk
   root: <%= Rails.root.join("storage") %>
 
-# Use rails credentials:edit to set the AWS secrets (as aws:access_key_id|secret_access_key)
-# amazon:
-#   service: S3
-#   access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %>
-#   secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %>
-#   region: us-east-1
-#   bucket: your_own_bucket
+amazon:
+  service: CustomS3
+  access_key_id: <%= ENV["AWS_ACCESS_KEY_ID"] %>
+  secret_access_key: <%= ENV["AWS_SECRET_ACCESS_KEY"] %>
+  region: <%= ENV["AWS_REGION"] %>
+  bucket: <%= ENV["S3_BUCKET"] %>
+  subfolder: <%= ENV["S3_SUBFOLDER"] %>
 
 # Remember not to checkin your GCS keyfile to a repository
 # google:
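`service: CustomS3` resolves because Rails derives both a require path and a class name from the service value, which is why the new class must live under `lib/active_storage/service/`. Paraphrased (not verbatim) from `ActiveStorage::Service::Configurator` in Rails 5.2:

```ruby
# "CustomS3" -> require "active_storage/service/custom_s3_service",
# then look up ActiveStorage::Service::CustomS3Service
require 'active_storage/service/custom_s3_service'
ActiveStorage::Service.const_get(:CustomS3Service)
```

Any extra YAML keys, such as `subfolder` here, are forwarded as keyword arguments to the service's initializer.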
diff --git a/db/migrate/20190613134100_convert_to_active_storage.rb b/db/migrate/20190613134100_convert_to_active_storage.rb
new file mode 100644
index 000000000..ea9de35b3
--- /dev/null
+++ b/db/migrate/20190613134100_convert_to_active_storage.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+class ConvertToActiveStorage < ActiveRecord::Migration[5.2]
+  require 'open-uri'
+
+  ID_PARTITION_LIMIT = 1_000_000_000
+  DIGEST = OpenSSL::Digest.const_get('SHA1').new
+
+  def up
+    ActiveRecord::Base.connection.raw_connection.prepare('active_storage_blob_statement', <<-SQL)
+      INSERT INTO active_storage_blobs (
+        key, filename, content_type, metadata, byte_size, checksum, created_at
+      ) VALUES ($1, $2, $3, '{}', $4, $5, $6)
+      RETURNING id;
+    SQL
+
+    ActiveRecord::Base.connection.raw_connection.prepare('active_storage_attachment_statement', <<-SQL)
+      INSERT INTO active_storage_attachments (
+        name, record_type, record_id, blob_id, created_at
+      ) VALUES ($1, $2, $3, $4, $5)
+    SQL
+
+    Rails.application.eager_load!
+    models = ApplicationRecord.descendants.reject(&:abstract_class?)
+
+    transaction do
+      models.each do |model|
+        attachments = model.column_names.map do |c|
+          $1 if c =~ /(.+)_file_name$/
+        end.compact
+
+        next if attachments.blank?
+
+        model.find_each do |instance|
+          attachments.each do |attachment|
+            next if instance.__send__("#{attachment}_file_name").blank?
+
+            res = ActiveRecord::Base.connection.raw_connection.exec_prepared(
+              'active_storage_blob_statement', [
+                key(instance, attachment),
+                instance.__send__("#{attachment}_file_name"),
+                instance.__send__("#{attachment}_content_type"),
+                instance.__send__("#{attachment}_file_size") || 0,
+                checksum(instance.__send__(attachment)),
+                instance.updated_at.iso8601
+              ]
+            )
+
+            ActiveRecord::Base.connection.raw_connection.exec_prepared(
+              'active_storage_attachment_statement', [
+                attachment,
+                model.name,
+                instance.id,
+                res[0]['id'],
+                instance.updated_at.iso8601
+              ]
+            )
+          end
+        end
+      end
+    end
+  end
+
+  def down
+    # raise ActiveRecord::IrreversibleMigration
+  end
+
+  private
+
+  def id_partition(id)
+    if id < ID_PARTITION_LIMIT
+      format('%09d', id).scan(/\d{3}/).join('/')
+    else
+      format('%012d', id).scan(/\d{3}/).join('/')
+    end
+  end
+
+  def hash_data(instance, attachment)
+    "#{instance.class.to_s.underscore.pluralize}/#{attachment.pluralize}/#{instance.id}/original"
+  end
+
+  def interpolate(pattern, instance, attachment)
+    path = pattern
+    path = path.gsub(':class', instance.class.to_s.underscore.pluralize)
+    path = path.gsub(':attachment', attachment.pluralize)
+    path = path.gsub(':id_partition', id_partition(instance.id))
+    path = path.gsub(':hash', OpenSSL::HMAC.hexdigest(DIGEST,
+                                                      ENV['PAPERCLIP_HASH_SECRET'],
+                                                      hash_data(instance, attachment)))
+    path.gsub(':filename', instance.__send__("#{attachment}_file_name"))
+  end
+
+  def key(instance, attachment)
+    # SecureRandom.uuid
+    # Alternatively:
+    pattern = if ENV['PAPERCLIP_STORAGE'] == 's3'
+                ':class/:attachment/:id_partition/:hash/original/:filename'
+              else
+                "#{Rails.root}/public/system/:class/:attachment/:id_partition/:hash/original/:filename"
+              end
+    interpolate(pattern, instance, attachment)
+  end
+
+  def checksum(_attachment)
+    'dummy'
+    # local files stored on disk:
+    # url = attachment.path
+    # Digest::MD5.base64digest(File.read(url))
+
+    # remote files stored on another person's computer:
+    # url = attachment.url
+    # Digest::MD5.base64digest(Net::HTTP.get(URI(url)))
+  end
+end
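The migration writes blobs whose `key` is the legacy Paperclip path instead of ActiveStorage's usual random token, so existing files can be addressed in place; note that the `$1` placeholders and `RETURNING id` tie it to PostgreSQL. `checksum` is stubbed to `'dummy'` to keep the run fast; computing real checksums, as the inline comments hint, would look roughly like this sketch for the disk-backed Paperclip case:

```ruby
require 'digest'

def checksum(attachment)
  # ActiveStorage stores base64-encoded MD5 digests (the S3 Content-MD5 format)
  Digest::MD5.base64digest(File.read(attachment.path))
end
```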
diff --git a/db/schema.rb b/db/schema.rb
index d2216eae0..c69194b9d 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2019_06_13_094834) do
+ActiveRecord::Schema.define(version: 2019_06_13_134100) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "btree_gist"
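The service below is Rails' stock `S3Service` (hence the Basecamp copyright header) with one addition: an optional `subfolder` that is transparently prefixed onto every object key in `object_for` and `delete_prefixed`, so several deployments can share one bucket. A hedged console sketch of the effect, with made-up keys:

```ruby
service = ActiveStorage::Blob.service  # ActiveStorage::Service::CustomS3Service
data = StringIO.new('hello')
service.upload('ab/cd/somekey', data, checksum: Digest::MD5.base64digest('hello'))
# With S3_SUBFOLDER=myapp the object actually lands at "myapp/ab/cd/somekey"
# in the bucket; callers never see the prefix.
service.exist?('ab/cd/somekey')    # => true
service.download('ab/cd/somekey')  # => "hello"
```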
diff --git a/lib/active_storage/service/custom_s3_service.rb b/lib/active_storage/service/custom_s3_service.rb
new file mode 100644
index 000000000..7847284b7
--- /dev/null
+++ b/lib/active_storage/service/custom_s3_service.rb
@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+
+# Copyright (c) 2017-2019 David Heinemeier Hansson, Basecamp
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+gem 'aws-sdk-s3', '~> 1.14'
+
+require 'aws-sdk-s3'
+require 'active_support/core_ext/numeric/bytes'
+
+module ActiveStorage
+  # Wraps the Amazon Simple Storage Service (S3) as an Active Storage service.
+  # See ActiveStorage::Service for the generic API documentation that applies to all services.
+  class Service::CustomS3Service < Service
+    attr_reader :client, :bucket
+    attr_reader :multipart_upload_threshold, :upload_options
+    attr_reader :subfolder
+
+    def initialize(bucket:, upload: {}, **options)
+      @subfolder = options.delete(:subfolder)
+
+      @client = Aws::S3::Resource.new(**options)
+      @bucket = @client.bucket(bucket)
+
+      @multipart_upload_threshold = upload.fetch(:multipart_threshold, 100.megabytes)
+      @upload_options = upload
+    end
+
+    def upload(key, io, checksum: nil, content_type: nil, **)
+      instrument :upload, key: key, checksum: checksum do
+        if io.size < multipart_upload_threshold
+          upload_with_single_part key, io, checksum: checksum, content_type: content_type
+        else
+          upload_with_multipart key, io, content_type: content_type
+        end
+      end
+    end
+
+    def download(key, &block)
+      if block_given?
+        instrument :streaming_download, key: key do
+          stream(key, &block)
+        end
+      else
+        instrument :download, key: key do
+          object_for(key).get.body.string.force_encoding(Encoding::BINARY)
+        rescue Aws::S3::Errors::NoSuchKey
+          raise ActiveStorage::FileNotFoundError
+        end
+      end
+    end
+
+    def download_chunk(key, range)
+      instrument :download_chunk, key: key, range: range do
+        object_for(key).get(range: "bytes=#{range.begin}-#{range.exclude_end? ? range.end - 1 : range.end}")
+                       .body
+                       .string
+                       .force_encoding(Encoding::BINARY)
+      rescue Aws::S3::Errors::NoSuchKey
+        raise ActiveStorage::FileNotFoundError
+      end
+    end
+
+    def delete(key)
+      instrument :delete, key: key do
+        object_for(key).delete
+      end
+    end
+
+    def delete_prefixed(prefix)
+      instrument :delete_prefixed, prefix: prefix do
+        prefix = subfolder.present? ? File.join(subfolder, prefix) : prefix
+        bucket.objects(prefix: prefix).batch_delete!
+      end
+    end
+
+    def exist?(key)
+      instrument :exist, key: key do |payload|
+        answer = object_for(key).exists?
+        payload[:exist] = answer
+        answer
+      end
+    end
+
+    def url(key, expires_in:, filename:, disposition:, content_type:)
+      instrument :url, key: key do |payload|
+        generated_url = object_for(key).presigned_url :get, expires_in: expires_in.to_i,
+          response_content_disposition: content_disposition_with(type: disposition, filename: filename),
+          response_content_type: content_type
+
+        payload[:url] = generated_url
+
+        generated_url
+      end
+    end
+
+    def url_for_direct_upload(key, expires_in:, content_type:, content_length:, checksum:)
+      instrument :url, key: key do |payload|
+        generated_url = object_for(key).presigned_url :put, expires_in: expires_in.to_i,
+          content_type: content_type, content_length: content_length, content_md5: checksum
+
+        payload[:url] = generated_url
+
+        generated_url
+      end
+    end
+
+    def headers_for_direct_upload(_key, content_type:, checksum:, **)
+      { 'Content-Type' => content_type, 'Content-MD5' => checksum }
+    end
+
+    private
+
+    MAXIMUM_UPLOAD_PARTS_COUNT = 10000
+    MINIMUM_UPLOAD_PART_SIZE = 5.megabytes
+
+    def upload_with_single_part(key, io, checksum: nil, content_type: nil)
+      object_for(key).put(body: io, content_md5: checksum, content_type: content_type, **upload_options)
+    rescue Aws::S3::Errors::BadDigest
+      raise ActiveStorage::IntegrityError
+    end
+
+    def upload_with_multipart(key, io, content_type: nil)
+      part_size = [io.size.fdiv(MAXIMUM_UPLOAD_PARTS_COUNT).ceil, MINIMUM_UPLOAD_PART_SIZE].max
+
+      object_for(key).upload_stream(content_type: content_type, part_size: part_size, **upload_options) do |out|
+        IO.copy_stream(io, out)
+      end
+    end
+
+    def object_for(key)
+      key = subfolder.present? ? File.join(subfolder, key) : key
+      bucket.object(key)
+    end
+
+    # Reads the object for the given key in chunks, yielding each to the block.
+    def stream(key)
+      object = object_for(key)
+
+      chunk_size = 5.megabytes
+      offset = 0
+
+      raise ActiveStorage::FileNotFoundError unless object.exists?
+
+      while offset < object.content_length
+        yield object.get(range: "bytes=#{offset}-#{offset + chunk_size - 1}")
+                    .body
+                    .string
+                    .force_encoding(Encoding::BINARY)
+        offset += chunk_size
+      end
+    end
+  end
+end
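`upload_with_multipart` sizes parts so a stream of any length stays within S3's 10,000-part limit while respecting the 5 MB minimum part size; the arithmetic, with illustrative sizes:

```ruby
require 'active_support/core_ext/numeric/bytes'

[30.gigabytes.fdiv(10_000).ceil, 5.megabytes].max   # => 5242880 (5 MB floor wins)
[200.gigabytes.fdiv(10_000).ceil, 5.megabytes].max  # => 21474837 (~20.5 MB parts)
```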
diff --git a/lib/tasks/active_storage.rake b/lib/tasks/active_storage.rake
new file mode 100644
index 000000000..4c7292076
--- /dev/null
+++ b/lib/tasks/active_storage.rake
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+namespace :active_storage do
+  ID_PARTITION_LIMIT = 1_000_000_000
+  DIGEST = OpenSSL::Digest.const_get('SHA1').new
+
+  def id_partition(id)
+    if id < ID_PARTITION_LIMIT
+      format('%09d', id).scan(/\d{3}/).join('/')
+    else
+      format('%012d', id).scan(/\d{3}/).join('/')
+    end
+  end
+
+  def hash_data(attachment)
+    "#{attachment.record_type.underscore.pluralize}/#{attachment.name.pluralize}/#{attachment.record.id}/original"
+  end
+
+  def interpolate(pattern, attachment)
+    path = pattern
+    path = path.gsub(':class', attachment.record_type.underscore.pluralize)
+    path = path.gsub(':attachment', attachment.name.pluralize)
+    path = path.gsub(':id_partition', id_partition(attachment.record.id))
+    path = path.gsub(':hash', OpenSSL::HMAC.hexdigest(DIGEST, ENV['PAPERCLIP_HASH_SECRET'], hash_data(attachment)))
+    path.gsub(':filename', attachment.blob.filename.to_s)
+  end
+
+  desc 'Copy all files from Paperclip to ActiveStorage'
+  task :migrate_files, [:before] => :environment do |_, _args|
+    if ENV['PAPERCLIP_STORAGE'] == 'filesystem'
+      local_path = "#{Rails.root}/public/system/:class/:attachment/:id_partition/:hash/original/:filename"
+
+      ActiveStorage::Attachment.find_each do |attachment|
+        src = interpolate(local_path, attachment)
+        dst_dir = File.join(
+          'storage',
+          attachment.blob.key.first(2),
+          attachment.blob.key.first(4).last(2)
+        )
+        dst = File.join(dst_dir, attachment.blob.key)
+
+        FileUtils.mkdir_p(dst_dir)
+        puts "Copying #{src} to #{dst}"
+        FileUtils.cp(src, dst)
+      end
+    elsif ENV['PAPERCLIP_STORAGE'] == 's3'
+
+      s3_path = ':class/:attachment/:id_partition/:hash/original/:filename'
+      s3_path = "#{ENV['S3_SUBFOLDER']}/" + s3_path if ENV['S3_SUBFOLDER']
+
+      ActiveStorage::Attachment.find_each do |attachment|
+        src_path = interpolate(s3_path, attachment)
+
+        next unless S3_BUCKET.object(src_path).exists?
+
+        dst_path = ENV['S3_SUBFOLDER'] ? File.join(ENV['S3_SUBFOLDER'], attachment.blob.key) : attachment.blob.key
+
+        puts "Copying #{src_path} to #{dst_path}"
+
+        # copy via the bucket's client; copy_source takes "source-bucket/source-key"
+        S3_BUCKET.client.copy_object(bucket: S3_BUCKET.name,
+                                     copy_source: "#{S3_BUCKET.name}/#{src_path}",
+                                     key: dst_path)
+      rescue StandardError => e
+        puts 'Caught exception copying object ' + src_path + ':'
+        puts e.message
+      end
+    end
+  end
+end
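Putting it together, the cutover presumably runs the schema migration first and then this copy task, with `PAPERCLIP_STORAGE` selecting the branch; a console smoke test afterwards:

```ruby
# bin/rails db:migrate
# PAPERCLIP_STORAGE=s3 bin/rails active_storage:migrate_files
blob = ActiveStorage::Blob.first
blob.key                                      # legacy Paperclip-style path, not a random token
ActiveStorage::Blob.service.exist?(blob.key)  # => true once the copy succeeded
```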