Optimize memory usage in samples/repositories import [SCI-1665]

This commit is contained in:
Oleksii Kriuchykhin 2017-10-12 14:43:25 +02:00
parent 03743a21f6
commit 56f52ebfa7
8 changed files with 213 additions and 165 deletions

View file

@ -47,6 +47,7 @@ gem 'commit_param_routing' # Enables different submit actions in the same form t
gem 'kaminari'
gem "i18n-js", ">= 3.0.0.rc11" # Localization in javascript files
gem 'roo', '~> 2.7.1' # Spreadsheet parser
gem 'creek'
gem 'wicked_pdf'
gem 'silencer' # Silence certain Rails logs
gem 'wkhtmltopdf-heroku'
@ -72,7 +73,6 @@ gem 'ruby-graphviz', '~> 1.2' # Graphviz for rails
gem 'tinymce-rails', '~> 4.5.7' # Rich text editor
gem 'base62' # Used for smart annotations
gem 'newrelic_rpm'
group :development, :test do
gem 'byebug'
@ -85,6 +85,7 @@ group :development, :test do
end
group :production do
gem 'newrelic_rpm'
gem 'puma'
gem 'rails_12factor'
end

View file

@ -115,6 +115,9 @@ GEM
commit_param_routing (0.0.1)
concurrent-ruby (1.0.0)
crass (1.0.2)
creek (1.1.2)
nokogiri (~> 1.6.0)
rubyzip (>= 1.0.0)
debug_inspector (0.0.2)
deface (1.0.2)
colorize (>= 0.5.8)
@ -362,6 +365,7 @@ DEPENDENCIES
bootstrap_form
byebug
commit_param_routing
creek
deface (~> 1.0)
delayed_job_active_record
delayed_paperclip!

View file

@ -198,12 +198,15 @@ class RepositoriesController < ApplicationController
if parsed_file.too_large?
repository_response(t('general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB))
elsif parsed_file.empty?
flash[:notice] = t('teams.parse_sheet.errors.empty_file')
redirect_to back and return
else
@import_data = parsed_file.data
if parsed_file.generated_temp_file?
unless @import_data.header.any? && @import_data.columns.any?
return repository_response(t('teams.parse_sheet.errors.empty_file'))
end
@temp_file = parsed_file.generate_temp_file
if @temp_file
respond_to do |format|
format.json do
render json: {

View file

@ -7,106 +7,77 @@ class TeamsController < ApplicationController
def parse_sheet
session[:return_to] ||= request.referer
respond_to do |format|
if params[:file]
begin
unless params[:file]
return parse_sheet_error(t('teams.parse_sheet.errors.no_file_selected'))
end
if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
error = t('general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB)
return parse_sheet_error(error)
end
if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
error = t 'general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
else
sheet = Team.open_spreadsheet(params[:file])
# Check if we actually have any rows (last_row > 1)
if sheet.last_row.between?(0, 1)
flash[:notice] = t(
"teams.parse_sheet.errors.empty_file")
redirect_to session.delete(:return_to) and return
end
# Get data (it will trigger any errors as well)
@header = sheet.row(1)
@columns = sheet.row(2)
# Fill in fields for dropdown
@available_fields = @team.get_available_sample_fields
# Truncate long fields
@available_fields.update(@available_fields) do |_k, v|
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end
# Save file for next step (importing)
@temp_file = TempFile.new(
session_id: session.id,
file: params[:file]
)
if @temp_file.save
@temp_file.destroy_obsolete
# format.html
format.json {
render :json => {
:html => render_to_string({
:partial => "samples/parse_samples_modal.html.erb"
})
}
}
else
error = t("teams.parse_sheet.errors.temp_file_failure")
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
end
end
rescue ArgumentError, CSV::MalformedCSVError
error = t('teams.parse_sheet.errors.invalid_file',
encoding: ''.encoding)
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
rescue TypeError
error = t("teams.parse_sheet.errors.invalid_extension")
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
begin
sheet = Team.open_spreadsheet(params[:file])
# Get data (it will trigger any errors as well)
if sheet.is_a?(Roo::CSV)
@header = sheet.row(1)
@columns = sheet.row(2)
elsif sheet.is_a?(Roo::Excelx)
i = 1
sheet.each_row_streaming do |row|
@header = row.map(&:cell_value) if i == 1
@columns = row.map(&:cell_value) if i == 2
i += 1
break if i > 2
end
else
error = t("teams.parse_sheet.errors.no_file_selected")
format.html {
flash[:alert] = error
session[:return_to] ||= request.referer
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
i = 1
sheet.rows.each do |row|
@header = row.values if i == 1
@columns = row.values if i == 2
i += 1
break if i > 2
end
end
unless @header && @header.any? && @columns && @columns.any?
return parse_sheet_error(t('teams.parse_sheet.errors.empty_file'))
end
# Fill in fields for dropdown
@available_fields = @team.get_available_sample_fields
# Truncate long fields
@available_fields.update(@available_fields) do |_k, v|
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end
# Save file for next step (importing)
@temp_file = TempFile.new(
session_id: session.id,
file: params[:file]
)
if @temp_file.save
@temp_file.destroy_obsolete
respond_to do |format|
format.json do
render json: {
html: render_to_string(
partial: 'samples/parse_samples_modal.html.erb'
)
}
end
end
else
return parse_sheet_error(
t('teams.parse_sheet.errors.temp_file_failure')
)
end
rescue ArgumentError, CSV::MalformedCSVError
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_file',
encoding: ''.encoding))
rescue TypeError
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_extension'))
end
end
@ -275,6 +246,20 @@ class TeamsController < ApplicationController
private
# Reports a sheet-parsing failure to the client in the requested format.
#
# HTML requests: the message is stored in flash[:alert] and the user is
# redirected to the URL kept in session[:return_to] (set from the request
# referer when not already present); that session key is removed.
# JSON requests: renders { message: error } with the
# :unprocessable_entity status.
#
# error - message to report (callers in this file pass translated strings)
def parse_sheet_error(error)
  respond_to do |fmt|
    fmt.html do
      flash[:alert] = error
      # Remember where the user came from unless it is already stored
      session[:return_to] ||= request.referer
      return_url = session.delete(:return_to)
      redirect_to(return_url)
    end
    fmt.json do
      render(json: { message: error }, status: :unprocessable_entity)
    end
  end
end
def load_vars
@team = Team.find_by_id(params[:id])

View file

@ -114,6 +114,7 @@ class Repository < ActiveRecord::Base
name_index = -1
total_nr = 0
nr_of_added = 0
header_skipped = false
mappings.each.with_index do |(_k, value), index|
if value == '-1'
@ -130,54 +131,69 @@ class Repository < ActiveRecord::Base
unless col_compact.map(&:id).uniq.length == col_compact.length
return { status: :error, nr_of_added: nr_of_added, total_nr: total_nr }
end
rows = if sheet.is_a?(Roo::CSV)
sheet
elsif sheet.is_a?(Roo::Excelx)
sheet.each_row_streaming
else
sheet.rows
end
# Now we can iterate through record data and save stuff into db
transaction do
(2..sheet.last_row).each do |i|
total_nr += 1
record_row = RepositoryRow.new(name: sheet.row(i)[name_index],
repository: self,
created_by: user,
last_modified_by: user)
record_row.transaction(requires_new: true) do
unless record_row.save
errors = true
raise ActiveRecord::Rollback
end
rows.each do |row|
# Skip empty rows
next if row.empty?
unless header_skipped
header_skipped = true
next
end
total_nr += 1
row_cell_values = []
# Creek XLSX parser returns Hash of the row, Roo - Array
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)
sheet.row(i).each.with_index do |value, index|
if columns[index] && value
cell_value = RepositoryTextValue.new(
data: value,
created_by: user,
last_modified_by: user,
repository_cell_attributes: {
repository_row: record_row,
repository_column: columns[index]
}
)
cell = RepositoryCell.new(repository_row: record_row,
repository_column: columns[index],
value: cell_value)
cell.skip_on_import = true
cell_value.repository_cell = cell
unless cell.valid? && cell_value.valid?
errors = true
raise ActiveRecord::Rollback
end
row_cell_values << cell_value
end
end
if RepositoryTextValue.import(row_cell_values,
recursive: true,
validate: false).failed_instances.any?
errors = true
raise ActiveRecord::Rollback
end
nr_of_added += 1
record_row = RepositoryRow.new(name: row[name_index],
repository: self,
created_by: user,
last_modified_by: user)
record_row.transaction do
unless record_row.save
errors = true
raise ActiveRecord::Rollback
end
row_cell_values = []
row.each.with_index do |value, index|
if columns[index] && value
cell_value = RepositoryTextValue.new(
data: value,
created_by: user,
last_modified_by: user,
repository_cell_attributes: {
repository_row: record_row,
repository_column: columns[index]
}
)
cell = RepositoryCell.new(repository_row: record_row,
repository_column: columns[index],
value: cell_value)
cell.skip_on_import = true
cell_value.repository_cell = cell
unless cell.valid? && cell_value.valid?
errors = true
raise ActiveRecord::Rollback
end
row_cell_values << cell_value
end
end
if RepositoryTextValue.import(row_cell_values,
recursive: true,
validate: false).failed_instances.any?
errors = true
raise ActiveRecord::Rollback
end
nr_of_added += 1
end
end
@ -199,7 +215,10 @@ class Repository < ActiveRecord::Base
# This assumption is based purely on biologist's habits
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
when '.xlsx'
Roo::Excelx.new(file_path)
# The Roo Excel parser was replaced with Creek. To switch back to Roo,
# swap the two lines below; only one parser can be enabled at a time.
# Roo::Excelx.new(file_path)
Creek::Book.new(file_path).sheets[0]
else
raise TypeError
end

View file

@ -45,7 +45,10 @@ class Team < ActiveRecord::Base
# This assumption is based purely on biologist's habits
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
when '.xlsx' then
Roo::Excelx.new(file_path)
# The Roo Excel parser was replaced with Creek. To switch back to Roo,
# swap the two lines below; only one parser can be enabled at a time.
# Roo::Excelx.new(file_path)
Creek::Book.new(file_path).sheets[0]
else
raise TypeError
end
@ -66,6 +69,7 @@ class Team < ActiveRecord::Base
errors = false
nr_of_added = 0
total_nr = 0
header_skipped = false
# First let's query for all custom_fields we're referring to
custom_fields = []
@ -91,10 +95,28 @@ class Team < ActiveRecord::Base
custom_fields << cf
end
end
rows = if sheet.is_a?(Roo::CSV)
sheet
elsif sheet.is_a?(Roo::Excelx)
sheet.each_row_streaming
else
sheet.rows
end
# Now we can iterate through sample data and save stuff into db
(2..sheet.last_row).each do |i|
rows.each do |row|
# Skip empty rows
next if row.empty?
unless header_skipped
header_skipped = true
next
end
total_nr += 1
sample = Sample.new(name: sheet.row(i)[sname_index],
# Creek XLSX parser returns Hash of the row, Roo - Array
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)
sample = Sample.new(name: row[sname_index],
team: self,
user: user)
@ -104,7 +126,7 @@ class Team < ActiveRecord::Base
raise ActiveRecord::Rollback
end
sheet.row(i).each.with_index do |value, index|
row.each.with_index do |value, index|
if index == stype_index
stype = SampleType.where(name: value.strip, team: self).take

View file

@ -10,43 +10,57 @@ module ImportRepository
def data
# Get data (it will trigger any errors as well)
header = @sheet.row(1)
columns = @sheet.row(2)
if @sheet.is_a?(Roo::CSV)
header = @sheet.row(1)
columns = @sheet.row(2)
elsif @sheet.is_a?(Roo::Excelx)
i = 1
@sheet.each_row_streaming do |row|
header = row.map(&:cell_value) if i == 1
columns = row.map(&:cell_value) if i == 2
i += 1
break if i > 2
end
else
i = 1
@sheet.rows.each do |row|
header = row.values if i == 1
columns = row.values if i == 2
i += 1
break if i > 2
end
end
# Fill in fields for dropdown
@repository.available_repository_fields.transform_values! do |name|
truncate(name, length: Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end
@temp_file = TempFile.create(session_id: @session.id, file: @file)
header ||= []
columns ||= []
Data.new(header,
columns,
@repository.available_repository_fields,
@repository,
@temp_file)
@repository)
end
# True when @file's size exceeds Constants::FILE_MAX_SIZE_MB megabytes;
# a file exactly at the limit is not considered too large.
def too_large?
  max_bytes = Constants::FILE_MAX_SIZE_MB.megabytes
  @file.size > max_bytes
end
# True when @sheet reports a last row of 0 or 1, i.e. there is at most
# a single row and therefore nothing beyond a possible header line.
def empty?
  last_row_number = @sheet.last_row
  last_row_number.between?(0, 1)
end
def generated_temp_file?
def generate_temp_file
# Save file for next step (importing)
@temp_file = TempFile.new(
temp_file = TempFile.new(
session_id: @session.id,
file: @file
)
if @temp_file.save
@temp_file.destroy_obsolete
return true
if temp_file.save
temp_file.destroy_obsolete
return temp_file
end
end
Data = Struct.new(
:header, :columns, :available_fields, :repository, :temp_file
:header, :columns, :available_fields, :repository
)
end
end

View file

@ -58,7 +58,7 @@
</tbody>
</table>
</div>
<%= hidden_field_tag 'file_id', @import_data.temp_file.id %>
<%= hidden_field_tag 'file_id', @temp_file.id %>
<div id="import-errors-container">
</div>