Optimize memory usage in samples/repositories import [SCI-1665]
parent 03743a21f6
commit 56f52ebfa7
8 changed files with 213 additions and 165 deletions
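The memory saving comes from reading spreadsheet rows as a stream instead of loading the whole sheet and addressing rows by index. A minimal sketch of the difference, assuming a Roo 2.x workbook and a hypothetical process_row helper (neither is code from this commit):

require 'roo'

xlsx = Roo::Excelx.new('samples.xlsx')

# Eager access: every parsed cell stays in memory and rows are fetched by index.
(2..xlsx.last_row).each do |i|
  process_row(xlsx.row(i))
end

# Streaming access: each_row_streaming yields one row of Roo::Excelx::Cell
# objects at a time, so only the current row needs to be held in memory.
xlsx.each_row_streaming(offset: 1) do |row| # offset: 1 skips the header row
  process_row(row.map(&:value))
end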
Gemfile (3 lines changed)
@@ -47,6 +47,7 @@ gem 'commit_param_routing' # Enables different submit actions in the same form t
gem 'kaminari'
gem "i18n-js", ">= 3.0.0.rc11" # Localization in javascript files
gem 'roo', '~> 2.7.1' # Spreadsheet parser
gem 'creek'
gem 'wicked_pdf'
gem 'silencer' # Silence certain Rails logs
gem 'wkhtmltopdf-heroku'

@@ -72,7 +73,6 @@ gem 'ruby-graphviz', '~> 1.2' # Graphviz for rails
gem 'tinymce-rails', '~> 4.5.7' # Rich text editor

gem 'base62' # Used for smart annotations
gem 'newrelic_rpm'

group :development, :test do
  gem 'byebug'

@@ -85,6 +85,7 @@ group :development, :test do
end

group :production do
  gem 'newrelic_rpm'
  gem 'puma'
  gem 'rails_12factor'
end
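The new creek dependency is a streaming .xlsx reader whose rows arrive as hashes keyed by cell reference. A rough usage sketch (the file name and the printing are illustrative, not taken from this commit):

require 'creek'

book  = Creek::Book.new('samples.xlsx')
sheet = book.sheets[0]

# Rows are yielded lazily as hashes such as { 'A2' => 'Sample 1', 'B2' => 'DNA' },
# so the whole workbook never has to sit in memory at once.
sheet.rows.each do |row|
  puts row.values.map(&:to_s).inspect
end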
@@ -115,6 +115,9 @@ GEM
    commit_param_routing (0.0.1)
    concurrent-ruby (1.0.0)
    crass (1.0.2)
    creek (1.1.2)
      nokogiri (~> 1.6.0)
      rubyzip (>= 1.0.0)
    debug_inspector (0.0.2)
    deface (1.0.2)
      colorize (>= 0.5.8)

@@ -362,6 +365,7 @@ DEPENDENCIES
  bootstrap_form
  byebug
  commit_param_routing
  creek
  deface (~> 1.0)
  delayed_job_active_record
  delayed_paperclip!
@@ -198,12 +198,15 @@ class RepositoriesController < ApplicationController
if parsed_file.too_large?
repository_response(t('general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB))
elsif parsed_file.empty?
flash[:notice] = t('teams.parse_sheet.errors.empty_file')
redirect_to back and return
else
@import_data = parsed_file.data
if parsed_file.generated_temp_file?

unless @import_data.header.any? && @import_data.columns.any?
return repository_response(t('teams.parse_sheet.errors.empty_file'))
end

@temp_file = parsed_file.generate_temp_file
if @temp_file
respond_to do |format|
format.json do
render json: {
@@ -7,106 +7,77 @@ class TeamsController < ApplicationController
def parse_sheet
session[:return_to] ||= request.referer

respond_to do |format|
if params[:file]
begin
unless params[:file]
return parse_sheet_error(t('teams.parse_sheet.errors.no_file_selected'))
end
if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
error = t('general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB)
return parse_sheet_error(error)
end

if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
error = t 'general.file.size_exceeded',
file_size: Constants::FILE_MAX_SIZE_MB

format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}

else
sheet = Team.open_spreadsheet(params[:file])

# Check if we actually have any rows (last_row > 1)
if sheet.last_row.between?(0, 1)
flash[:notice] = t(
"teams.parse_sheet.errors.empty_file")
redirect_to session.delete(:return_to) and return
end

# Get data (it will trigger any errors as well)
@header = sheet.row(1)
@columns = sheet.row(2)

# Fill in fields for dropdown
@available_fields = @team.get_available_sample_fields
# Truncate long fields
@available_fields.update(@available_fields) do |_k, v|
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end

# Save file for next step (importing)
@temp_file = TempFile.new(
session_id: session.id,
file: params[:file]
)

if @temp_file.save
@temp_file.destroy_obsolete
# format.html
format.json {
render :json => {
:html => render_to_string({
:partial => "samples/parse_samples_modal.html.erb"
})
}
}
else
error = t("teams.parse_sheet.errors.temp_file_failure")
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
end
end
rescue ArgumentError, CSV::MalformedCSVError
error = t('teams.parse_sheet.errors.invalid_file',
encoding: ''.encoding)
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
rescue TypeError
error = t("teams.parse_sheet.errors.invalid_extension")
format.html {
flash[:alert] = error
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
begin
sheet = Team.open_spreadsheet(params[:file])
# Get data (it will trigger any errors as well)
if sheet.is_a?(Roo::CSV)
@header = sheet.row(1)
@columns = sheet.row(2)
elsif sheet.is_a?(Roo::Excelx)
i = 1
sheet.each_row_streaming do |row|
@header = row.map(&:cell_value) if i == 1
@columns = row.map(&:cell_value) if i == 2
i += 1
break if i > 2
end
else
error = t("teams.parse_sheet.errors.no_file_selected")
format.html {
flash[:alert] = error
session[:return_to] ||= request.referer
redirect_to session.delete(:return_to)
}
format.json {
render json: {message: error},
status: :unprocessable_entity
}
i = 1
sheet.rows.each do |row|
@header = row.values if i == 1
@columns = row.values if i == 2
i += 1
break if i > 2
end
end

unless @header && @header.any? && @columns && @columns.any?
return parse_sheet_error(t('teams.parse_sheet.errors.empty_file'))
end

# Fill in fields for dropdown
@available_fields = @team.get_available_sample_fields
# Truncate long fields
@available_fields.update(@available_fields) do |_k, v|
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end

# Save file for next step (importing)
@temp_file = TempFile.new(
session_id: session.id,
file: params[:file]
)

if @temp_file.save
@temp_file.destroy_obsolete
respond_to do |format|
format.json do
render json: {
html: render_to_string(
partial: 'samples/parse_samples_modal.html.erb'
)
}
end
end
else
return parse_sheet_error(
t('teams.parse_sheet.errors.temp_file_failure')
)
end
rescue ArgumentError, CSV::MalformedCSVError
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_file',
encoding: ''.encoding))
rescue TypeError
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_extension'))
end
end
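In the rewritten parse_sheet only the header row and the first data row are needed, so both streaming branches stop iterating as soon as they have them. A standalone sketch of that pattern against Roo::Excelx (the commit itself reads the raw cell_value; .value, the coerced value, is used here for brevity):

require 'roo'

header = columns = nil
i = 0
Roo::Excelx.new('samples.xlsx').each_row_streaming do |row|
  i += 1
  header  = row.map(&:value) if i == 1
  columns = row.map(&:value) if i == 2
  break if i >= 2 # stop streaming; later rows are never touched
end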
@@ -275,6 +246,20 @@ class TeamsController < ApplicationController
private

def parse_sheet_error(error)
respond_to do |format|
format.html do
flash[:alert] = error
session[:return_to] ||= request.referer
redirect_to session.delete(:return_to)
end
format.json do
render json: { message: error },
status: :unprocessable_entity
end
end
end

def load_vars
@team = Team.find_by_id(params[:id])
@@ -114,6 +114,7 @@ class Repository < ActiveRecord::Base
name_index = -1
total_nr = 0
nr_of_added = 0
header_skipped = false

mappings.each.with_index do |(_k, value), index|
if value == '-1'
@@ -130,54 +131,69 @@ class Repository < ActiveRecord::Base
unless col_compact.map(&:id).uniq.length == col_compact.length
return { status: :error, nr_of_added: nr_of_added, total_nr: total_nr }
end
rows = if sheet.is_a?(Roo::CSV)
sheet
elsif sheet.is_a?(Roo::Excelx)
sheet.each_row_streaming
else
sheet.rows
end

# Now we can iterate through record data and save stuff into db
transaction do
(2..sheet.last_row).each do |i|
total_nr += 1
record_row = RepositoryRow.new(name: sheet.row(i)[name_index],
repository: self,
created_by: user,
last_modified_by: user)
record_row.transaction(requires_new: true) do
unless record_row.save
errors = true
raise ActiveRecord::Rollback
end
rows.each do |row|
# Skip empty rows
next if row.empty?
unless header_skipped
header_skipped = true
next
end
total_nr += 1

row_cell_values = []
# Creek XLSX parser returns a Hash for the row, Roo returns an Array
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)

sheet.row(i).each.with_index do |value, index|
if columns[index] && value
cell_value = RepositoryTextValue.new(
data: value,
created_by: user,
last_modified_by: user,
repository_cell_attributes: {
repository_row: record_row,
repository_column: columns[index]
}
)
cell = RepositoryCell.new(repository_row: record_row,
repository_column: columns[index],
value: cell_value)
cell.skip_on_import = true
cell_value.repository_cell = cell
unless cell.valid? && cell_value.valid?
errors = true
raise ActiveRecord::Rollback
end
row_cell_values << cell_value
end
end
if RepositoryTextValue.import(row_cell_values,
recursive: true,
validate: false).failed_instances.any?
errors = true
raise ActiveRecord::Rollback
end
nr_of_added += 1
record_row = RepositoryRow.new(name: row[name_index],
repository: self,
created_by: user,
last_modified_by: user)
record_row.transaction do
unless record_row.save
errors = true
raise ActiveRecord::Rollback
end

row_cell_values = []

row.each.with_index do |value, index|
if columns[index] && value
cell_value = RepositoryTextValue.new(
data: value,
created_by: user,
last_modified_by: user,
repository_cell_attributes: {
repository_row: record_row,
repository_column: columns[index]
}
)
cell = RepositoryCell.new(repository_row: record_row,
repository_column: columns[index],
value: cell_value)
cell.skip_on_import = true
cell_value.repository_cell = cell
unless cell.valid? && cell_value.valid?
errors = true
raise ActiveRecord::Rollback
end
row_cell_values << cell_value
end
end
if RepositoryTextValue.import(row_cell_values,
recursive: true,
validate: false).failed_instances.any?
errors = true
raise ActiveRecord::Rollback
end
nr_of_added += 1
end
end
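Both import loops rely on the one-liner that levels out the two parsers' row shapes: Creek yields a Hash keyed by cell reference, Roo yields an Array. A tiny illustration with made-up values:

# Creek row (Hash keyed by cell reference) vs. Roo row (Array)
creek_row = { 'A2' => 'Sample 1', 'B2' => 'DNA', 'C2' => 42 }
roo_row   = ['Sample 1', 'DNA', 42]

normalize = ->(row) { row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s) }

normalize.call(creek_row) # => ["Sample 1", "DNA", "42"]
normalize.call(roo_row)   # => ["Sample 1", "DNA", "42"]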
@@ -199,7 +215,10 @@ class Repository < ActiveRecord::Base
# This assumption is based purely on biologists' habits
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
when '.xlsx'
Roo::Excelx.new(file_path)
# The Roo Excel parser was replaced with Creek, but it can be re-enabled:
# just swap the lines below. Only one of them may be enabled at a time.
# Roo::Excelx.new(file_path)
Creek::Book.new(file_path).sheets[0]
else
raise TypeError
end
@@ -45,7 +45,10 @@ class Team < ActiveRecord::Base
# This assumption is based purely on biologists' habits
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
when '.xlsx' then
Roo::Excelx.new(file_path)
# The Roo Excel parser was replaced with Creek, but it can be re-enabled:
# just swap the lines below. Only one of them may be enabled at a time.
# Roo::Excelx.new(file_path)
Creek::Book.new(file_path).sheets[0]
else
raise TypeError
end
@@ -66,6 +69,7 @@ class Team < ActiveRecord::Base
errors = false
nr_of_added = 0
total_nr = 0
header_skipped = false

# First let's query for all custom_fields we're referring to
custom_fields = []
@@ -91,10 +95,28 @@ class Team < ActiveRecord::Base
custom_fields << cf
end
end

rows = if sheet.is_a?(Roo::CSV)
sheet
elsif sheet.is_a?(Roo::Excelx)
sheet.each_row_streaming
else
sheet.rows
end

# Now we can iterate through sample data and save stuff into db
(2..sheet.last_row).each do |i|
rows.each do |row|
# Skip empty rows
next if row.empty?
unless header_skipped
header_skipped = true
next
end
total_nr += 1
sample = Sample.new(name: sheet.row(i)[sname_index],
# Creek XLSX parser returns a Hash for the row, Roo returns an Array
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)

sample = Sample.new(name: row[sname_index],
team: self,
user: user)
@@ -104,7 +126,7 @@ class Team < ActiveRecord::Base
raise ActiveRecord::Rollback
end

sheet.row(i).each.with_index do |value, index|
row.each.with_index do |value, index|
if index == stype_index
stype = SampleType.where(name: value.strip, team: self).take
@@ -10,43 +10,57 @@ module ImportRepository

def data
# Get data (it will trigger any errors as well)
header = @sheet.row(1)
columns = @sheet.row(2)
if @sheet.is_a?(Roo::CSV)
header = @sheet.row(1)
columns = @sheet.row(2)
elsif @sheet.is_a?(Roo::Excelx)
i = 1
@sheet.each_row_streaming do |row|
header = row.map(&:cell_value) if i == 1
columns = row.map(&:cell_value) if i == 2
i += 1
break if i > 2
end
else
i = 1
@sheet.rows.each do |row|
header = row.values if i == 1
columns = row.values if i == 2
i += 1
break if i > 2
end
end
# Fill in fields for dropdown
@repository.available_repository_fields.transform_values! do |name|
truncate(name, length: Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
end
@temp_file = TempFile.create(session_id: @session.id, file: @file)
header ||= []
columns ||= []
Data.new(header,
columns,
@repository.available_repository_fields,
@repository,
@temp_file)
@repository)
end

def too_large?
@file.size > Constants::FILE_MAX_SIZE_MB.megabytes
end

def empty?
@sheet.last_row.between?(0, 1)
end

def generated_temp_file?
def generate_temp_file
# Save file for next step (importing)
@temp_file = TempFile.new(
temp_file = TempFile.new(
session_id: @session.id,
file: @file
)

if @temp_file.save
@temp_file.destroy_obsolete
return true
if temp_file.save
temp_file.destroy_obsolete
return temp_file
end
end

Data = Struct.new(
:header, :columns, :available_fields, :repository, :temp_file
:header, :columns, :available_fields, :repository
)
end
end
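With generate_temp_file now returning the saved TempFile record (instead of a boolean) and the Data struct no longer carrying temp_file, the controller keeps the record itself and hands its id to the view. A condensed sketch of that calling sequence (the method name import_step and the return values are illustrative; only the service calls come from this commit):

def import_step(parsed_file)
  return :file_too_large if parsed_file.too_large?
  return :empty_file     if parsed_file.empty?

  import_data = parsed_file.data # header, columns, available_fields, repository
  temp_file   = parsed_file.generate_temp_file
  return :temp_file_failure unless temp_file

  # the view reads the id straight from the record:
  # <%= hidden_field_tag 'file_id', temp_file.id %>
  [import_data, temp_file]
end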
@@ -58,7 +58,7 @@
</tbody>
</table>
</div>
<%= hidden_field_tag 'file_id', @import_data.temp_file.id %>
<%= hidden_field_tag 'file_id', @temp_file.id %>

<div id="import-errors-container">
</div>