mirror of
https://github.com/scinote-eln/scinote-web.git
synced 2025-02-13 18:35:25 +08:00
Merge pull request #825 from okriuchykhin/ok_SCI_1665
Optimize memory usage in samples/repositories import [SCI-1665]
This commit is contained in:
commit
8e90e041b9
10 changed files with 209 additions and 218 deletions
1
Gemfile
1
Gemfile
|
@ -47,6 +47,7 @@ gem 'commit_param_routing' # Enables different submit actions in the same form t
|
|||
gem 'kaminari'
|
||||
gem "i18n-js", ">= 3.0.0.rc11" # Localization in javascript files
|
||||
gem 'roo', '~> 2.7.1' # Spreadsheet parser
|
||||
gem 'creek'
|
||||
gem 'wicked_pdf'
|
||||
gem 'silencer' # Silence certain Rails logs
|
||||
gem 'wkhtmltopdf-heroku'
|
||||
|
|
|
@ -117,6 +117,9 @@ GEM
|
|||
commit_param_routing (0.0.1)
|
||||
concurrent-ruby (1.0.0)
|
||||
crass (1.0.2)
|
||||
creek (1.1.2)
|
||||
nokogiri (~> 1.6.0)
|
||||
rubyzip (>= 1.0.0)
|
||||
debug_inspector (0.0.2)
|
||||
deface (1.0.2)
|
||||
colorize (>= 0.5.8)
|
||||
|
@ -364,6 +367,7 @@ DEPENDENCIES
|
|||
bootstrap_form
|
||||
byebug
|
||||
commit_param_routing
|
||||
creek
|
||||
deface (~> 1.0)
|
||||
delayed_job_active_record
|
||||
delayed_paperclip!
|
||||
|
|
|
@ -198,12 +198,14 @@ class RepositoriesController < ApplicationController
|
|||
if parsed_file.too_large?
|
||||
repository_response(t('general.file.size_exceeded',
|
||||
file_size: Constants::FILE_MAX_SIZE_MB))
|
||||
elsif parsed_file.empty?
|
||||
flash[:notice] = t('teams.parse_sheet.errors.empty_file')
|
||||
redirect_to back and return
|
||||
else
|
||||
@import_data = parsed_file.data
|
||||
if parsed_file.generated_temp_file?
|
||||
|
||||
if @import_data.header.empty? || @import_data.columns.empty?
|
||||
return repository_response(t('teams.parse_sheet.errors.empty_file'))
|
||||
end
|
||||
|
||||
if (@temp_file = parsed_file.generate_temp_file)
|
||||
respond_to do |format|
|
||||
format.json do
|
||||
render json: {
|
||||
|
|
|
@ -7,106 +7,57 @@ class TeamsController < ApplicationController
|
|||
def parse_sheet
|
||||
session[:return_to] ||= request.referer
|
||||
|
||||
respond_to do |format|
|
||||
if params[:file]
|
||||
begin
|
||||
unless params[:file]
|
||||
return parse_sheet_error(t('teams.parse_sheet.errors.no_file_selected'))
|
||||
end
|
||||
if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
|
||||
error = t('general.file.size_exceeded',
|
||||
file_size: Constants::FILE_MAX_SIZE_MB)
|
||||
return parse_sheet_error(error)
|
||||
end
|
||||
|
||||
if params[:file].size > Constants::FILE_MAX_SIZE_MB.megabytes
|
||||
error = t 'general.file.size_exceeded',
|
||||
file_size: Constants::FILE_MAX_SIZE_MB
|
||||
begin
|
||||
sheet = SpreadsheetParser.open_spreadsheet(params[:file])
|
||||
@header, @columns = SpreadsheetParser.first_two_rows(sheet)
|
||||
|
||||
format.html {
|
||||
flash[:alert] = error
|
||||
redirect_to session.delete(:return_to)
|
||||
if @header.empty? || @columns.empty?
|
||||
return parse_sheet_error(t('teams.parse_sheet.errors.empty_file'))
|
||||
end
|
||||
|
||||
# Fill in fields for dropdown
|
||||
@available_fields = @team.get_available_sample_fields
|
||||
# Truncate long fields
|
||||
@available_fields.update(@available_fields) do |_k, v|
|
||||
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
|
||||
end
|
||||
|
||||
# Save file for next step (importing)
|
||||
@temp_file = TempFile.new(
|
||||
session_id: session.id,
|
||||
file: params[:file]
|
||||
)
|
||||
|
||||
if @temp_file.save
|
||||
@temp_file.destroy_obsolete
|
||||
respond_to do |format|
|
||||
format.json do
|
||||
render json: {
|
||||
html: render_to_string(
|
||||
partial: 'samples/parse_samples_modal.html.erb'
|
||||
)
|
||||
}
|
||||
format.json {
|
||||
render json: {message: error},
|
||||
status: :unprocessable_entity
|
||||
}
|
||||
|
||||
else
|
||||
sheet = Team.open_spreadsheet(params[:file])
|
||||
|
||||
# Check if we actually have any rows (last_row > 1)
|
||||
if sheet.last_row.between?(0, 1)
|
||||
flash[:notice] = t(
|
||||
"teams.parse_sheet.errors.empty_file")
|
||||
redirect_to session.delete(:return_to) and return
|
||||
end
|
||||
|
||||
# Get data (it will trigger any errors as well)
|
||||
@header = sheet.row(1)
|
||||
@columns = sheet.row(2)
|
||||
|
||||
# Fill in fields for dropdown
|
||||
@available_fields = @team.get_available_sample_fields
|
||||
# Truncate long fields
|
||||
@available_fields.update(@available_fields) do |_k, v|
|
||||
v.truncate(Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
|
||||
end
|
||||
|
||||
# Save file for next step (importing)
|
||||
@temp_file = TempFile.new(
|
||||
session_id: session.id,
|
||||
file: params[:file]
|
||||
)
|
||||
|
||||
if @temp_file.save
|
||||
@temp_file.destroy_obsolete
|
||||
# format.html
|
||||
format.json {
|
||||
render :json => {
|
||||
:html => render_to_string({
|
||||
:partial => "samples/parse_samples_modal.html.erb"
|
||||
})
|
||||
}
|
||||
}
|
||||
else
|
||||
error = t("teams.parse_sheet.errors.temp_file_failure")
|
||||
format.html {
|
||||
flash[:alert] = error
|
||||
redirect_to session.delete(:return_to)
|
||||
}
|
||||
format.json {
|
||||
render json: {message: error},
|
||||
status: :unprocessable_entity
|
||||
}
|
||||
end
|
||||
end
|
||||
rescue ArgumentError, CSV::MalformedCSVError
|
||||
error = t('teams.parse_sheet.errors.invalid_file',
|
||||
encoding: ''.encoding)
|
||||
format.html {
|
||||
flash[:alert] = error
|
||||
redirect_to session.delete(:return_to)
|
||||
}
|
||||
format.json {
|
||||
render json: {message: error},
|
||||
status: :unprocessable_entity
|
||||
}
|
||||
rescue TypeError
|
||||
error = t("teams.parse_sheet.errors.invalid_extension")
|
||||
format.html {
|
||||
flash[:alert] = error
|
||||
redirect_to session.delete(:return_to)
|
||||
}
|
||||
format.json {
|
||||
render json: {message: error},
|
||||
status: :unprocessable_entity
|
||||
}
|
||||
end
|
||||
else
|
||||
error = t("teams.parse_sheet.errors.no_file_selected")
|
||||
format.html {
|
||||
flash[:alert] = error
|
||||
session[:return_to] ||= request.referer
|
||||
redirect_to session.delete(:return_to)
|
||||
}
|
||||
format.json {
|
||||
render json: {message: error},
|
||||
status: :unprocessable_entity
|
||||
}
|
||||
return parse_sheet_error(
|
||||
t('teams.parse_sheet.errors.temp_file_failure')
|
||||
)
|
||||
end
|
||||
rescue ArgumentError, CSV::MalformedCSVError
|
||||
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_file',
|
||||
encoding: ''.encoding))
|
||||
rescue TypeError
|
||||
return parse_sheet_error(t('teams.parse_sheet.errors.invalid_extension'))
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -122,7 +73,7 @@ class TeamsController < ApplicationController
|
|||
if @temp_file.session_id == session.id
|
||||
# Check if mappings exists or else we don't have anything to parse
|
||||
if params[:mappings]
|
||||
@sheet = Team.open_spreadsheet(@temp_file.file)
|
||||
@sheet = SpreadsheetParser.open_spreadsheet(@temp_file.file)
|
||||
|
||||
# Check for duplicated values
|
||||
h1 = params[:mappings].clone.delete_if { |k, v| v.empty? }
|
||||
|
@ -275,6 +226,20 @@ class TeamsController < ApplicationController
|
|||
|
||||
private
|
||||
|
||||
def parse_sheet_error(error)
|
||||
respond_to do |format|
|
||||
format.html do
|
||||
flash[:alert] = error
|
||||
session[:return_to] ||= request.referer
|
||||
redirect_to session.delete(:return_to)
|
||||
end
|
||||
format.json do
|
||||
render json: { message: error },
|
||||
status: :unprocessable_entity
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def load_vars
|
||||
@team = Team.find_by_id(params[:id])
|
||||
|
||||
|
|
|
@ -58,17 +58,6 @@ class Repository < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
|
||||
def open_spreadsheet(file)
|
||||
filename = file.original_filename
|
||||
file_path = file.path
|
||||
|
||||
if file.class == Paperclip::Attachment && file.is_stored_on_s3?
|
||||
fa = file.fetch
|
||||
file_path = fa.path
|
||||
end
|
||||
generate_file(filename, file_path)
|
||||
end
|
||||
|
||||
def available_repository_fields
|
||||
fields = {}
|
||||
# First and foremost add record name
|
||||
|
@ -114,6 +103,7 @@ class Repository < ActiveRecord::Base
|
|||
name_index = -1
|
||||
total_nr = 0
|
||||
nr_of_added = 0
|
||||
header_skipped = false
|
||||
|
||||
mappings.each.with_index do |(_k, value), index|
|
||||
if value == '-1'
|
||||
|
@ -130,54 +120,63 @@ class Repository < ActiveRecord::Base
|
|||
unless col_compact.map(&:id).uniq.length == col_compact.length
|
||||
return { status: :error, nr_of_added: nr_of_added, total_nr: total_nr }
|
||||
end
|
||||
rows = SpreadsheetParser.spreadsheet_enumerator(sheet)
|
||||
|
||||
# Now we can iterate through record data and save stuff into db
|
||||
transaction do
|
||||
(2..sheet.last_row).each do |i|
|
||||
total_nr += 1
|
||||
record_row = RepositoryRow.new(name: sheet.row(i)[name_index],
|
||||
repository: self,
|
||||
created_by: user,
|
||||
last_modified_by: user)
|
||||
record_row.transaction(requires_new: true) do
|
||||
unless record_row.save
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
rows.each do |row|
|
||||
# Skip empty rows
|
||||
next if row.empty?
|
||||
unless header_skipped
|
||||
header_skipped = true
|
||||
next
|
||||
end
|
||||
total_nr += 1
|
||||
|
||||
row_cell_values = []
|
||||
# Creek XLSX parser returns Hash of the row, Roo - Array
|
||||
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)
|
||||
|
||||
sheet.row(i).each.with_index do |value, index|
|
||||
if columns[index] && value
|
||||
cell_value = RepositoryTextValue.new(
|
||||
data: value,
|
||||
created_by: user,
|
||||
last_modified_by: user,
|
||||
repository_cell_attributes: {
|
||||
repository_row: record_row,
|
||||
repository_column: columns[index]
|
||||
}
|
||||
)
|
||||
cell = RepositoryCell.new(repository_row: record_row,
|
||||
repository_column: columns[index],
|
||||
value: cell_value)
|
||||
cell.skip_on_import = true
|
||||
cell_value.repository_cell = cell
|
||||
unless cell.valid? && cell_value.valid?
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
row_cell_values << cell_value
|
||||
end
|
||||
end
|
||||
if RepositoryTextValue.import(row_cell_values,
|
||||
recursive: true,
|
||||
validate: false).failed_instances.any?
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
nr_of_added += 1
|
||||
record_row = RepositoryRow.new(name: row[name_index],
|
||||
repository: self,
|
||||
created_by: user,
|
||||
last_modified_by: user)
|
||||
record_row.transaction do
|
||||
unless record_row.save
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
|
||||
row_cell_values = []
|
||||
|
||||
row.each.with_index do |value, index|
|
||||
if columns[index] && value
|
||||
cell_value = RepositoryTextValue.new(
|
||||
data: value,
|
||||
created_by: user,
|
||||
last_modified_by: user,
|
||||
repository_cell_attributes: {
|
||||
repository_row: record_row,
|
||||
repository_column: columns[index]
|
||||
}
|
||||
)
|
||||
cell = RepositoryCell.new(repository_row: record_row,
|
||||
repository_column: columns[index],
|
||||
value: cell_value)
|
||||
cell.skip_on_import = true
|
||||
cell_value.repository_cell = cell
|
||||
unless cell.valid? && cell_value.valid?
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
row_cell_values << cell_value
|
||||
end
|
||||
end
|
||||
if RepositoryTextValue.import(row_cell_values,
|
||||
recursive: true,
|
||||
validate: false).failed_instances.any?
|
||||
errors = true
|
||||
raise ActiveRecord::Rollback
|
||||
end
|
||||
nr_of_added += 1
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -186,22 +185,4 @@ class Repository < ActiveRecord::Base
|
|||
end
|
||||
{ status: :ok, nr_of_added: nr_of_added, total_nr: total_nr }
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def generate_file(filename, file_path)
|
||||
case File.extname(filename)
|
||||
when '.csv'
|
||||
Roo::CSV.new(file_path, extension: :csv)
|
||||
when '.tsv'
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.txt'
|
||||
# This assumption is based purely on biologist's habits
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.xlsx'
|
||||
Roo::Excelx.new(file_path)
|
||||
else
|
||||
raise TypeError
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -26,30 +26,6 @@ class Team < ActiveRecord::Base
|
|||
has_many :protocol_keywords, inverse_of: :team, dependent: :destroy
|
||||
has_many :tiny_mce_assets, inverse_of: :team, dependent: :destroy
|
||||
has_many :repositories, dependent: :destroy
|
||||
# Based on file's extension opens file (used for importing)
|
||||
def self.open_spreadsheet(file)
|
||||
filename = file.original_filename
|
||||
file_path = file.path
|
||||
|
||||
if file.class == Paperclip::Attachment and file.is_stored_on_s3?
|
||||
fa = file.fetch
|
||||
file_path = fa.path
|
||||
end
|
||||
|
||||
case File.extname(filename)
|
||||
when '.csv' then
|
||||
Roo::CSV.new(file_path, extension: :csv)
|
||||
when '.tsv' then
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.txt' then
|
||||
# This assumption is based purely on biologist's habits
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.xlsx' then
|
||||
Roo::Excelx.new(file_path)
|
||||
else
|
||||
raise TypeError
|
||||
end
|
||||
end
|
||||
|
||||
def search_users(query = nil)
|
||||
a_query = "%#{query}%"
|
||||
|
@ -66,6 +42,7 @@ class Team < ActiveRecord::Base
|
|||
errors = false
|
||||
nr_of_added = 0
|
||||
total_nr = 0
|
||||
header_skipped = false
|
||||
|
||||
# First let's query for all custom_fields we're referring to
|
||||
custom_fields = []
|
||||
|
@ -91,10 +68,22 @@ class Team < ActiveRecord::Base
|
|||
custom_fields << cf
|
||||
end
|
||||
end
|
||||
|
||||
rows = SpreadsheetParser.spreadsheet_enumerator(sheet)
|
||||
|
||||
# Now we can iterate through sample data and save stuff into db
|
||||
(2..sheet.last_row).each do |i|
|
||||
rows.each do |row|
|
||||
# Skip empty rows
|
||||
next if row.empty?
|
||||
unless header_skipped
|
||||
header_skipped = true
|
||||
next
|
||||
end
|
||||
total_nr += 1
|
||||
sample = Sample.new(name: sheet.row(i)[sname_index],
|
||||
# Creek XLSX parser returns Hash of the row, Roo - Array
|
||||
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)
|
||||
|
||||
sample = Sample.new(name: row[sname_index],
|
||||
team: self,
|
||||
user: user)
|
||||
|
||||
|
@ -104,7 +93,7 @@ class Team < ActiveRecord::Base
|
|||
raise ActiveRecord::Rollback
|
||||
end
|
||||
|
||||
sheet.row(i).each.with_index do |value, index|
|
||||
row.each.with_index do |value, index|
|
||||
if index == stype_index
|
||||
stype = SampleType.where(team: self)
|
||||
.where('name ILIKE ?', value.strip)
|
||||
|
|
|
@ -17,9 +17,11 @@ module ImportRepository
|
|||
private
|
||||
|
||||
def run_import_actions
|
||||
@repository.import_records(@repository.open_spreadsheet(@temp_file.file),
|
||||
@mappings,
|
||||
@user)
|
||||
@repository.import_records(
|
||||
SpreadsheetParser.open_spreadsheet(@temp_file.file),
|
||||
@mappings,
|
||||
@user
|
||||
)
|
||||
end
|
||||
|
||||
def run_checks
|
||||
|
|
|
@ -5,48 +5,40 @@ module ImportRepository
|
|||
@file = options.fetch(:file)
|
||||
@repository = options.fetch(:repository)
|
||||
@session = options.fetch(:session)
|
||||
@sheet = @repository.open_spreadsheet(@file)
|
||||
@sheet = SpreadsheetParser.open_spreadsheet(@file)
|
||||
end
|
||||
|
||||
def data
|
||||
# Get data (it will trigger any errors as well)
|
||||
header = @sheet.row(1)
|
||||
columns = @sheet.row(2)
|
||||
header, columns = SpreadsheetParser.first_two_rows(@sheet)
|
||||
# Fill in fields for dropdown
|
||||
@repository.available_repository_fields.transform_values! do |name|
|
||||
truncate(name, length: Constants::NAME_TRUNCATION_LENGTH_DROPDOWN)
|
||||
end
|
||||
@temp_file = TempFile.create(session_id: @session.id, file: @file)
|
||||
Data.new(header,
|
||||
columns,
|
||||
@repository.available_repository_fields,
|
||||
@repository,
|
||||
@temp_file)
|
||||
@repository)
|
||||
end
|
||||
|
||||
def too_large?
|
||||
@file.size > Constants::FILE_MAX_SIZE_MB.megabytes
|
||||
end
|
||||
|
||||
def empty?
|
||||
@sheet.last_row.between?(0, 1)
|
||||
end
|
||||
|
||||
def generated_temp_file?
|
||||
def generate_temp_file
|
||||
# Save file for next step (importing)
|
||||
@temp_file = TempFile.new(
|
||||
temp_file = TempFile.new(
|
||||
session_id: @session.id,
|
||||
file: @file
|
||||
)
|
||||
|
||||
if @temp_file.save
|
||||
@temp_file.destroy_obsolete
|
||||
return true
|
||||
if temp_file.save
|
||||
temp_file.destroy_obsolete
|
||||
return temp_file
|
||||
end
|
||||
end
|
||||
|
||||
Data = Struct.new(
|
||||
:header, :columns, :available_fields, :repository, :temp_file
|
||||
:header, :columns, :available_fields, :repository
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
55
app/services/spreadsheet_parser.rb
Normal file
55
app/services/spreadsheet_parser.rb
Normal file
|
@ -0,0 +1,55 @@
|
|||
class SpreadsheetParser
|
||||
# Based on file's extension opens file (used for importing)
|
||||
def self.open_spreadsheet(file)
|
||||
filename = file.original_filename
|
||||
file_path = file.path
|
||||
|
||||
if file.class == Paperclip::Attachment && file.is_stored_on_s3?
|
||||
fa = file.fetch
|
||||
file_path = fa.path
|
||||
end
|
||||
|
||||
case File.extname(filename)
|
||||
when '.csv'
|
||||
Roo::CSV.new(file_path, extension: :csv)
|
||||
when '.tsv'
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.txt'
|
||||
# This assumption is based purely on biologist's habits
|
||||
Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
|
||||
when '.xlsx'
|
||||
# Roo Excel parser was replaced with Creek, but it can be enabled back,
|
||||
# just swap lines below. But only one can be enabled at the same time.
|
||||
# Roo::Excelx.new(file_path)
|
||||
Creek::Book.new(file_path).sheets[0]
|
||||
else
|
||||
raise TypeError
|
||||
end
|
||||
end
|
||||
|
||||
def self.spreadsheet_enumerator(sheet)
|
||||
if sheet.is_a?(Roo::CSV)
|
||||
sheet
|
||||
elsif sheet.is_a?(Roo::Excelx)
|
||||
sheet.each_row_streaming
|
||||
else
|
||||
sheet.rows
|
||||
end
|
||||
end
|
||||
|
||||
def self.first_two_rows(sheet)
|
||||
rows = spreadsheet_enumerator(sheet)
|
||||
header = []
|
||||
columns = []
|
||||
i = 1
|
||||
rows.each do |row|
|
||||
# Creek XLSX parser returns Hash of the row, Roo - Array
|
||||
row = row.is_a?(Hash) ? row.values.map(&:to_s) : row.map(&:to_s)
|
||||
header = row if i == 1 && row
|
||||
columns = row if i == 2 && row
|
||||
i += 1
|
||||
break if i > 2
|
||||
end
|
||||
return header, columns
|
||||
end
|
||||
end
|
|
@ -58,7 +58,7 @@
|
|||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<%= hidden_field_tag 'file_id', @import_data.temp_file.id %>
|
||||
<%= hidden_field_tag 'file_id', @temp_file.id %>
|
||||
|
||||
<div id="import-errors-container">
|
||||
</div>
|
||||
|
|
Loading…
Reference in a new issue