From 0fec8f4d95f983f26e306298b95e23efa6d7f04b Mon Sep 17 00:00:00 2001 From: Oleksii Kriuchykhin Date: Tue, 11 Feb 2020 13:51:34 +0100 Subject: [PATCH 1/2] Improve speed of inventory import [SCI-4336] --- app/models/repository_cell.rb | 3 +- app/models/repository_checklist_value.rb | 4 +- app/models/repository_list_value.rb | 4 +- app/models/repository_status_value.rb | 4 +- app/models/repository_text_value.rb | 4 +- .../repository_import_parser/importer.rb | 126 ++++++++++-------- config/initializers/extends.rb | 2 + 7 files changed, 84 insertions(+), 63 deletions(-) diff --git a/app/models/repository_cell.rb b/app/models/repository_cell.rb index 4ee890f14..b834b42c6 100644 --- a/app/models/repository_cell.rb +++ b/app/models/repository_cell.rb @@ -105,7 +105,8 @@ class RepositoryCell < ApplicationRecord validates :repository_column, inclusion: { in: (lambda do |cell| cell.repository_row&.repository&.repository_columns || [] - end) } + end) }, + unless: :importing validates :repository_column, presence: true validate :repository_column_data_type validates :repository_row, diff --git a/app/models/repository_checklist_value.rb b/app/models/repository_checklist_value.rb index c3253578b..e7311c56c 100644 --- a/app/models/repository_checklist_value.rb +++ b/app/models/repository_checklist_value.rb @@ -55,9 +55,9 @@ class RepositoryChecklistValue < ApplicationRecord def self.import_from_text(text, attributes) value = new(attributes) - column = value.repository_cell.repository_column + column = attributes.dig(:repository_cell_attributes, :repository_column) text.split("\n").each do |item_text| - checklist_item = column.repository_checklist_items.find_by(data: item_text) + checklist_item = column.repository_checklist_items.find { |item| item.data == item_text } if checklist_item.blank? checklist_item = column.repository_checklist_items.new(data: text, diff --git a/app/models/repository_list_value.rb b/app/models/repository_list_value.rb index 49f660ec7..82431dbe4 100644 --- a/app/models/repository_list_value.rb +++ b/app/models/repository_list_value.rb @@ -53,8 +53,8 @@ class RepositoryListValue < ApplicationRecord def self.import_from_text(text, attributes) value = new(attributes) - column = value.repository_cell.repository_column - list_item = column.repository_list_items.find_by(data: text) + column = attributes.dig(:repository_cell_attributes, :repository_column) + list_item = column.repository_list_items.find { |item| item.data == text } if list_item.blank? list_item = column.repository_list_items.new(data: text, diff --git a/app/models/repository_status_value.rb b/app/models/repository_status_value.rb index 8b20c0b5a..8e657b2c5 100644 --- a/app/models/repository_status_value.rb +++ b/app/models/repository_status_value.rb @@ -48,8 +48,8 @@ class RepositoryStatusValue < ApplicationRecord icon = text[0] status = text[1..-1].strip value = new(attributes) - column = value.repository_cell.repository_column - status_item = column.repository_status_items.find_by(status: status) + column = attributes.dig(:repository_cell_attributes, :repository_column) + status_item = column.repository_status_items.find { |item| item.status == status } if status_item.blank? status_item = column.repository_status_items.new(icon: icon, diff --git a/app/models/repository_text_value.rb b/app/models/repository_text_value.rb index 9fcc51aba..f91e233db 100644 --- a/app/models/repository_text_value.rb +++ b/app/models/repository_text_value.rb @@ -34,7 +34,9 @@ class RepositoryTextValue < ApplicationRecord end def self.import_from_text(text, attributes) - new(attributes.merge(data: text)) + return nil if text.blank? + + new(attributes.merge(data: text.truncate(Constants::TEXT_MAX_LENGTH))) end alias export_formatted formatted diff --git a/app/utilities/repository_import_parser/importer.rb b/app/utilities/repository_import_parser/importer.rb index 9c3abe7cf..9b576452a 100644 --- a/app/utilities/repository_import_parser/importer.rb +++ b/app/utilities/repository_import_parser/importer.rb @@ -8,6 +8,8 @@ # @repository: the repository in which we import the items module RepositoryImportParser class Importer + IMPORT_BATCH_SIZE = 500 + def initialize(sheet, mappings, user, repository) @columns = [] @name_index = -1 @@ -38,7 +40,10 @@ module RepositoryImportParser @columns << nil @name_index = index else - @columns << @repository_columns.find_by_id(value) + column = @repository_columns.preload(Extends::REPOSITORY_IMPORT_COLUMN_PRELOADS).find_by(id: value) + next unless column && Extends::REPOSITORY_IMPORTABLE_TYPES.include?(column.data_type.to_sym) + + @columns << column end end end @@ -46,64 +51,59 @@ module RepositoryImportParser def check_for_duplicate_columns col_compact = @columns.compact if col_compact.map(&:id).uniq.length != col_compact.length - return { status: :error, - nr_of_added: @new_rows_added, - total_nr: @total_new_rows } + { status: :error, nr_of_added: @new_rows_added, total_nr: @total_new_rows } end end def import_rows! errors = false - @rows.each do |row| - # Skip empty rows - next if row.empty? - unless @header_skipped - @header_skipped = true - next - end - @total_new_rows += 1 + @repository.transaction do + batch_counter = 0 + full_row_import_batch = [] - row = SpreadsheetParser.parse_row(row, @sheet) - repository_row = new_repository_row(row) - repository_row.transaction do - unless repository_row.save - errors = true - Rails.logger.error cell_value.errors.full_messages - raise ActiveRecord::Rollback + @rows.each do |row| + # Skip empty rows + next if row.empty? + + unless @header_skipped + @header_skipped = true + next end + @total_new_rows += 1 - row_cell_values = [] - row.each.with_index do |value, index| - column = @columns[index] - if column && value.present? - attributes = { created_by: @user, - last_modified_by: @user, - repository_cell_attributes: { repository_row: repository_row, - repository_column: column, - importing: true } } - - cell_value = column.data_type.constantize.import_from_text(value, attributes) - next if cell_value.nil? - - cell_value.repository_cell.value = cell_value - - unless cell_value.valid? + new_full_row = {} + SpreadsheetParser.parse_row(row, @sheet).each.with_index do |value, index| + if index == @name_index + new_row = + RepositoryRow.new(name: value, repository: @repository, created_by: @user, last_modified_by: @user) + unless new_row.valid? errors = true - Rails.logger.error cell_value.errors.full_messages - raise ActiveRecord::Rollback + break end - row_cell_values << cell_value + + new_full_row[:repository_row] = new_row + next end + next unless @columns[index] + + new_full_row[index] = value end - unless import_to_database(row_cell_values) - errors = true - raise ActiveRecord::Rollback + if new_full_row[:repository_row].present? + full_row_import_batch << new_full_row + batch_counter += 1 end - @new_rows_added += 1 + next if batch_counter < IMPORT_BATCH_SIZE + + import_batch_to_database(full_row_import_batch) + full_row_import_batch = [] + batch_counter = 0 end + + # Import of the remaining rows + import_batch_to_database(full_row_import_batch) if full_row_import_batch.any? end if errors @@ -114,23 +114,39 @@ module RepositoryImportParser { status: :ok, nr_of_added: @new_rows_added, total_nr: @total_new_rows } end - def new_repository_row(row) - RepositoryRow.new(name: row[@name_index], - repository: @repository, - created_by: @user, - last_modified_by: @user) - end + def import_batch_to_database(full_row_import_batch) + repository_rows = full_row_import_batch.collect { |row| row[:repository_row] } + @new_rows_added += RepositoryRow.import(repository_rows, recursive: false, validate: false).ids.length + repository_rows.each { |row| row.run_callbacks(:create) } - def import_to_database(row_cell_values) - Extends::REPOSITORY_IMPORTABLE_TYPES.each do |data_type| - value_class = data_type.to_s.constantize - values = row_cell_values.select { |v| v.is_a? value_class } - next if values.blank? + import_mappings = Hash[@columns.map { |column| column&.data_type&.to_sym } + .compact + .uniq + .map { |data_type| [data_type, []] }] - return false if value_class.import(values, recursive: true, validate: false).failed_instances.any? + full_row_import_batch.each do |row| + next unless row[:repository_row].id + + row.reject { |k| k == :repository_row }.each do |index, value| + column = @columns[index] + cell_value_attributes = { created_by: @user, + last_modified_by: @user, + repository_cell_attributes: { repository_row: row[:repository_row], + repository_column: column, + importing: true } } + + cell_value = column.data_type.constantize.import_from_text(value, cell_value_attributes) + next if cell_value.nil? + + cell_value.repository_cell.value = cell_value + + import_mappings[column.data_type.to_sym] << cell_value + end end - true + import_mappings.each do |data_type, cell_values| + data_type.to_s.constantize.import(cell_values, recursive: true, validate: false) + end end end end diff --git a/config/initializers/extends.rb b/config/initializers/extends.rb index d574d0432..8d7f4b902 100644 --- a/config/initializers/extends.rb +++ b/config/initializers/extends.rb @@ -61,6 +61,8 @@ class Extends RepositoryDateValue RepositoryDateTimeValue RepositoryTimeValue RepositoryStatusValue RepositoryChecklistValue) + REPOSITORY_IMPORT_COLUMN_PRELOADS = %i(repository_list_items repository_status_items repository_checklist_items) + # Extra attributes used for search in repositories, 'filed_name' => include_hash REPOSITORY_EXTRA_SEARCH_ATTR = {'repository_text_values.data' => :repository_text_value, 'repository_number_values.data' => :repository_number_value, From 3310d79c4c04c3825d0d1d582e5917c6a98c3060 Mon Sep 17 00:00:00 2001 From: Oleksii Kriuchykhin Date: Tue, 11 Feb 2020 16:12:53 +0100 Subject: [PATCH 2/2] Fix failing tests [SCI-4336] --- app/utilities/repository_import_parser/importer.rb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/app/utilities/repository_import_parser/importer.rb b/app/utilities/repository_import_parser/importer.rb index 9b576452a..ba9c2b326 100644 --- a/app/utilities/repository_import_parser/importer.rb +++ b/app/utilities/repository_import_parser/importer.rb @@ -34,16 +34,15 @@ module RepositoryImportParser private def fetch_columns - @mappings.each.with_index do |(_, value), index| + @mappings.each_with_index do |(_, value), index| if value == '-1' # Fill blank space, so our indices stay the same @columns << nil @name_index = index else - column = @repository_columns.preload(Extends::REPOSITORY_IMPORT_COLUMN_PRELOADS).find_by(id: value) - next unless column && Extends::REPOSITORY_IMPORTABLE_TYPES.include?(column.data_type.to_sym) - - @columns << column + @columns << @repository_columns.where(data_type: Extends::REPOSITORY_IMPORTABLE_TYPES) + .preload(Extends::REPOSITORY_IMPORT_COLUMN_PRELOADS) + .find_by(id: value) end end end @@ -73,7 +72,7 @@ module RepositoryImportParser @total_new_rows += 1 new_full_row = {} - SpreadsheetParser.parse_row(row, @sheet).each.with_index do |value, index| + SpreadsheetParser.parse_row(row, @sheet).each_with_index do |value, index| if index == @name_index new_row = RepositoryRow.new(name: value, repository: @repository, created_by: @user, last_modified_by: @user)