Optimize repository export jobs [SCI-10940] (#7783)

This commit is contained in:
Alex Kriuchykhin 2024-08-06 13:26:14 +02:00 committed by GitHub
parent 4f7000a68f
commit bc4b010fa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 234 additions and 178 deletions

View file

@ -384,7 +384,7 @@ Layout/DefEndAlignment:
EnforcedStyleAlignWith: start_of_line EnforcedStyleAlignWith: start_of_line
Layout/LineLength: Layout/LineLength:
Max: 120 Max: 180
AllowHeredoc: true AllowHeredoc: true
AllowURI: true AllowURI: true
URISchemes: URISchemes:

View file

@ -69,7 +69,7 @@ class RepositoriesExportJob < ApplicationJob
# Generate CSV / XLSX # Generate CSV / XLSX
service = RepositoryExportService service = RepositoryExportService
.new(@file_type, repository.repository_rows, col_ids, @user, repository, handle_name_func) .new(@file_type, repository.repository_rows, col_ids, repository, handle_name_func)
exported_data = service.export! exported_data = service.export!
File.binwrite(repository_items_file_name, exported_data) File.binwrite(repository_items_file_name, exported_data)

View file

@ -22,20 +22,14 @@ class RepositoryZipExportJob < ZipExportJob
'my_module_repository_rows.stock_consumption' 'my_module_repository_rows.stock_consumption'
) )
end end
service = RepositoryExportService.new(@file_type, rows, params[:header_ids].map(&:to_i),
repository, in_module: true)
else else
ordered_row_ids = params[:row_ids] ordered_row_ids = params[:row_ids].map(&:to_i)
id_row_map = RepositoryRow.where(id: ordered_row_ids, rows = repository.repository_rows.where(id: ordered_row_ids)
repository: repository) service = RepositoryExportService.new(@file_type, rows, params[:header_ids].map(&:to_i),
.index_by(&:id) repository, in_module: false, ordered_row_ids: ordered_row_ids)
rows = ordered_row_ids.collect { |id| id_row_map[id.to_i] }
end end
service = RepositoryExportService
.new(@file_type,
rows,
params[:header_ids].map(&:to_i),
@user,
repository,
in_module: params[:my_module_id].present?)
exported_data = service.export! exported_data = service.export!
File.binwrite("#{dir}/export.#{@file_type}", exported_data) File.binwrite("#{dir}/export.#{@file_type}", exported_data)
end end

View file

@ -276,7 +276,7 @@ class TeamZipExportJob < ZipExportJob
end end
# Generate CSV # Generate CSV
csv_data = RepositoryCsvExport.to_csv(repo.repository_rows, col_ids, @user, repo, handle_name_func, false) csv_data = RepositoryCsvExport.to_csv(repo.repository_rows, col_ids, repo, handle_name_func, false)
File.binwrite(csv_file_path, csv_data.encode('UTF-8', invalid: :replace, undef: :replace)) File.binwrite(csv_file_path, csv_data.encode('UTF-8', invalid: :replace, undef: :replace))
# Save all attachments (it doesn't work directly in callback function # Save all attachments (it doesn't work directly in callback function

View file

@ -2,82 +2,116 @@
require 'csv' require 'csv'
module RepositoryCsvExport class RepositoryCsvExport
def self.to_csv(rows, column_ids, user, repository, handle_file_name_func, in_module) def self.to_csv(rows, column_ids, repository, handle_file_name_func, in_module, ordered_row_ids = nil)
# Parse column names
csv_header = []
add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management? add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management?
column_ids.each do |c_id| csv_header = build_header(repository, column_ids, add_consumption)
case c_id
when -1, -2
next
when -3
csv_header << I18n.t('repositories.table.id')
when -4
csv_header << I18n.t('repositories.table.row_name')
when -5
csv_header << I18n.t('repositories.table.added_by')
when -6
csv_header << I18n.t('repositories.table.added_on')
when -7
csv_header << I18n.t('repositories.table.updated_on')
when -8
csv_header << I18n.t('repositories.table.updated_by')
when -9
csv_header << I18n.t('repositories.table.archived_by')
when -10
csv_header << I18n.t('repositories.table.archived_on')
when -11
csv_header << I18n.t('repositories.table.parents')
csv_header << I18n.t('repositories.table.children')
else
csv_header << repository.repository_columns.find_by(id: c_id)&.name
end
end
csv_header << I18n.t('repositories.table.row_consumption') if add_consumption
CSV.generate do |csv| CSV.generate do |csv|
csv << csv_header csv << csv_header
rows.each do |row|
csv_row = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
csv_row << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
when -4
csv_row << row.name
when -5
csv_row << row.created_by.full_name
when -6
csv_row << I18n.l(row.created_at, format: :full)
when -7
csv_row << (row.updated_at ? I18n.l(row.updated_at, format: :full) : '')
when -8
csv_row << row.last_modified_by.full_name
when -9
csv_row << (row.archived? && row.archived_by.present? ? row.archived_by.full_name : '')
when -10
csv_row << (row.archived? && row.archived_on.present? ? I18n.l(row.archived_on, format: :full) : '')
when -11
csv_row << row.parent_repository_rows.map(&:code).join(' | ')
csv_row << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find_by(repository_column_id: c_id)
csv_row << if cell rows = rows.preload(:parent) if repository.is_a?(RepositorySnapshot)
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func rows = rows.left_outer_joins(:created_by, :last_modified_by, :archived_by)
handle_file_name_func.call(cell.value.asset) .joins('LEFT OUTER JOIN "users" "created_by" ON "created_by"."id" = "repository_rows"."created_by_id"')
else .joins('LEFT OUTER JOIN "users" "last_modified_by" ON "last_modified_by"."id" = "repository_rows"."last_modified_by_id"')
cell.value.export_formatted .joins('LEFT OUTER JOIN "users" "archived_by" ON "archived_by"."id" = "repository_rows"."archived_by_id"')
end .preload(:parent_repository_rows,
end :child_repository_rows,
end repository_cells: { repository_column: nil, value: repository.cell_preload_includes })
.select('repository_rows.* AS repository_rows')
.select('created_by.full_name AS created_by_full_name')
.select('last_modified_by.full_name AS last_modified_by_full_name')
.select('archived_by.full_name AS archived_by_full_name')
if ordered_row_ids.present?
rows = rows.order(RepositoryRow.sanitize_sql_for_order([Arel.sql('array_position(ARRAY[?], repository_rows.id)'), ordered_row_ids]))
rows.each do |row|
csv << build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
end
else
rows.find_each(batch_size: 100) do |row|
csv << build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
end end
csv_row << row.row_consumption(row.stock_consumption) if add_consumption
csv << csv_row
end end
end.encode('UTF-8', invalid: :replace, undef: :replace) end.encode('UTF-8', invalid: :replace, undef: :replace)
end end
class << self
private
# Builds the CSV header line for the requested columns.
#
# repository      - object whose repository_columns supply the names of custom columns.
# column_ids      - ordered column identifiers. -1 and -2 produce no header cell, -3..-10 map to a
#                   translated built-in title, -11 expands to two titles (parents and children),
#                   and any other id is resolved to the name of the custom column with that id.
# add_consumption - when truthy, a translated consumption title is appended as the last cell.
#
# Returns an Array of titles; a custom column id that matches no column contributes nil.
def build_header(repository, column_ids, add_consumption)
  custom_columns = repository.repository_columns.select(:id, :name)

  titles = column_ids.flat_map do |column_id|
    case column_id
    when -1, -2 then []
    when -3 then [I18n.t('repositories.table.id')]
    when -4 then [I18n.t('repositories.table.row_name')]
    when -5 then [I18n.t('repositories.table.added_by')]
    when -6 then [I18n.t('repositories.table.added_on')]
    when -7 then [I18n.t('repositories.table.updated_on')]
    when -8 then [I18n.t('repositories.table.updated_by')]
    when -9 then [I18n.t('repositories.table.archived_by')]
    when -10 then [I18n.t('repositories.table.archived_on')]
    when -11
      # The relationships column is exported as two separate cells.
      [I18n.t('repositories.table.parents'), I18n.t('repositories.table.children')]
    else
      # Unknown ids still occupy a cell (nil) so the header stays aligned with the data rows.
      [custom_columns.find { |column| column.id == column_id }&.name]
    end
  end

  titles << I18n.t('repositories.table.row_consumption') if add_consumption
  titles
end
# Builds one CSV line (an Array of field values) for +row+.
#
# row                   - repository row record. User names are read from the
#                         created_by_full_name / last_modified_by_full_name / archived_by_full_name
#                         attributes, which the export query in this class selects as aliases.
# column_ids            - ordered column identifiers. Negative ids denote built-in columns
#                         (-1 and -2 produce no field, -11 produces two fields); any other id is
#                         matched against repository_column_id of the row's cells.
# repository            - when it is a RepositorySnapshot, the item code is taken from row.parent.
# handle_file_name_func - optional callable invoked with the asset of RepositoryAssetValue cells;
#                         when nil, the cell value's export_formatted is used instead.
# add_consumption       - when truthy, row.row_consumption(row.stock_consumption) is appended last.
#
# Returns an Array of fields; a custom column for which the row has no cell contributes nil.
def build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
  cells_by_column_id = nil # built lazily, only when a custom column is requested
  csv_row = []

  column_ids.each do |c_id|
    case c_id
    when -1, -2
      next
    when -3
      csv_row << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
    when -4
      csv_row << row.name
    when -5
      csv_row << row.created_by_full_name
    when -6
      csv_row << I18n.l(row.created_at, format: :full)
    when -7
      csv_row << (row.updated_at ? I18n.l(row.updated_at, format: :full) : '')
    when -8
      csv_row << row.last_modified_by_full_name
    when -9
      # Use the selected alias instead of row.archived_by: touching the association would
      # issue one extra users query per archived row, since it is joined but not preloaded.
      csv_row << (row.archived? ? row.archived_by_full_name.to_s : '')
    when -10
      csv_row << (row.archived? && row.archived_on ? I18n.l(row.archived_on, format: :full) : '')
    when -11
      csv_row << row.parent_repository_rows.map(&:code).join(' | ')
      csv_row << row.child_repository_rows.map(&:code).join(' | ')
    else
      # Index the cells once per row so each custom column is a hash lookup rather than a scan.
      # `||=` keeps the first cell per column, matching the previous first-match behaviour.
      cells_by_column_id ||= row.repository_cells.each_with_object({}) do |cell, index|
        index[cell.repository_column_id] ||= cell
      end
      cell = cells_by_column_id[c_id]
      csv_row << if cell
                   if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
                     handle_file_name_func.call(cell.value.asset)
                   else
                     cell.value.export_formatted
                   end
                 end
    end
  end

  csv_row << row.row_consumption(row.stock_consumption) if add_consumption
  csv_row
end
end
end end

View file

@ -1,22 +1,22 @@
# frozen_string_literal: true # frozen_string_literal: true
class RepositoryExportService class RepositoryExportService
def initialize(file_type, rows, columns, user, repository, handle_name_func = nil, in_module: false) def initialize(file_type, rows, columns, repository, handle_name_func = nil, in_module: false, ordered_row_ids: nil)
@file_type = file_type @file_type = file_type
@user = user
@rows = rows @rows = rows
@columns = columns @columns = columns
@repository = repository @repository = repository
@handle_name_func = handle_name_func @handle_name_func = handle_name_func
@in_module = in_module @in_module = in_module
@ordered_row_ids = ordered_row_ids
end end
def export! def export!
case @file_type case @file_type
when :csv when :csv
file_data = RepositoryCsvExport.to_csv(@rows, @columns, @user, @repository, @handle_name_func, @in_module) file_data = RepositoryCsvExport.to_csv(@rows, @columns, @repository, @handle_name_func, @in_module, @ordered_row_ids)
when :xlsx when :xlsx
file_data = RepositoryXlsxExport.to_xlsx(@rows, @columns, @user, @repository, @handle_name_func, @in_module) file_data = RepositoryXlsxExport.to_xlsx(@rows, @columns, @repository, @handle_name_func, @in_module, @ordered_row_ids)
end end
file_data file_data

View file

@ -2,12 +2,12 @@
require 'caxlsx' require 'caxlsx'
module RepositoryXlsxExport class RepositoryXlsxExport
def self.to_empty_xlsx(repository, column_ids) def self.to_empty_xlsx(repository, column_ids)
package = Axlsx::Package.new package = Axlsx::Package.new
workbook = package.workbook workbook = package.workbook
workbook.add_worksheet(name: 'Data Export') do |sheet| workbook.add_worksheet(name: 'Data Export') do |sheet|
sheet.add_row prepare_header(repository, column_ids, false) sheet.add_row build_header(repository, column_ids, false)
end end
add_instruction(workbook) add_instruction(workbook)
@ -15,7 +15,7 @@ module RepositoryXlsxExport
package.to_stream.read package.to_stream.read
end end
def self.to_xlsx(rows, column_ids, user, repository, handle_file_name_func, in_module) def self.to_xlsx(rows, column_ids, repository, handle_file_name_func, in_module, ordered_row_ids = nil)
package = Axlsx::Package.new package = Axlsx::Package.new
workbook = package.workbook workbook = package.workbook
datetime_style = workbook.styles.add_style format_code: 'dd-mmm-yyyy hh:mm:ss' datetime_style = workbook.styles.add_style format_code: 'dd-mmm-yyyy hh:mm:ss'
@ -24,61 +24,32 @@ module RepositoryXlsxExport
add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management? add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management?
workbook.add_worksheet(name: 'Data Export') do |sheet| workbook.add_worksheet(name: 'Data Export') do |sheet|
sheet.add_row prepare_header(repository, column_ids, add_consumption) sheet.add_row build_header(repository, column_ids, add_consumption)
rows.each do |row| rows = rows.preload(:parent) if repository.is_a?(RepositorySnapshot)
row_data = [] rows = rows.left_outer_joins(:created_by, :last_modified_by, :archived_by)
column_ids.each do |c_id| .joins('LEFT OUTER JOIN "users" "created_by" ON "created_by"."id" = "repository_rows"."created_by_id"')
case c_id .joins('LEFT OUTER JOIN "users" "last_modified_by" ON "last_modified_by"."id" = "repository_rows"."last_modified_by_id"')
when -1, -2 .joins('LEFT OUTER JOIN "users" "archived_by" ON "archived_by"."id" = "repository_rows"."archived_by_id"')
next .preload(:parent_repository_rows,
when -3 :child_repository_rows,
row_data << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code) repository_cells: { repository_column: nil, value: repository.cell_preload_includes })
when -4 .select('repository_rows.* AS repository_rows')
row_data << row.name .select('created_by.full_name AS created_by_full_name')
when -5 .select('last_modified_by.full_name AS last_modified_by_full_name')
row_data << row.created_by.full_name .select('archived_by.full_name AS archived_by_full_name')
when -6
row_data << row.created_at if ordered_row_ids.present?
when -7 rows = rows.order(RepositoryRow.sanitize_sql_for_order([Arel.sql('array_position(ARRAY[?], repository_rows.id)'), ordered_row_ids]))
row_data << row.updated_at rows.each do |row|
when -8 row_data = build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
row_data << row.last_modified_by.full_name sheet.add_row(row_data, style: build_row_style(row_data, datetime_style, date_style))
when -9
row_data << (row.archived? && row.archived_by.present? ? row.archived_by.full_name : '')
when -10
row_data << row.archived_on
when -11
row_data << row.parent_repository_rows.map(&:code).join(' | ')
row_data << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find_by(repository_column_id: c_id)
row_data << if cell
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
handle_file_name_func.call(cell.value.asset)
elsif cell.value.is_a?(RepositoryDateTimeValue) || cell.value.is_a?(RepositoryDateValue)
cell.value.data
else
cell.value.export_formatted
end
end
end
end end
row_data << row.row_consumption(row.stock_consumption) if add_consumption else
rows.find_each(batch_size: 100) do |row|
style = row_data.map do |c| row_data = build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
case c sheet.add_row(row_data, style: build_row_style(row_data, datetime_style, date_style))
when ActiveSupport::TimeWithZone
datetime_style
when Time # Date values are of class Time for some reason
date_style
end
end end
sheet.add_row(
row_data,
style: style
)
end end
end end
@ -87,44 +58,101 @@ module RepositoryXlsxExport
package.to_stream.read package.to_stream.read
end end
def self.add_instruction(workbook) class << self
workbook.add_worksheet(name: 'Instruction') do |sheet| private
image = File.expand_path('app/assets/images/import_instruction.png')
sheet.add_image(image_src: image, start_at: 'A1', width: 1260, height: 994)
end
end
def self.prepare_header(repository, column_ids, add_consumption) def add_instruction(workbook)
header = [] workbook.add_worksheet(name: 'Instruction') do |sheet|
column_ids.each do |c_id| image = File.expand_path('app/assets/images/import_instruction.png')
case c_id sheet.add_image(image_src: image, start_at: 'A1', width: 1260, height: 994)
when -1, -2
next
when -3
header << I18n.t('repositories.table.id')
when -4
header << I18n.t('repositories.table.row_name')
when -5
header << I18n.t('repositories.table.added_by')
when -6
header << I18n.t('repositories.table.added_on')
when -7
header << I18n.t('repositories.table.updated_on')
when -8
header << I18n.t('repositories.table.updated_by')
when -9
header << I18n.t('repositories.table.archived_by')
when -10
header << I18n.t('repositories.table.archived_on')
when -11
header << I18n.t('repositories.table.parents')
header << I18n.t('repositories.table.children')
else
header << repository.repository_columns.find_by(id: c_id)&.name
end end
end end
header << I18n.t('repositories.table.row_consumption') if add_consumption
header def build_header(repository, column_ids, add_consumption)
header = []
custom_columns = repository.repository_columns.select(:id, :name)
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
header << I18n.t('repositories.table.id')
when -4
header << I18n.t('repositories.table.row_name')
when -5
header << I18n.t('repositories.table.added_by')
when -6
header << I18n.t('repositories.table.added_on')
when -7
header << I18n.t('repositories.table.updated_on')
when -8
header << I18n.t('repositories.table.updated_by')
when -9
header << I18n.t('repositories.table.archived_by')
when -10
header << I18n.t('repositories.table.archived_on')
when -11
header << I18n.t('repositories.table.parents')
header << I18n.t('repositories.table.children')
else
header << custom_columns.find { |column| column.id == c_id }&.name
end
end
header << I18n.t('repositories.table.row_consumption') if add_consumption
header
end
# Builds the cell values of one worksheet row for +row+.
#
# row                   - repository row record. User names are read from the
#                         created_by_full_name / last_modified_by_full_name / archived_by_full_name
#                         attributes, which the export query in this class selects as aliases.
# column_ids            - ordered column identifiers. Negative ids denote built-in columns
#                         (-1 and -2 produce no cell, -11 produces two cells); any other id is
#                         matched against repository_column_id of the row's cells.
# repository            - when it is a RepositorySnapshot, the item code is taken from row.parent.
# handle_file_name_func - optional callable invoked with the asset of RepositoryAssetValue cells.
# add_consumption       - when truthy, row.row_consumption(row.stock_consumption) is appended last.
#
# Timestamps (created_at, updated_at, archived_on) and the data of date/datetime cells are
# appended as raw values rather than formatted strings.
#
# Returns an Array of cell values; a custom column for which the row has no cell contributes nil.
def build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
  cells_by_column_id = nil # built lazily, only when a custom column is requested
  row_data = []

  column_ids.each do |c_id|
    case c_id
    when -1, -2
      next
    when -3
      row_data << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
    when -4
      row_data << row.name
    when -5
      row_data << row.created_by_full_name
    when -6
      row_data << row.created_at
    when -7
      row_data << row.updated_at
    when -8
      row_data << row.last_modified_by_full_name
    when -9
      # Use the selected alias instead of row.archived_by: touching the association would
      # issue one extra users query per archived row, since it is joined but not preloaded.
      row_data << (row.archived? ? row.archived_by_full_name.to_s : '')
    when -10
      row_data << row.archived_on
    when -11
      row_data << row.parent_repository_rows.map(&:code).join(' | ')
      row_data << row.child_repository_rows.map(&:code).join(' | ')
    else
      # Index the cells once per row so each custom column is a hash lookup rather than a scan.
      # `||=` keeps the first cell per column, matching the previous first-match behaviour.
      cells_by_column_id ||= row.repository_cells.each_with_object({}) do |cell, index|
        index[cell.repository_column_id] ||= cell
      end
      cell = cells_by_column_id[c_id]
      row_data << if cell
                    if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
                      handle_file_name_func.call(cell.value.asset)
                    elsif cell.value.is_a?(RepositoryDateTimeValue) || cell.value.is_a?(RepositoryDateValue)
                      cell.value.data
                    else
                      cell.value.export_formatted
                    end
                  end
    end
  end

  row_data << row.row_consumption(row.stock_consumption) if add_consumption
  row_data
end
# Picks a cell style for every value of a worksheet row.
#
# row_data       - Array of cell values for one row.
# datetime_style - style returned for ActiveSupport::TimeWithZone values.
# date_style     - style returned for any other Time value.
#
# Returns an Array parallel to row_data; values of any other class map to nil.
def build_row_style(row_data, datetime_style, date_style)
  row_data.map do |value|
    # Branch order matters: the zone-aware check must run before the generic Time check.
    case value
    when ActiveSupport::TimeWithZone then datetime_style
    when Time then date_style # per the original note, date values arrive as Time instances
    end
  end
end
end end
end end