Optimize repository export jobs [SCI-10940] (#7783)

This commit is contained in:
Alex Kriuchykhin 2024-08-06 13:26:14 +02:00 committed by GitHub
parent 4f7000a68f
commit bc4b010fa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 234 additions and 178 deletions

View file

@@ -384,7 +384,7 @@ Layout/DefEndAlignment:
EnforcedStyleAlignWith: start_of_line
Layout/LineLength:
Max: 120
Max: 180
AllowHeredoc: true
AllowURI: true
URISchemes:

View file

@@ -69,7 +69,7 @@ class RepositoriesExportJob < ApplicationJob
# Generate CSV / XLSX
service = RepositoryExportService
.new(@file_type, repository.repository_rows, col_ids, @user, repository, handle_name_func)
.new(@file_type, repository.repository_rows, col_ids, repository, handle_name_func)
exported_data = service.export!
File.binwrite(repository_items_file_name, exported_data)

View file

@@ -22,20 +22,14 @@ class RepositoryZipExportJob < ZipExportJob
'my_module_repository_rows.stock_consumption'
)
end
service = RepositoryExportService.new(@file_type, rows, params[:header_ids].map(&:to_i),
repository, in_module: true)
else
ordered_row_ids = params[:row_ids]
id_row_map = RepositoryRow.where(id: ordered_row_ids,
repository: repository)
.index_by(&:id)
rows = ordered_row_ids.collect { |id| id_row_map[id.to_i] }
ordered_row_ids = params[:row_ids].map(&:to_i)
rows = repository.repository_rows.where(id: ordered_row_ids)
service = RepositoryExportService.new(@file_type, rows, params[:header_ids].map(&:to_i),
repository, in_module: false, ordered_row_ids: ordered_row_ids)
end
service = RepositoryExportService
.new(@file_type,
rows,
params[:header_ids].map(&:to_i),
@user,
repository,
in_module: params[:my_module_id].present?)
exported_data = service.export!
File.binwrite("#{dir}/export.#{@file_type}", exported_data)
end

View file

@@ -276,7 +276,7 @@ class TeamZipExportJob < ZipExportJob
end
# Generate CSV
csv_data = RepositoryCsvExport.to_csv(repo.repository_rows, col_ids, @user, repo, handle_name_func, false)
csv_data = RepositoryCsvExport.to_csv(repo.repository_rows, col_ids, repo, handle_name_func, false)
File.binwrite(csv_file_path, csv_data.encode('UTF-8', invalid: :replace, undef: :replace))
# Save all attachments (it doesn't work directly in callback function

View file

@@ -2,82 +2,116 @@
require 'csv'
module RepositoryCsvExport
def self.to_csv(rows, column_ids, user, repository, handle_file_name_func, in_module)
# Parse column names
csv_header = []
class RepositoryCsvExport
def self.to_csv(rows, column_ids, repository, handle_file_name_func, in_module, ordered_row_ids = nil)
add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management?
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
csv_header << I18n.t('repositories.table.id')
when -4
csv_header << I18n.t('repositories.table.row_name')
when -5
csv_header << I18n.t('repositories.table.added_by')
when -6
csv_header << I18n.t('repositories.table.added_on')
when -7
csv_header << I18n.t('repositories.table.updated_on')
when -8
csv_header << I18n.t('repositories.table.updated_by')
when -9
csv_header << I18n.t('repositories.table.archived_by')
when -10
csv_header << I18n.t('repositories.table.archived_on')
when -11
csv_header << I18n.t('repositories.table.parents')
csv_header << I18n.t('repositories.table.children')
else
csv_header << repository.repository_columns.find_by(id: c_id)&.name
end
end
csv_header << I18n.t('repositories.table.row_consumption') if add_consumption
csv_header = build_header(repository, column_ids, add_consumption)
CSV.generate do |csv|
csv << csv_header
rows.each do |row|
csv_row = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
csv_row << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
when -4
csv_row << row.name
when -5
csv_row << row.created_by.full_name
when -6
csv_row << I18n.l(row.created_at, format: :full)
when -7
csv_row << (row.updated_at ? I18n.l(row.updated_at, format: :full) : '')
when -8
csv_row << row.last_modified_by.full_name
when -9
csv_row << (row.archived? && row.archived_by.present? ? row.archived_by.full_name : '')
when -10
csv_row << (row.archived? && row.archived_on.present? ? I18n.l(row.archived_on, format: :full) : '')
when -11
csv_row << row.parent_repository_rows.map(&:code).join(' | ')
csv_row << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find_by(repository_column_id: c_id)
csv_row << if cell
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
handle_file_name_func.call(cell.value.asset)
else
cell.value.export_formatted
end
end
end
rows = rows.preload(:parent) if repository.is_a?(RepositorySnapshot)
rows = rows.left_outer_joins(:created_by, :last_modified_by, :archived_by)
.joins('LEFT OUTER JOIN "users" "created_by" ON "created_by"."id" = "repository_rows"."created_by_id"')
.joins('LEFT OUTER JOIN "users" "last_modified_by" ON "last_modified_by"."id" = "repository_rows"."last_modified_by_id"')
.joins('LEFT OUTER JOIN "users" "archived_by" ON "archived_by"."id" = "repository_rows"."archived_by_id"')
.preload(:parent_repository_rows,
:child_repository_rows,
repository_cells: { repository_column: nil, value: repository.cell_preload_includes })
.select('repository_rows.* AS repository_rows')
.select('created_by.full_name AS created_by_full_name')
.select('last_modified_by.full_name AS last_modified_by_full_name')
.select('archived_by.full_name AS archived_by_full_name')
if ordered_row_ids.present?
rows = rows.order(RepositoryRow.sanitize_sql_for_order([Arel.sql('array_position(ARRAY[?], repository_rows.id)'), ordered_row_ids]))
rows.each do |row|
csv << build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
end
else
rows.find_each(batch_size: 100) do |row|
csv << build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
end
csv_row << row.row_consumption(row.stock_consumption) if add_consumption
csv << csv_row
end
end.encode('UTF-8', invalid: :replace, undef: :replace)
end
class << self
private
def build_header(repository, column_ids, add_consumption)
# Parse column names
csv_header = []
custom_columns = repository.repository_columns.select(:id, :name)
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
csv_header << I18n.t('repositories.table.id')
when -4
csv_header << I18n.t('repositories.table.row_name')
when -5
csv_header << I18n.t('repositories.table.added_by')
when -6
csv_header << I18n.t('repositories.table.added_on')
when -7
csv_header << I18n.t('repositories.table.updated_on')
when -8
csv_header << I18n.t('repositories.table.updated_by')
when -9
csv_header << I18n.t('repositories.table.archived_by')
when -10
csv_header << I18n.t('repositories.table.archived_on')
when -11
csv_header << I18n.t('repositories.table.parents')
csv_header << I18n.t('repositories.table.children')
else
csv_header << custom_columns.find { |column| column.id == c_id }&.name
end
end
csv_header << I18n.t('repositories.table.row_consumption') if add_consumption
csv_header
end
def build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
csv_row = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
csv_row << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
when -4
csv_row << row.name
when -5
csv_row << row.created_by_full_name
when -6
csv_row << I18n.l(row.created_at, format: :full)
when -7
csv_row << (row.updated_at ? I18n.l(row.updated_at, format: :full) : '')
when -8
csv_row << row.last_modified_by_full_name
when -9
csv_row << (row.archived? && row.archived_by.present? ? row.archived_by_full_name : '')
when -10
csv_row << (row.archived? && row.archived_on.present? ? I18n.l(row.archived_on, format: :full) : '')
when -11
csv_row << row.parent_repository_rows.map(&:code).join(' | ')
csv_row << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find { |c| c.repository_column_id == c_id }
csv_row << if cell
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
handle_file_name_func.call(cell.value.asset)
else
cell.value.export_formatted
end
end
end
end
csv_row << row.row_consumption(row.stock_consumption) if add_consumption
csv_row
end
end
end

View file

@@ -1,22 +1,22 @@
# frozen_string_literal: true
class RepositoryExportService
def initialize(file_type, rows, columns, user, repository, handle_name_func = nil, in_module: false)
def initialize(file_type, rows, columns, repository, handle_name_func = nil, in_module: false, ordered_row_ids: nil)
@file_type = file_type
@user = user
@rows = rows
@columns = columns
@repository = repository
@handle_name_func = handle_name_func
@in_module = in_module
@ordered_row_ids = ordered_row_ids
end
def export!
case @file_type
when :csv
file_data = RepositoryCsvExport.to_csv(@rows, @columns, @user, @repository, @handle_name_func, @in_module)
file_data = RepositoryCsvExport.to_csv(@rows, @columns, @repository, @handle_name_func, @in_module, @ordered_row_ids)
when :xlsx
file_data = RepositoryXlsxExport.to_xlsx(@rows, @columns, @user, @repository, @handle_name_func, @in_module)
file_data = RepositoryXlsxExport.to_xlsx(@rows, @columns, @repository, @handle_name_func, @in_module, @ordered_row_ids)
end
file_data

View file

@@ -2,12 +2,12 @@
require 'caxlsx'
module RepositoryXlsxExport
class RepositoryXlsxExport
def self.to_empty_xlsx(repository, column_ids)
package = Axlsx::Package.new
workbook = package.workbook
workbook.add_worksheet(name: 'Data Export') do |sheet|
sheet.add_row prepare_header(repository, column_ids, false)
sheet.add_row build_header(repository, column_ids, false)
end
add_instruction(workbook)
@@ -15,7 +15,7 @@ module RepositoryXlsxExport
package.to_stream.read
end
def self.to_xlsx(rows, column_ids, user, repository, handle_file_name_func, in_module)
def self.to_xlsx(rows, column_ids, repository, handle_file_name_func, in_module, ordered_row_ids = nil)
package = Axlsx::Package.new
workbook = package.workbook
datetime_style = workbook.styles.add_style format_code: 'dd-mmm-yyyy hh:mm:ss'
@@ -24,61 +24,32 @@ module RepositoryXlsxExport
add_consumption = in_module && !repository.is_a?(RepositorySnapshot) && repository.has_stock_management?
workbook.add_worksheet(name: 'Data Export') do |sheet|
sheet.add_row prepare_header(repository, column_ids, add_consumption)
sheet.add_row build_header(repository, column_ids, add_consumption)
rows.each do |row|
row_data = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
row_data << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
when -4
row_data << row.name
when -5
row_data << row.created_by.full_name
when -6
row_data << row.created_at
when -7
row_data << row.updated_at
when -8
row_data << row.last_modified_by.full_name
when -9
row_data << (row.archived? && row.archived_by.present? ? row.archived_by.full_name : '')
when -10
row_data << row.archived_on
when -11
row_data << row.parent_repository_rows.map(&:code).join(' | ')
row_data << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find_by(repository_column_id: c_id)
row_data << if cell
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
handle_file_name_func.call(cell.value.asset)
elsif cell.value.is_a?(RepositoryDateTimeValue) || cell.value.is_a?(RepositoryDateValue)
cell.value.data
else
cell.value.export_formatted
end
end
end
rows = rows.preload(:parent) if repository.is_a?(RepositorySnapshot)
rows = rows.left_outer_joins(:created_by, :last_modified_by, :archived_by)
.joins('LEFT OUTER JOIN "users" "created_by" ON "created_by"."id" = "repository_rows"."created_by_id"')
.joins('LEFT OUTER JOIN "users" "last_modified_by" ON "last_modified_by"."id" = "repository_rows"."last_modified_by_id"')
.joins('LEFT OUTER JOIN "users" "archived_by" ON "archived_by"."id" = "repository_rows"."archived_by_id"')
.preload(:parent_repository_rows,
:child_repository_rows,
repository_cells: { repository_column: nil, value: repository.cell_preload_includes })
.select('repository_rows.* AS repository_rows')
.select('created_by.full_name AS created_by_full_name')
.select('last_modified_by.full_name AS last_modified_by_full_name')
.select('archived_by.full_name AS archived_by_full_name')
if ordered_row_ids.present?
rows = rows.order(RepositoryRow.sanitize_sql_for_order([Arel.sql('array_position(ARRAY[?], repository_rows.id)'), ordered_row_ids]))
rows.each do |row|
row_data = build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
sheet.add_row(row_data, style: build_row_style(row_data, datetime_style, date_style))
end
row_data << row.row_consumption(row.stock_consumption) if add_consumption
style = row_data.map do |c|
case c
when ActiveSupport::TimeWithZone
datetime_style
when Time # Date values are of class Time for some reason
date_style
end
else
rows.find_each(batch_size: 100) do |row|
row_data = build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
sheet.add_row(row_data, style: build_row_style(row_data, datetime_style, date_style))
end
sheet.add_row(
row_data,
style: style
)
end
end
@@ -87,44 +58,101 @@ module RepositoryXlsxExport
package.to_stream.read
end
def self.add_instruction(workbook)
workbook.add_worksheet(name: 'Instruction') do |sheet|
image = File.expand_path('app/assets/images/import_instruction.png')
sheet.add_image(image_src: image, start_at: 'A1', width: 1260, height: 994)
end
end
class << self
private
def self.prepare_header(repository, column_ids, add_consumption)
header = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
header << I18n.t('repositories.table.id')
when -4
header << I18n.t('repositories.table.row_name')
when -5
header << I18n.t('repositories.table.added_by')
when -6
header << I18n.t('repositories.table.added_on')
when -7
header << I18n.t('repositories.table.updated_on')
when -8
header << I18n.t('repositories.table.updated_by')
when -9
header << I18n.t('repositories.table.archived_by')
when -10
header << I18n.t('repositories.table.archived_on')
when -11
header << I18n.t('repositories.table.parents')
header << I18n.t('repositories.table.children')
else
header << repository.repository_columns.find_by(id: c_id)&.name
def add_instruction(workbook)
workbook.add_worksheet(name: 'Instruction') do |sheet|
image = File.expand_path('app/assets/images/import_instruction.png')
sheet.add_image(image_src: image, start_at: 'A1', width: 1260, height: 994)
end
end
header << I18n.t('repositories.table.row_consumption') if add_consumption
header
def build_header(repository, column_ids, add_consumption)
header = []
custom_columns = repository.repository_columns.select(:id, :name)
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
header << I18n.t('repositories.table.id')
when -4
header << I18n.t('repositories.table.row_name')
when -5
header << I18n.t('repositories.table.added_by')
when -6
header << I18n.t('repositories.table.added_on')
when -7
header << I18n.t('repositories.table.updated_on')
when -8
header << I18n.t('repositories.table.updated_by')
when -9
header << I18n.t('repositories.table.archived_by')
when -10
header << I18n.t('repositories.table.archived_on')
when -11
header << I18n.t('repositories.table.parents')
header << I18n.t('repositories.table.children')
else
header << custom_columns.find { |column| column.id == c_id }&.name
end
end
header << I18n.t('repositories.table.row_consumption') if add_consumption
header
end
def build_row(row, column_ids, repository, handle_file_name_func, add_consumption)
row_data = []
column_ids.each do |c_id|
case c_id
when -1, -2
next
when -3
row_data << (repository.is_a?(RepositorySnapshot) ? row.parent.code : row.code)
when -4
row_data << row.name
when -5
row_data << row.created_by_full_name
when -6
row_data << row.created_at
when -7
row_data << row.updated_at
when -8
row_data << row.last_modified_by_full_name
when -9
row_data << (row.archived? && row.archived_by.present? ? row.archived_by_full_name : '')
when -10
row_data << row.archived_on
when -11
row_data << row.parent_repository_rows.map(&:code).join(' | ')
row_data << row.child_repository_rows.map(&:code).join(' | ')
else
cell = row.repository_cells.find { |c| c.repository_column_id == c_id }
row_data << if cell
if cell.value_type == 'RepositoryAssetValue' && handle_file_name_func
handle_file_name_func.call(cell.value.asset)
elsif cell.value.is_a?(RepositoryDateTimeValue) || cell.value.is_a?(RepositoryDateValue)
cell.value.data
else
cell.value.export_formatted
end
end
end
end
row_data << row.row_consumption(row.stock_consumption) if add_consumption
row_data
end
def build_row_style(row_data, datetime_style, date_style)
row_data.map do |c|
case c
when ActiveSupport::TimeWithZone
datetime_style
when Time # Date values are of class Time for some reason
date_style
end
end
end
end
end