# frozen_string_literal: true

# Helpers for opening uploaded/local spreadsheet files with Roo and for
# reading their rows in a uniform way regardless of the underlying format.
class SpreadsheetParser
  # Opens +file+ with the Roo reader that matches its extension
  # (used for importing).
  #
  # The extension is taken from +original_filename+ when the object's class
  # is named +UploadedFile+ (in any namespace), otherwise from the basename
  # of +file.path+. Matching is case-insensitive, so "data.XLSX" is handled
  # the same way as "data.xlsx".
  #
  # @param file [#path] object exposing the path of the file on disk
  # @return [Roo::CSV, Roo::Excelx] reader opened on +file.path+
  # @raise [TypeError] when the extension is not .csv, .tsv, .txt or .xlsx
  def self.open_spreadsheet(file)
    file_path = file.path
    # `to_s` guards against anonymous classes, whose name is nil.
    filename = if file.class.name.to_s.split('::').last == 'UploadedFile'
                 file.original_filename
               else
                 File.basename(file_path)
               end

    extension = File.extname(filename).downcase
    case extension
    when '.csv'
      Roo::CSV.new(file_path, extension: :csv)
    when '.tsv', '.txt'
      # Treating .txt as tab-separated is an assumption based purely on
      # biologist's habits.
      Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
    when '.xlsx'
      Roo::Excelx.new(file_path)
    else
      raise TypeError, "Unsupported spreadsheet extension: #{extension.inspect}"
    end
  end

  # Returns an object whose rows can be enumerated for the given sheet.
  #
  # @param sheet [Roo::CSV, Roo::Excelx, #rows] opened sheet
  # @return [Object] the sheet itself for Roo::CSV, the result of
  #   +each_row_streaming(pad_cells: true)+ for Roo::Excelx, and
  #   +sheet.rows+ for anything else
  def self.spreadsheet_enumerator(sheet)
    case sheet
    when Roo::CSV
      sheet
    when Roo::Excelx
      sheet.each_row_streaming(pad_cells: true)
    else
      sheet.rows
    end
  end

  # Reads the first two rows of the sheet: the header and the first data row.
  #
  # @param sheet [Roo::CSV, Roo::Excelx, #rows] opened sheet
  # @return [Array(Array, Array)] +[header, columns]+; a row that is missing
  #   from the sheet is returned as an empty array
  def self.first_two_rows(sheet)
    leading_rows = spreadsheet_enumerator(sheet).take(2)
    header, columns = leading_rows.each_with_index.map do |row_values, index|
      # Only the very first row is parsed as a header.
      parse_row(row_values, sheet, header: index.zero?)
    end

    [header || [], columns || []]
  end

  # Converts one raw row into plain values.
  #
  # Header rows and rows of non-Excelx sheets are stringified with +to_s+.
  # For Excelx data rows, number cells in 'General' format yield
  # +value.to_d+; every other cell yields its +formatted_value+, and nil
  # cells stay nil.
  #
  # @param row [Array] raw cells/values of a single row
  # @param sheet [Object] sheet the row belongs to (decides the parsing mode)
  # @param header [Boolean] whether the row is the header row
  # @return [Array] parsed values, one per cell
  def self.parse_row(row, sheet, header: false)
    return row.map(&:to_s) if header || !sheet.is_a?(Roo::Excelx)

    row.map do |cell|
      if cell.is_a?(Roo::Excelx::Cell::Number) && cell.format == 'General'
        cell.value&.to_d
      else
        # Safe navigation keeps nil cells as nil.
        cell&.formatted_value
      end
    end
  end
end