2019-09-05 14:35:11 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2017-10-17 20:42:06 +08:00
|
|
|
# Opens spreadsheet-like files (CSV, TSV, TXT, XLSX) for importing and offers
# helpers for reading their rows independently of the underlying parser.
class SpreadsheetParser
  # Opens +file+ with the parser that matches its file extension.
  #
  # @param file [#path] an upload-like object (responds to
  #   +original_filename+ and +path+) or any object responding to +path+
  # @return [Roo::CSV, Roo::Excelx] Roo::CSV for .csv/.tsv/.txt files,
  #   Roo::Excelx for .xlsx files
  # @raise [TypeError] when the extension is none of the supported ones
  def self.open_spreadsheet(file)
    file_path = file.path
    # Duck-type on #original_filename instead of matching the class name:
    # it covers every upload wrapper and also works for instances of
    # anonymous classes, whose Class#name is nil.
    filename =
      if file.respond_to?(:original_filename)
        file.original_filename
      else
        File.basename(file_path)
      end

    extension = File.extname(filename)
    case extension
    when '.csv'
      Roo::CSV.new(file_path, extension: :csv)
    when '.tsv', '.txt'
      # Treating .txt as tab-separated is an assumption based purely on
      # biologists' habits
      Roo::CSV.new(file_path, csv_options: { col_sep: "\t" })
    when '.xlsx'
      # Roo is the active XLSX parser; Creek (commented line below) is the
      # alternative. Swap the two lines to switch parsers - only one of them
      # can be enabled at the same time.
      Roo::Excelx.new(file_path)
      # Creek::Book.new(file_path).sheets[0]
    else
      raise TypeError, "Unsupported spreadsheet file extension: #{extension.inspect}"
    end
  end

  # Returns an object whose +each+ yields the rows of +sheet+.
  #
  # @param sheet [Roo::CSV, Roo::Excelx, #rows] an opened spreadsheet
  # @return [#each] the sheet itself for Roo::CSV, the result of
  #   +each_row_streaming(pad_cells: true)+ for Roo::Excelx, and
  #   +sheet.rows+ for anything else (e.g. a Creek sheet)
  def self.spreadsheet_enumerator(sheet)
    case sheet
    when Roo::CSV then sheet
    when Roo::Excelx then sheet.each_row_streaming(pad_cells: true)
    else sheet.rows
    end
  end

  # Reads at most the first two rows of +sheet+.
  #
  # @param sheet [Roo::CSV, Roo::Excelx, #rows] an opened spreadsheet
  # @return [Array(Array, Array)] +[header, columns]+: the parsed first and
  #   second row; a row missing from the sheet is returned as +[]+
  def self.first_two_rows(sheet)
    parsed = []
    spreadsheet_enumerator(sheet).each do |row_values|
      # Creek XLSX parser returns Hash of the row, Roo - Array
      parsed << parse_row(row_values, sheet)
      # Stop iterating as soon as both rows are available
      break if parsed.size == 2
    end

    header, columns = parsed
    [header || [], columns || []]
  end

  # Normalizes one raw row into an Array of cell values.
  #
  # @param row [Hash, Array] raw row; the Creek XLSX parser returns a Hash
  #   of the row, Roo returns an Array
  # @param sheet [Object] the sheet the row was read from; only consulted
  #   (is it a Roo::Excelx?) when +row+ is not a Hash
  # @return [Array<String, nil>] stringified cell values; for Roo::Excelx
  #   rows a nil cell, or a cell whose value is nil, stays nil, while in
  #   the other branches nil becomes an empty string
  def self.parse_row(row, sheet)
    return row.values.map(&:to_s) if row.is_a?(Hash)
    # Roo::Excelx rows hold cell objects, so unwrap each cell's value
    return row.map { |cell| cell&.value&.to_s } if sheet.is_a?(Roo::Excelx)

    row.map(&:to_s)
  end
end
|