From 92b4d95c721efa77df8b35b22d2974ff38cf067f Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Thu, 1 Oct 2020 17:19:14 +0200 Subject: [PATCH 1/7] Implement lists in DOCX --- app/services/reports/docx.rb | 120 +++++++++++++++++-- app/services/reports/docx/private_methods.rb | 54 ++++++++- spec/services/reports/docx/docx_spec.rb | 29 +++++ 3 files changed, 192 insertions(+), 11 deletions(-) create mode 100644 spec/services/reports/docx/docx_spec.rb diff --git a/app/services/reports/docx.rb b/app/services/reports/docx.rb index 4fd93088f..dbe19c94a 100644 --- a/app/services/reports/docx.rb +++ b/app/services/reports/docx.rb @@ -40,8 +40,6 @@ class Reports::Docx end def self.render_p_element(docx, element, options = {}) - scinote_url = options[:scinote_url] - link_style = options[:link_style] docx.p do element[:children].each do |text_el| if text_el[:type] == 'text' @@ -51,18 +49,25 @@ class Reports::Docx elsif text_el[:type] == 'br' && !options[:skip_br] br elsif text_el[:type] == 'a' - if text_el[:link] - link_url = Reports::Docx.link_prepare(scinote_url, text_el[:link]) - link text_el[:value], link_url, link_style - else - text text_el[:value], link_style - end - text ' ' if text_el[:value] != '' + Reports::Docx.render_link_element(self, text_el, options) end end end end + def self.render_link_element(node, link_item, options = {}) + scinote_url = options[:scinote_url] + link_style = options[:link_style] + + if link_item[:link] + link_url = Reports::Docx.link_prepare(scinote_url, link_item[:link]) + node.link link_item[:value], link_url, link_style + else + node.text link_item[:value], link_style + end + node.text ' ' if link_item[:value] != '' + end + def self.render_img_element(docx, element, options = {}) style = element[:style] @@ -81,6 +86,103 @@ class Reports::Docx align style[:align] || :left end end + + def self.render_list_element(docx, element) + bookmark_items = Reports::Docx.recursive_list_items_renderer(docx, element) + + bookmark_items.each_with_index 
do |(key, item), index| + if item[:type] == 'image' + docx.bookmark_start id: index, name: key + Reports::Docx.render_img_element(docx, item) + docx.bookmark_end id: index + elsif item[:type] == 'table' + docx.bookmark_start id: index, name: key + # How to draw table here? + # docx = Caracal::Document + # self = Reports::Docx + # But you have instance method on Reports::Docx. How to access Reports::Docx of current docx? + # docx.tiny_mce_table(item) + docx.p do + text 'Table here soon' + end + docx.bookmark_end id: index + end + end + end + + # rubocop:disable Metrics/BlockLength + + def self.recursive_list_items_renderer(node, element, bookmark_items: {}) + node.public_send(element[:type]) do + element[:data].each do |values_array| + li do + values_array.each do |item| + case item + when Hash + if %w(ul ol li).include?(item[:type]) + Reports::Docx.recursive_list_items_renderer(self, item, bookmark_items: bookmark_items) + elsif %w(a).include?(item[:type]) + Reports::Docx.render_link_element(self, item) + elsif %w(image).include?(item[:type]) + bookmark_items[item[:bookmark_id]] = item + link 'Appended image', item[:bookmark_id] do + internal true + end + elsif %w(table).include?(item[:type]) + bookmark_items[item[:bookmark_id]] = item + link 'Appended table', item[:bookmark_id] do + internal true + end + end + else + text item + end + end + end + end + end + bookmark_items + end + + # Testing renderer, will be removed + def self.render_list_element1(docx, _elem) + docx.ol do + li 'some' + li do + text 'kekec' + text 'kekec2' + text 'kekec3' + ul do + li 'nes1' + li 'nes2' do + ul do + li '3 level1' + li '3 leve 2' do + link 'Click Here', 'https://image.shutterstock.com/image-vector/example-stamp-260nw-426673501.jpg' + p do + text 'Click Here', 'https://image.shutterstock.com/image-vector/example-stamp-260nw-426673501.jpg' + end + end + end + end + li 'nes3' + li do + bookmark_start id: 'img1', name: 'image1' + text 'bookmark is here' + bookmark_end id: 'img1' + 
end + end + end + li 'som3' + li 'some4' + end + docx.p do + bookmark_start id: 'img1', name: 'image1' + text 'bookmark is here' + bookmark_end id: 'img1' + end + end end +# rubocop:enable Metrics/BlockLength # rubocop:enable Style/ClassAndModuleChildren diff --git a/app/services/reports/docx/private_methods.rb b/app/services/reports/docx/private_methods.rb index 257d71e4b..8e3d3b664 100644 --- a/app/services/reports/docx/private_methods.rb +++ b/app/services/reports/docx/private_methods.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Reports::Docx::PrivateMethods - private + # private #Commented out just for testing. # RTE fields support def html_to_word_converter(text) @@ -33,15 +33,18 @@ module Reports::Docx::PrivateMethods end elsif elem[:type] == 'image' Reports::Docx.render_img_element(@docx, elem) + elsif %w(ul ol).include?(elem[:type]) + Reports::Docx.render_list_element(@docx, elem) end end end def combine_docx_elements(raw_elements) + # Word does not support some nested elements, move some elements to root level elements = [] temp_p = [] raw_elements.each do |elem| - if %w(image newline table).include? elem[:type] + if %w(image newline table ol ul).include? elem[:type] unless temp_p.empty? elements.push(type: 'p', children: temp_p) temp_p = [] @@ -114,6 +117,11 @@ module Reports::Docx::PrivateMethods next end + if %w(ul ol).include?(elem.name) + elements.push(list_element(elem)) + next + end + elements = recursive_children(elem.children, elements) if elem.children end elements @@ -135,6 +143,48 @@ module Reports::Docx::PrivateMethods } end + # rubocop:disable Metrics/BlockLength + def list_element(list_element) + data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do |li_child| + li_child.children.map do |item| + if item.is_a? 
Nokogiri::XML::Text + item.text.chomp + elsif %w(ul ol).include?(item.name) + list_element(item) + elsif %w(a).include?(item.name) + link_element(item) + elsif %w(img).include?(item.name) + + # this will be extracted to new method with code from line 85 + next unless item.attributes['data-mce-token'] + + image = TinyMceAsset.find_by(id: Base62.decode(item.attributes['data-mce-token'].value)) + next unless image + + image_path = image_path(image.image) + dimension = FastImage.size(image_path) + + next unless dimension + + style = image_styling(item, dimension) + + { + type: 'image', + data: image_path.split('&')[0], + blob: image.blob, + style: style, + bookmark_id: SecureRandom.hex + } + elsif %w(table).include?(item.name) + item = tiny_mce_table(item, nested_table: true) + { type: 'table', data: item, bookmark_id: SecureRandom.hex } + end + end.reject(&:blank?) + end + { type: list_element.name, data: data_array } + end + # rubocop:enable Metrics/BlockLength + def smart_annotation_check(elem) return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' diff --git a/spec/services/reports/docx/docx_spec.rb b/spec/services/reports/docx/docx_spec.rb new file mode 100644 index 000000000..ad8ad6cbf --- /dev/null +++ b/spec/services/reports/docx/docx_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe Reports::Docx do + let(:user) { create :user } + let(:team) { create :team } + let(:docx) { double('docx') } + let(:report) { described_class.new({}.to_json, docx, user: user, team: team, scinote_url: 'scinote.test') } + + describe 'html_list' do + let(:text) do + '' + end + let(:xml_elements) { Nokogiri::HTML(text).css('body').children.first } + let(:result) do + { + type: 'ul', + data: [%w(1), + ['2', { type: 'ul', data: [%w(one), ['two', { type: 'ol', data: [%w(uno), %w(due)] }]] }], + %w(3), %w(4), %w(5)] + } + end + it '' do + expect(report.send(:list_element, xml_elements)).to be == result + end + end +end 
From dfca798e2dedbbd4375742da7a68a7f48df8d53c Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Fri, 9 Oct 2020 08:22:38 +0200 Subject: [PATCH 2/7] Refactored tiny_mce_table parser and renderer --- app/services/reports/docx.rb | 34 ++++-- app/services/reports/docx/private_methods.rb | 103 +++++++++++-------- spec/services/reports/docx/docx_spec.rb | 36 ++++++- 3 files changed, 124 insertions(+), 49 deletions(-) diff --git a/app/services/reports/docx.rb b/app/services/reports/docx.rb index dbe19c94a..3a937d6aa 100644 --- a/app/services/reports/docx.rb +++ b/app/services/reports/docx.rb @@ -87,7 +87,7 @@ class Reports::Docx end end - def self.render_list_element(docx, element) + def self.render_list_element(docx, element, options = {}) bookmark_items = Reports::Docx.recursive_list_items_renderer(docx, element) bookmark_items.each_with_index do |(key, item), index| @@ -97,14 +97,11 @@ class Reports::Docx docx.bookmark_end id: index elsif item[:type] == 'table' docx.bookmark_start id: index, name: key - # How to draw table here? - # docx = Caracal::Document - # self = Reports::Docx - # But you have instance method on Reports::Docx. How to access Reports::Docx of current docx? 
- # docx.tiny_mce_table(item) + docx.p do - text 'Table here soon' + text '' end + Reports::Docx.render_table_element(docx, item, options) docx.bookmark_end id: index end end @@ -144,6 +141,29 @@ class Reports::Docx bookmark_items end + def self.render_table_element(docx, element, options = {}) + docx_table = [] + element[:data].each do |row| + docx_row = [] + row[:data].each do |cell| + docx_cell = Caracal::Core::Models::TableCellModel.new do |c| + cell.each do |content| + if content[:type] == 'p' + Reports::Docx.render_p_element(c, content, options.merge({ skip_br: true })) + elsif content[:type] == 'table' + Reports::Docx.render_table_element(c, content, options) + elsif content[:type] == 'image' + Reports::Docx.render_img_element(c, content, table: { columns: row.children.length / 3 }) + end + end + end + docx_row.push(docx_cell) + end + docx_table.push(docx_row) + end + docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE + end + # Testing renderer, will be removed def self.render_list_element1(docx, _elem) docx.ol do diff --git a/app/services/reports/docx/private_methods.rb b/app/services/reports/docx/private_methods.rb index 8e3d3b664..8b15bbe20 100644 --- a/app/services/reports/docx/private_methods.rb +++ b/app/services/reports/docx/private_methods.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module Reports::Docx::PrivateMethods - # private #Commented out just for testing. 
+ private # RTE fields support def html_to_word_converter(text) @@ -16,7 +16,8 @@ module Reports::Docx::PrivateMethods if elem[:type] == 'p' Reports::Docx.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) elsif elem[:type] == 'table' - tiny_mce_table(elem[:data]) + # tiny_mce_table(elem[:data]) + Reports::Docx.render_table_element(@docx, elem) elsif elem[:type] == 'newline' style = elem[:style] || {} # print heading if its heading @@ -59,7 +60,7 @@ module Reports::Docx::PrivateMethods end # Convert HTML structure to plain text structure - def recursive_children(children, elements, options = {}) + def recursive_children(children, elements, _options = {}) children.each do |elem| if elem.class == Nokogiri::XML::Text next if elem.text.strip == ' ' # Invisible symbol @@ -109,11 +110,12 @@ module Reports::Docx::PrivateMethods end if elem.name == 'table' - elem = tiny_mce_table(elem, nested_table: true) if options[:nested_tables] - elements.push( - type: 'table', - data: elem - ) + # elem = tiny_mce_table(elem, nested_table: true) if options[:nested_tables] + # elements.push( + # type: 'table', + # data: elem + # ) + elements.push(tiny_mce_table_element(elem)) next end @@ -176,8 +178,7 @@ module Reports::Docx::PrivateMethods bookmark_id: SecureRandom.hex } elsif %w(table).include?(item.name) - item = tiny_mce_table(item, nested_table: true) - { type: 'table', data: item, bookmark_id: SecureRandom.hex } + { type: 'table', data: tiny_mce_table_element(item)[:data], bookmark_id: SecureRandom.hex } end end.reject(&:blank?) 
end @@ -320,46 +321,66 @@ module Reports::Docx::PrivateMethods } end - def tiny_mce_table(table_data, options = {}) - docx_table = [] - scinote_url = @scinote_url - link_style = @link_style - table_data.css('tbody').first.children.each do |row| - docx_row = [] + def tiny_mce_table_element(table_element) + # array of elements + rows = table_element.css('tbody').first.children.map do |row| next unless row.name == 'tr' - row.children.each do |cell| + cells = row.children.map do |cell| next unless cell.name == 'td' # Parse cell content - formated_cell = recursive_children(cell.children, [], nested_tables: true) + formated_cell = recursive_children(cell.children, []) + # Combine text elements to single paragraph formated_cell = combine_docx_elements(formated_cell) - - docx_cell = Caracal::Core::Models::TableCellModel.new do |c| - formated_cell.each do |cell_content| - if cell_content[:type] == 'p' - Reports::Docx.render_p_element(c, cell_content, - scinote_url: scinote_url, link_style: link_style, skip_br: true) - elsif cell_content[:type] == 'table' - c.table formated_cell_content[:data], border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE - elsif cell_content[:type] == 'image' - Reports::Docx.render_img_element(c, cell_content, table: { columns: row.children.length / 3 }) - end - end - end - docx_row.push(docx_cell) - end - docx_table.push(docx_row) - end - - if options[:nested_table] - docx_table - else - @docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE - end + formated_cell + end.reject(&:blank?) + { type: 'tr', data: cells } + end.reject(&:blank?) 
+ { type: 'table', data: rows } end + # def tiny_mce_table(table_data, options = {}) + # docx_table = [] + # scinote_url = @scinote_url + # link_style = @link_style + # table_data.css('tbody').first.children.each do |row| + # docx_row = [] + # next unless row.name == 'tr' + # + # row.children.each do |cell| + # next unless cell.name == 'td' + # + # # Parse cell content + # formated_cell = recursive_children(cell.children, [], nested_tables: true) + # # Combine text elements to single paragraph + # formated_cell = combine_docx_elements(formated_cell) + # + # docx_cell = Caracal::Core::Models::TableCellModel.new do |c| + # formated_cell.each do |cell_content| + # if cell_content[:type] == 'p' + # Reports::Docx.render_p_element(c, cell_content, + # scinote_url: scinote_url, link_style: link_style, skip_br: true) + # elsif cell_content[:type] == 'table' + # c.table formated_cell_content[:data], border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE + # elsif cell_content[:type] == 'image' + # Reports::Docx.render_img_element(c, cell_content, table: { columns: row.children.length / 3 }) + # end + # end + # end + # docx_row.push(docx_cell) + # end + # docx_table.push(docx_row) + # end + # + # if options[:nested_table] + # docx_table + # else + # @docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE + # end + # end + def image_path(attachment) attachment.service_url end diff --git a/spec/services/reports/docx/docx_spec.rb b/spec/services/reports/docx/docx_spec.rb index ad8ad6cbf..16b1b3ac8 100644 --- a/spec/services/reports/docx/docx_spec.rb +++ b/spec/services/reports/docx/docx_spec.rb @@ -23,7 +23,41 @@ describe Reports::Docx do } end it '' do - expect(report.send(:list_element, xml_elements)).to be == result + expect(report.__send__(:list_element, xml_elements)).to be == result + end + end + + describe '.tiny_mce_table_element' do + let(:text) do + # rubocop:disable Layout/LineLength + '
12
34
' + # rubocop:enable Layout/LineLength + end + let(:xml_elements) { Nokogiri::HTML(text).css('body').children.first } + let(:result) do + { + data: [ + { + data: [ + [{ children: [{ style: {}, type: 'text', value: '1' }], type: 'p' }], + [{ children: [{ style: {}, type: 'text', value: '2' }], type: 'p' }] + ], + type: 'tr' + }, + { + data: [ + [{ children: [{ style: {}, type: 'text', value: '3' }], type: 'p' }], + [{ children: [{ style: {}, type: 'text', value: '4' }], type: 'p' }] + ], + type: 'tr' + } + ], + type: 'table' + } + end + + it '' do + expect(report.__send__(:tiny_mce_table_element, xml_elements)).to be == result end end end From 274293348ea3bd19a16c2f921a794532c27fcb18 Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Thu, 1 Oct 2020 17:19:14 +0200 Subject: [PATCH 3/7] Clean up --- app/services/reports/docx.rb | 51 ++------- app/services/reports/docx/private_methods.rb | 110 ++++--------------- 2 files changed, 29 insertions(+), 132 deletions(-) diff --git a/app/services/reports/docx.rb b/app/services/reports/docx.rb index 3a937d6aa..a8cdad749 100644 --- a/app/services/reports/docx.rb +++ b/app/services/reports/docx.rb @@ -93,12 +93,18 @@ class Reports::Docx bookmark_items.each_with_index do |(key, item), index| if item[:type] == 'image' docx.bookmark_start id: index, name: key + docx.p do + br + text item[:blob]&.filename.to_s + end Reports::Docx.render_img_element(docx, item) docx.bookmark_end id: index elsif item[:type] == 'table' docx.bookmark_start id: index, name: key + # Bookmark won't work with table only, empty p element added docx.p do + br text '' end Reports::Docx.render_table_element(docx, item, options) @@ -108,7 +114,6 @@ class Reports::Docx end # rubocop:disable Metrics/BlockLength - def self.recursive_list_items_renderer(node, element, bookmark_items: {}) node.public_send(element[:type]) do element[:data].each do |values_array| @@ -122,7 +127,7 @@ class Reports::Docx Reports::Docx.render_link_element(self, item) elsif 
%w(image).include?(item[:type]) bookmark_items[item[:bookmark_id]] = item - link 'Appended image', item[:bookmark_id] do + link "Appended image - #{item[:blob]&.filename}", item[:bookmark_id] do internal true end elsif %w(table).include?(item[:type]) @@ -140,6 +145,7 @@ class Reports::Docx end bookmark_items end + # rubocop:enable Metrics/BlockLength def self.render_table_element(docx, element, options = {}) docx_table = [] @@ -163,46 +169,5 @@ class Reports::Docx end docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE end - - # Testing renderer, will be removed - def self.render_list_element1(docx, _elem) - docx.ol do - li 'some' - li do - text 'kekec' - text 'kekec2' - text 'kekec3' - ul do - li 'nes1' - li 'nes2' do - ul do - li '3 level1' - li '3 leve 2' do - link 'Click Here', 'https://image.shutterstock.com/image-vector/example-stamp-260nw-426673501.jpg' - p do - text 'Click Here', 'https://image.shutterstock.com/image-vector/example-stamp-260nw-426673501.jpg' - end - end - end - end - li 'nes3' - li do - bookmark_start id: 'img1', name: 'image1' - text 'bookmark is here' - bookmark_end id: 'img1' - end - end - end - li 'som3' - li 'some4' - end - docx.p do - bookmark_start id: 'img1', name: 'image1' - text 'bookmark is here' - bookmark_end id: 'img1' - end - end end - -# rubocop:enable Metrics/BlockLength # rubocop:enable Style/ClassAndModuleChildren diff --git a/app/services/reports/docx/private_methods.rb b/app/services/reports/docx/private_methods.rb index 8b15bbe20..ac78facb4 100644 --- a/app/services/reports/docx/private_methods.rb +++ b/app/services/reports/docx/private_methods.rb @@ -16,7 +16,6 @@ module Reports::Docx::PrivateMethods if elem[:type] == 'p' Reports::Docx.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) elsif elem[:type] == 'table' - # tiny_mce_table(elem[:data]) Reports::Docx.render_table_element(@docx, elem) elsif elem[:type] == 'newline' style = elem[:style] || {} @@ -60,7 
+59,7 @@ module Reports::Docx::PrivateMethods end # Convert HTML structure to plain text structure - def recursive_children(children, elements, _options = {}) + def recursive_children(children, elements) children.each do |elem| if elem.class == Nokogiri::XML::Text next if elem.text.strip == ' ' # Invisible symbol @@ -83,24 +82,8 @@ module Reports::Docx::PrivateMethods next end - if elem.name == 'img' && elem.attributes['data-mce-token'] - - image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) - next unless image - - image_path = image_path(image.image) - dimension = FastImage.size(image_path) - - next unless dimension - - style = image_styling(elem, dimension) - - elements.push( - type: 'image', - data: image_path.split('&')[0], - blob: image.blob, - style: style - ) + if elem.name == 'img' + elements.push(img_element(elem)) next end @@ -110,11 +93,6 @@ module Reports::Docx::PrivateMethods end if elem.name == 'table' - # elem = tiny_mce_table(elem, nested_table: true) if options[:nested_tables] - # elements.push( - # type: 'table', - # data: elem - # ) elements.push(tiny_mce_table_element(elem)) next end @@ -129,6 +107,22 @@ module Reports::Docx::PrivateMethods elements end + def img_element(elem) + return unless elem.attributes['data-mce-token'] + + image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) + return unless image + + image_path = image_path(image.image) + dimension = FastImage.size(image_path) + + return unless dimension + + style = image_styling(elem, dimension) + + { type: 'image', data: image_path.split('&')[0], blob: image.blob, style: style } + end + def link_element(elem) text = elem.text link = elem.attributes['href'].value if elem.attributes['href'] @@ -145,7 +139,6 @@ module Reports::Docx::PrivateMethods } end - # rubocop:disable Metrics/BlockLength def list_element(list_element) data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do 
|li_child| li_child.children.map do |item| @@ -156,35 +149,14 @@ module Reports::Docx::PrivateMethods elsif %w(a).include?(item.name) link_element(item) elsif %w(img).include?(item.name) - - # this will be extracted to new method with code from line 85 - next unless item.attributes['data-mce-token'] - - image = TinyMceAsset.find_by(id: Base62.decode(item.attributes['data-mce-token'].value)) - next unless image - - image_path = image_path(image.image) - dimension = FastImage.size(image_path) - - next unless dimension - - style = image_styling(item, dimension) - - { - type: 'image', - data: image_path.split('&')[0], - blob: image.blob, - style: style, - bookmark_id: SecureRandom.hex - } + img_element(item).merge(bookmark_id: SecureRandom.hex) elsif %w(table).include?(item.name) - { type: 'table', data: tiny_mce_table_element(item)[:data], bookmark_id: SecureRandom.hex } + tiny_mce_table_element(item).merge(bookmark_id: SecureRandom.hex) end end.reject(&:blank?) end { type: list_element.name, data: data_array } end - # rubocop:enable Metrics/BlockLength def smart_annotation_check(elem) return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' @@ -341,46 +313,6 @@ module Reports::Docx::PrivateMethods { type: 'table', data: rows } end - # def tiny_mce_table(table_data, options = {}) - # docx_table = [] - # scinote_url = @scinote_url - # link_style = @link_style - # table_data.css('tbody').first.children.each do |row| - # docx_row = [] - # next unless row.name == 'tr' - # - # row.children.each do |cell| - # next unless cell.name == 'td' - # - # # Parse cell content - # formated_cell = recursive_children(cell.children, [], nested_tables: true) - # # Combine text elements to single paragraph - # formated_cell = combine_docx_elements(formated_cell) - # - # docx_cell = Caracal::Core::Models::TableCellModel.new do |c| - # formated_cell.each do |cell_content| - # if cell_content[:type] == 'p' - # Reports::Docx.render_p_element(c, cell_content, - # 
scinote_url: scinote_url, link_style: link_style, skip_br: true) - # elsif cell_content[:type] == 'table' - # c.table formated_cell_content[:data], border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE - # elsif cell_content[:type] == 'image' - # Reports::Docx.render_img_element(c, cell_content, table: { columns: row.children.length / 3 }) - # end - # end - # end - # docx_row.push(docx_cell) - # end - # docx_table.push(docx_row) - # end - # - # if options[:nested_table] - # docx_table - # else - # @docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE - # end - # end - def image_path(attachment) attachment.service_url end From ae8b2cbf8f503b764096d4a34391e9ed450b75ed Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Tue, 13 Oct 2020 08:37:32 +0200 Subject: [PATCH 4/7] Add text to I18n --- app/services/reports/docx.rb | 5 +++-- config/locales/en.yml | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/app/services/reports/docx.rb b/app/services/reports/docx.rb index a8cdad749..0142da60f 100644 --- a/app/services/reports/docx.rb +++ b/app/services/reports/docx.rb @@ -127,12 +127,13 @@ class Reports::Docx Reports::Docx.render_link_element(self, item) elsif %w(image).include?(item[:type]) bookmark_items[item[:bookmark_id]] = item - link "Appended image - #{item[:blob]&.filename}", item[:bookmark_id] do + link I18n.t('projects.reports.renderers.lists.appended_image', + name: item[:blob]&.filename), item[:bookmark_id] do internal true end elsif %w(table).include?(item[:type]) bookmark_items[item[:bookmark_id]] = item - link 'Appended table', item[:bookmark_id] do + link I18n.t('projects.reports.renderers.lists.appended_table'), item[:bookmark_id] do internal true end end diff --git a/config/locales/en.yml b/config/locales/en.yml index a2f4d4a50..ba2acfbc2 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -453,6 +453,10 @@ en: nothing_selected: "Nothing selected" generate_PDF: generated_on: "Report generated by 
SciNote on: %{timestamp}" + renderers: + lists: + appended_image: "Appended image - %{name}" + appended_table: "Appended table" elements: modals: project_contents: From 1b860afe302a9a2ae46d04ec33829d24caf33ba9 Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Tue, 13 Oct 2020 09:45:13 +0200 Subject: [PATCH 5/7] Move TinyMCE parser to separate class --- app/services/reports/docx/private_methods.rb | 275 ------------------ app/services/reports/docx/tiny_mce_parser.rb | 281 +++++++++++++++++++ 2 files changed, 281 insertions(+), 275 deletions(-) create mode 100644 app/services/reports/docx/tiny_mce_parser.rb diff --git a/app/services/reports/docx/private_methods.rb b/app/services/reports/docx/private_methods.rb index ac78facb4..3a17a5d77 100644 --- a/app/services/reports/docx/private_methods.rb +++ b/app/services/reports/docx/private_methods.rb @@ -3,245 +3,6 @@ module Reports::Docx::PrivateMethods private - # RTE fields support - def html_to_word_converter(text) - html = Nokogiri::HTML(text) - raw_elements = recursive_children(html.css('body').children, []) - - # Combined raw text blocks in paragraphs - elements = combine_docx_elements(raw_elements) - - # Draw elements - elements.each do |elem| - if elem[:type] == 'p' - Reports::Docx.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) - elsif elem[:type] == 'table' - Reports::Docx.render_table_element(@docx, elem) - elsif elem[:type] == 'newline' - style = elem[:style] || {} - # print heading if its heading - # Mixing heading with other style setting causes problems for Word - if %w(h1 h2 h3 h4 h5).include?(style[:style]) - @docx.public_send(style[:style], elem[:value]) - else - @docx.p elem[:value] do - align style[:align] - color style[:color] - bold style[:bold] - italic style[:italic] - end - end - elsif elem[:type] == 'image' - Reports::Docx.render_img_element(@docx, elem) - elsif %w(ul ol).include?(elem[:type]) - Reports::Docx.render_list_element(@docx, elem) - end - end - end - - 
def combine_docx_elements(raw_elements) - # Word does not support some nested elements, move some elements to root level - elements = [] - temp_p = [] - raw_elements.each do |elem| - if %w(image newline table ol ul).include? elem[:type] - unless temp_p.empty? - elements.push(type: 'p', children: temp_p) - temp_p = [] - end - elements.push(elem) - elsif %w(br text a).include? elem[:type] - temp_p.push(elem) - end - end - elements.push(type: 'p', children: temp_p) - elements - end - - # Convert HTML structure to plain text structure - def recursive_children(children, elements) - children.each do |elem| - if elem.class == Nokogiri::XML::Text - next if elem.text.strip == ' ' # Invisible symbol - - style = paragraph_styling(elem.parent) - type = (style[:align] && style[:align] != :justify) || style[:style] ? 'newline' : 'text' - - text = smart_annotation_check(elem) - - elements.push( - type: type, - value: text.strip.delete(' '), # Invisible symbol - style: style - ) - next - end - - if elem.name == 'br' - elements.push(type: 'br') - next - end - - if elem.name == 'img' - elements.push(img_element(elem)) - next - end - - if elem.name == 'a' - elements.push(link_element(elem)) - next - end - - if elem.name == 'table' - elements.push(tiny_mce_table_element(elem)) - next - end - - if %w(ul ol).include?(elem.name) - elements.push(list_element(elem)) - next - end - - elements = recursive_children(elem.children, elements) if elem.children - end - elements - end - - def img_element(elem) - return unless elem.attributes['data-mce-token'] - - image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) - return unless image - - image_path = image_path(image.image) - dimension = FastImage.size(image_path) - - return unless dimension - - style = image_styling(elem, dimension) - - { type: 'image', data: image_path.split('&')[0], blob: image.blob, style: style } - end - - def link_element(elem) - text = elem.text - link = elem.attributes['href'].value if 
elem.attributes['href'] - if elem.attributes['class']&.value == 'record-info-link' - link = nil - text = "##{text}" - end - text = "##{text}" if elem.parent.attributes['class']&.value == 'atwho-inserted' - text = "@#{text}" if elem.attributes['class']&.value == 'atwho-user-popover' - { - type: 'a', - value: text, - link: link - } - end - - def list_element(list_element) - data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do |li_child| - li_child.children.map do |item| - if item.is_a? Nokogiri::XML::Text - item.text.chomp - elsif %w(ul ol).include?(item.name) - list_element(item) - elsif %w(a).include?(item.name) - link_element(item) - elsif %w(img).include?(item.name) - img_element(item).merge(bookmark_id: SecureRandom.hex) - elsif %w(table).include?(item.name) - tiny_mce_table_element(item).merge(bookmark_id: SecureRandom.hex) - end - end.reject(&:blank?) - end - { type: list_element.name, data: data_array } - end - - def smart_annotation_check(elem) - return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' - - elem.text - end - - # Prepare style for text - def paragraph_styling(elem) - style = elem.attributes['style'] - result = {} - result[:style] = elem.name if elem.name.include? 'h' - result[:bold] = true if elem.name == 'strong' - result[:italic] = true if elem.name == 'em' - style_keys = %w(text-align color) - - if style - style_keys.each do |key| - style_el = style.value.split(';').select { |i| (i.include? 
key) }[0] - next unless style_el - - value = style_el.split(':')[1].strip if style_el - if key == 'text-align' - result[:align] = value.to_sym - elsif key == 'color' && calculate_color_hsp(value) < 190 - result[:color] = value.delete('#') - end - end - end - result - end - - # Prepare style for images - def image_styling(elem, dimension) - dimension[0] = elem.attributes['width'].value.to_i if elem.attributes['width'] - dimension[1] = elem.attributes['height'].value.to_i if elem.attributes['height'] - - if elem.attributes['style'] - align = if elem.attributes['style'].value.include? 'margin-right' - :center - elsif elem.attributes['style'].value.include? 'float: right' - :right - else - :left - end - end - - margins = Constants::REPORT_DOCX_MARGIN_LEFT + Constants::REPORT_DOCX_MARGIN_RIGHT - max_width = (Constants::REPORT_DOCX_WIDTH - margins) / 20 - - if dimension[0] > max_width - x = max_width - y = dimension[1] * max_width / dimension[0] - else - x = dimension[0] - y = dimension[1] - end - - { - width: x, - height: y, - align: align, - max_width: max_width - } - end - - def asset_image_preparing(asset) - return unless asset - - image_path = image_path(asset.file) - - dimension = FastImage.size(image_path) - x = dimension[0] - y = dimension[1] - if x > 300 - y = y * 300 / x - x = 300 - end - @docx.img image_path.split('&')[0] do - data asset.blob.download - width x - height y - end - end - def initial_document_load @docx.page_size do width Constants::REPORT_DOCX_WIDTH @@ -292,40 +53,4 @@ module Reports::Docx::PrivateMethods green: '2dbe61' } end - - def tiny_mce_table_element(table_element) - # array of elements - rows = table_element.css('tbody').first.children.map do |row| - next unless row.name == 'tr' - - cells = row.children.map do |cell| - next unless cell.name == 'td' - - # Parse cell content - formated_cell = recursive_children(cell.children, []) - - # Combine text elements to single paragraph - formated_cell = combine_docx_elements(formated_cell) - 
formated_cell - end.reject(&:blank?) - { type: 'tr', data: cells } - end.reject(&:blank?) - { type: 'table', data: rows } - end - - def image_path(attachment) - attachment.service_url - end - - def calculate_color_hsp(color) - return 255 if color.length != 7 - - color = color.delete('#').scan(/.{1,2}/) - rgb = color.map(&:hex) - Math.sqrt( - 0.299 * (rgb[0]**2) + - 0.587 * (rgb[1]**2) + - 0.114 * (rgb[2]**2) - ) - end end diff --git a/app/services/reports/docx/tiny_mce_parser.rb b/app/services/reports/docx/tiny_mce_parser.rb new file mode 100644 index 000000000..223ec2be0 --- /dev/null +++ b/app/services/reports/docx/tiny_mce_parser.rb @@ -0,0 +1,281 @@ +# frozen_string_literal: true + +module Reports::Docx::TinyMceParser + private + + def html_to_word_converter(text) + html = Nokogiri::HTML(text) + raw_elements = recursive_children(html.css('body').children, []) + + # Combined raw text blocks in paragraphs + elements = combine_docx_elements(raw_elements) + + # Draw elements + elements.each do |elem| + if elem[:type] == 'p' + Reports::Docx.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) + elsif elem[:type] == 'table' + Reports::Docx.render_table_element(@docx, elem) + elsif elem[:type] == 'newline' + style = elem[:style] || {} + # print heading if its heading + # Mixing heading with other style setting causes problems for Word + if %w(h1 h2 h3 h4 h5).include?(style[:style]) + @docx.public_send(style[:style], elem[:value]) + else + @docx.p elem[:value] do + align style[:align] + color style[:color] + bold style[:bold] + italic style[:italic] + end + end + elsif elem[:type] == 'image' + Reports::Docx.render_img_element(@docx, elem) + elsif %w(ul ol).include?(elem[:type]) + Reports::Docx.render_list_element(@docx, elem) + end + end + end + + def combine_docx_elements(raw_elements) + # Word does not support some nested elements, move some elements to root level + elements = [] + temp_p = [] + raw_elements.each do |elem| + if %w(image 
newline table ol ul).include? elem[:type] + unless temp_p.empty? + elements.push(type: 'p', children: temp_p) + temp_p = [] + end + elements.push(elem) + elsif %w(br text a).include? elem[:type] + temp_p.push(elem) + end + end + elements.push(type: 'p', children: temp_p) + elements + end + + # Convert HTML structure to plain text structure + # rubocop:disable Metrics/BlockLength + def recursive_children(children, elements) + children.each do |elem| + if elem.class == Nokogiri::XML::Text + next if elem.text.strip == ' ' # Invisible symbol + + style = paragraph_styling(elem.parent) + type = (style[:align] && style[:align] != :justify) || style[:style] ? 'newline' : 'text' + + text = smart_annotation_check(elem) + + elements.push( + type: type, + value: text.strip.delete(' '), # Invisible symbol + style: style + ) + next + end + + if elem.name == 'br' + elements.push(type: 'br') + next + end + + if elem.name == 'img' + elements.push(img_element(elem)) + next + end + + if elem.name == 'a' + elements.push(link_element(elem)) + next + end + + if elem.name == 'table' + elements.push(tiny_mce_table_element(elem)) + next + end + + if %w(ul ol).include?(elem.name) + elements.push(list_element(elem)) + next + end + + elements = recursive_children(elem.children, elements) if elem.children + end + elements + end + # rubocop:enable Metrics/BlockLength + + def img_element(elem) + return unless elem.attributes['data-mce-token'] + + image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) + return unless image + + image_path = image_path(image.image) + dimension = FastImage.size(image_path) + + return unless dimension + + style = image_styling(elem, dimension) + + { type: 'image', data: image_path.split('&')[0], blob: image.blob, style: style } + end + + def link_element(elem) + text = elem.text + link = elem.attributes['href'].value if elem.attributes['href'] + if elem.attributes['class']&.value == 'record-info-link' + link = nil + text = "##{text}" 
+ end + text = "##{text}" if elem.parent.attributes['class']&.value == 'atwho-inserted' + text = "@#{text}" if elem.attributes['class']&.value == 'atwho-user-popover' + { + type: 'a', + value: text, + link: link + } + end + + def list_element(list_element) + data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do |li_child| + li_child.children.map do |item| + if item.is_a? Nokogiri::XML::Text + item.text.chomp + elsif %w(ul ol).include?(item.name) + list_element(item) + elsif %w(a).include?(item.name) + link_element(item) + elsif %w(img).include?(item.name) + img_element(item).merge(bookmark_id: SecureRandom.hex) + elsif %w(table).include?(item.name) + tiny_mce_table_element(item).merge(bookmark_id: SecureRandom.hex) + end + end.reject(&:blank?) + end + { type: list_element.name, data: data_array } + end + + def smart_annotation_check(elem) + return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' + + elem.text + end + + # Prepare style for text + def paragraph_styling(elem) + style = elem.attributes['style'] + result = {} + result[:style] = elem.name if elem.name.include? 'h' + result[:bold] = true if elem.name == 'strong' + result[:italic] = true if elem.name == 'em' + style_keys = %w(text-align color) + + if style + style_keys.each do |key| + style_el = style.value.split(';').select { |i| (i.include? key) }[0] + next unless style_el + + value = style_el.split(':')[1].strip if style_el + if key == 'text-align' + result[:align] = value.to_sym + elsif key == 'color' && calculate_color_hsp(value) < 190 + result[:color] = value.delete('#') + end + end + end + result + end + + # Prepare style for images + def image_styling(elem, dimension) + dimension[0] = elem.attributes['width'].value.to_i if elem.attributes['width'] + dimension[1] = elem.attributes['height'].value.to_i if elem.attributes['height'] + + if elem.attributes['style'] + align = if elem.attributes['style'].value.include? 
'margin-right' + :center + elsif elem.attributes['style'].value.include? 'float: right' + :right + else + :left + end + end + + margins = Constants::REPORT_DOCX_MARGIN_LEFT + Constants::REPORT_DOCX_MARGIN_RIGHT + max_width = (Constants::REPORT_DOCX_WIDTH - margins) / 20 + + if dimension[0] > max_width + x = max_width + y = dimension[1] * max_width / dimension[0] + else + x = dimension[0] + y = dimension[1] + end + + { + width: x, + height: y, + align: align, + max_width: max_width + } + end + + def asset_image_preparing(asset) + return unless asset + + image_path = image_path(asset.file) + + dimension = FastImage.size(image_path) + x = dimension[0] + y = dimension[1] + if x > 300 + y = y * 300 / x + x = 300 + end + @docx.img image_path.split('&')[0] do + data asset.blob.download + width x + height y + end + end + + def tiny_mce_table_element(table_element) + # array of elements + rows = table_element.css('tbody').first.children.map do |row| + next unless row.name == 'tr' + + cells = row.children.map do |cell| + next unless cell.name == 'td' + + # Parse cell content + formated_cell = recursive_children(cell.children, []) + + # Combine text elements to single paragraph + formated_cell = combine_docx_elements(formated_cell) + formated_cell + end.reject(&:blank?) + { type: 'tr', data: cells } + end.reject(&:blank?) 
+ { type: 'table', data: rows } + end + + def image_path(attachment) + attachment.service_url + end + + def calculate_color_hsp(color) + return 255 if color.length != 7 + + color = color.delete('#').scan(/.{1,2}/) + rgb = color.map(&:hex) + Math.sqrt( + 0.299 * (rgb[0]**2) + + 0.587 * (rgb[1]**2) + + 0.114 * (rgb[2]**2) + ) + end +end From a19e0ef84630b8fbe63deb24c5bce476284011c1 Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Tue, 13 Oct 2020 13:54:16 +0200 Subject: [PATCH 6/7] Refactor --- app/services/reports/docx.rb | 136 --------- app/services/reports/docx/draw_experiment.rb | 2 +- app/services/reports/docx/draw_my_module.rb | 2 +- .../reports/docx/draw_my_module_activity.rb | 2 +- .../reports/docx/draw_my_module_protocol.rb | 2 +- .../reports/docx/draw_result_asset.rb | 2 +- .../reports/docx/draw_result_comments.rb | 2 +- app/services/reports/docx/draw_result_text.rb | 2 +- app/services/reports/docx/draw_step.rb | 2 +- app/services/reports/docx/draw_step_asset.rb | 2 +- .../reports/docx/draw_step_comments.rb | 2 +- app/services/reports/docx/tiny_mce_parser.rb | 281 ------------------ app/services/reports/docx_renderer.rb | 158 ++++++++++ .../reports/html_to_word_converter.rb | 253 ++++++++++++++++ app/services/reports/utils.rb | 25 ++ ...spec.rb => html_to_word_converter_spec.rb} | 4 +- 16 files changed, 448 insertions(+), 429 deletions(-) delete mode 100644 app/services/reports/docx/tiny_mce_parser.rb create mode 100644 app/services/reports/docx_renderer.rb create mode 100644 app/services/reports/html_to_word_converter.rb create mode 100644 app/services/reports/utils.rb rename spec/services/reports/{docx/docx_spec.rb => html_to_word_converter_spec.rb} (93%) diff --git a/app/services/reports/docx.rb b/app/services/reports/docx.rb index 0142da60f..75686b989 100644 --- a/app/services/reports/docx.rb +++ b/app/services/reports/docx.rb @@ -34,141 +34,5 @@ class Reports::Docx end @docx end - - def self.link_prepare(scinote_url, link) - link[0] == '/' ? 
scinote_url + link : link - end - - def self.render_p_element(docx, element, options = {}) - docx.p do - element[:children].each do |text_el| - if text_el[:type] == 'text' - style = text_el[:style] || {} - text text_el[:value], style - text ' ' if text_el[:value] != '' - elsif text_el[:type] == 'br' && !options[:skip_br] - br - elsif text_el[:type] == 'a' - Reports::Docx.render_link_element(self, text_el, options) - end - end - end - end - - def self.render_link_element(node, link_item, options = {}) - scinote_url = options[:scinote_url] - link_style = options[:link_style] - - if link_item[:link] - link_url = Reports::Docx.link_prepare(scinote_url, link_item[:link]) - node.link link_item[:value], link_url, link_style - else - node.text link_item[:value], link_style - end - node.text ' ' if link_item[:value] != '' - end - - def self.render_img_element(docx, element, options = {}) - style = element[:style] - - if options[:table] - max_width = (style[:max_width] / options[:table][:columns].to_f) - if style[:width] > max_width - style[:height] = (max_width / style[:width].to_f) * style[:height] - style[:width] = max_width - end - end - - docx.img element[:data] do - data element[:blob].download - width style[:width] - height style[:height] - align style[:align] || :left - end - end - - def self.render_list_element(docx, element, options = {}) - bookmark_items = Reports::Docx.recursive_list_items_renderer(docx, element) - - bookmark_items.each_with_index do |(key, item), index| - if item[:type] == 'image' - docx.bookmark_start id: index, name: key - docx.p do - br - text item[:blob]&.filename.to_s - end - Reports::Docx.render_img_element(docx, item) - docx.bookmark_end id: index - elsif item[:type] == 'table' - docx.bookmark_start id: index, name: key - - # Bookmark won't work with table only, empty p element added - docx.p do - br - text '' - end - Reports::Docx.render_table_element(docx, item, options) - docx.bookmark_end id: index - end - end - end - - # 
rubocop:disable Metrics/BlockLength - def self.recursive_list_items_renderer(node, element, bookmark_items: {}) - node.public_send(element[:type]) do - element[:data].each do |values_array| - li do - values_array.each do |item| - case item - when Hash - if %w(ul ol li).include?(item[:type]) - Reports::Docx.recursive_list_items_renderer(self, item, bookmark_items: bookmark_items) - elsif %w(a).include?(item[:type]) - Reports::Docx.render_link_element(self, item) - elsif %w(image).include?(item[:type]) - bookmark_items[item[:bookmark_id]] = item - link I18n.t('projects.reports.renderers.lists.appended_image', - name: item[:blob]&.filename), item[:bookmark_id] do - internal true - end - elsif %w(table).include?(item[:type]) - bookmark_items[item[:bookmark_id]] = item - link I18n.t('projects.reports.renderers.lists.appended_table'), item[:bookmark_id] do - internal true - end - end - else - text item - end - end - end - end - end - bookmark_items - end - # rubocop:enable Metrics/BlockLength - - def self.render_table_element(docx, element, options = {}) - docx_table = [] - element[:data].each do |row| - docx_row = [] - row[:data].each do |cell| - docx_cell = Caracal::Core::Models::TableCellModel.new do |c| - cell.each do |content| - if content[:type] == 'p' - Reports::Docx.render_p_element(c, content, options.merge({ skip_br: true })) - elsif content[:type] == 'table' - Reports::Docx.render_table_element(c, content, options) - elsif content[:type] == 'image' - Reports::Docx.render_img_element(c, content, table: { columns: row.children.length / 3 }) - end - end - end - docx_row.push(docx_cell) - end - docx_table.push(docx_row) - end - docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE - end end # rubocop:enable Style/ClassAndModuleChildren diff --git a/app/services/reports/docx/draw_experiment.rb b/app/services/reports/docx/draw_experiment.rb index e61595846..e43a08e97 100644 --- a/app/services/reports/docx/draw_experiment.rb +++ 
b/app/services/reports/docx/draw_experiment.rb @@ -22,7 +22,7 @@ module Reports::Docx::DrawExperiment link_style end html = custom_auto_link(experiment.description, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) @docx.p subject['children'].each do |child| public_send("draw_#{child['type_of']}", child, experiment) diff --git a/app/services/reports/docx/draw_my_module.rb b/app/services/reports/docx/draw_my_module.rb index 1f38b7435..0cd9d512f 100644 --- a/app/services/reports/docx/draw_my_module.rb +++ b/app/services/reports/docx/draw_my_module.rb @@ -66,7 +66,7 @@ module Reports::Docx::DrawMyModule if my_module.description.present? html = custom_auto_link(my_module.description, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) else @docx.p I18n.t('projects.reports.elements.module.no_description') end diff --git a/app/services/reports/docx/draw_my_module_activity.rb b/app/services/reports/docx/draw_my_module_activity.rb index fcf753a68..8747e5937 100644 --- a/app/services/reports/docx/draw_my_module_activity.rb +++ b/app/services/reports/docx/draw_my_module_activity.rb @@ -20,7 +20,7 @@ module Reports::Docx::DrawMyModuleActivity sanitize_input(generate_activity_content(activity, true)) end @docx.p I18n.l(activity_ts, format: :full), color: color[:gray] - html_to_word_converter(activity_text) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(activity_text) @docx.p end end diff --git a/app/services/reports/docx/draw_my_module_protocol.rb b/app/services/reports/docx/draw_my_module_protocol.rb index 97d04fc07..881c79aa1 100644 --- a/app/services/reports/docx/draw_my_module_protocol.rb +++ b/app/services/reports/docx/draw_my_module_protocol.rb @@ -11,7 +11,7 @@ module Reports::Docx::DrawMyModuleProtocol timestamp: I18n.l(protocol.created_at, format: :full) @docx.hr html = custom_auto_link(protocol.description, 
team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) @docx.p @docx.p end diff --git a/app/services/reports/docx/draw_result_asset.rb b/app/services/reports/docx/draw_result_asset.rb index b1fda37fa..fb5ffa88d 100644 --- a/app/services/reports/docx/draw_result_asset.rb +++ b/app/services/reports/docx/draw_result_asset.rb @@ -17,7 +17,7 @@ module Reports::Docx::DrawResultAsset user: result.user.full_name, timestamp: I18n.l(timestamp, format: :full)), color: color[:gray] end - asset_image_preparing(asset) if asset.image? + Reports::DocxRenderer.render_asset_image(@docx, asset) if asset.image? subject['children'].each do |child| public_send("draw_#{child['type_of']}", child, result) diff --git a/app/services/reports/docx/draw_result_comments.rb b/app/services/reports/docx/draw_result_comments.rb index b738808d8..67ddfee62 100644 --- a/app/services/reports/docx/draw_result_comments.rb +++ b/app/services/reports/docx/draw_result_comments.rb @@ -17,7 +17,7 @@ module Reports::Docx::DrawResultComments date: I18n.l(comment_ts, format: :full_date), time: I18n.l(comment_ts, format: :time)), italic: true html = custom_auto_link(comment.message, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) @docx.p end end diff --git a/app/services/reports/docx/draw_result_text.rb b/app/services/reports/docx/draw_result_text.rb index ee87953e0..5ecc27158 100644 --- a/app/services/reports/docx/draw_result_text.rb +++ b/app/services/reports/docx/draw_result_text.rb @@ -17,7 +17,7 @@ module Reports::Docx::DrawResultText timestamp: I18n.l(timestamp, format: :full), user: result.user.full_name), color: color[:gray] end html = custom_auto_link(result_text.text, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) subject['children'].each do |child| public_send("draw_#{child['type_of']}", 
child, result) diff --git a/app/services/reports/docx/draw_step.rb b/app/services/reports/docx/draw_step.rb index 064d3a85a..0bd9e924e 100644 --- a/app/services/reports/docx/draw_step.rb +++ b/app/services/reports/docx/draw_step.rb @@ -27,7 +27,7 @@ module Reports::Docx::DrawStep end if step.description.present? html = custom_auto_link(step.description, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) else @docx.p I18n.t 'projects.reports.elements.step.no_description' end diff --git a/app/services/reports/docx/draw_step_asset.rb b/app/services/reports/docx/draw_step_asset.rb index e8560ddf0..80a87c8da 100644 --- a/app/services/reports/docx/draw_step_asset.rb +++ b/app/services/reports/docx/draw_step_asset.rb @@ -15,6 +15,6 @@ module Reports::Docx::DrawStepAsset timestamp: I18n.l(timestamp, format: :full)), color: color[:gray] end - asset_image_preparing(asset) if asset.image? + Reports::DocxRenderer.render_asset_image(@docx, asset) if asset.image? 
end end diff --git a/app/services/reports/docx/draw_step_comments.rb b/app/services/reports/docx/draw_step_comments.rb index 5e04956ec..12e5ed5ff 100644 --- a/app/services/reports/docx/draw_step_comments.rb +++ b/app/services/reports/docx/draw_step_comments.rb @@ -17,7 +17,7 @@ module Reports::Docx::DrawStepComments date: I18n.l(comment_ts, format: :full_date), time: I18n.l(comment_ts, format: :time)), italic: true html = custom_auto_link(comment.message, team: @report_team) - html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) @docx.p end end diff --git a/app/services/reports/docx/tiny_mce_parser.rb b/app/services/reports/docx/tiny_mce_parser.rb deleted file mode 100644 index 223ec2be0..000000000 --- a/app/services/reports/docx/tiny_mce_parser.rb +++ /dev/null @@ -1,281 +0,0 @@ -# frozen_string_literal: true - -module Reports::Docx::TinyMceParser - private - - def html_to_word_converter(text) - html = Nokogiri::HTML(text) - raw_elements = recursive_children(html.css('body').children, []) - - # Combined raw text blocks in paragraphs - elements = combine_docx_elements(raw_elements) - - # Draw elements - elements.each do |elem| - if elem[:type] == 'p' - Reports::Docx.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) - elsif elem[:type] == 'table' - Reports::Docx.render_table_element(@docx, elem) - elsif elem[:type] == 'newline' - style = elem[:style] || {} - # print heading if its heading - # Mixing heading with other style setting causes problems for Word - if %w(h1 h2 h3 h4 h5).include?(style[:style]) - @docx.public_send(style[:style], elem[:value]) - else - @docx.p elem[:value] do - align style[:align] - color style[:color] - bold style[:bold] - italic style[:italic] - end - end - elsif elem[:type] == 'image' - Reports::Docx.render_img_element(@docx, elem) - elsif %w(ul ol).include?(elem[:type]) - Reports::Docx.render_list_element(@docx, elem) - end - end - end - - def 
combine_docx_elements(raw_elements) - # Word does not support some nested elements, move some elements to root level - elements = [] - temp_p = [] - raw_elements.each do |elem| - if %w(image newline table ol ul).include? elem[:type] - unless temp_p.empty? - elements.push(type: 'p', children: temp_p) - temp_p = [] - end - elements.push(elem) - elsif %w(br text a).include? elem[:type] - temp_p.push(elem) - end - end - elements.push(type: 'p', children: temp_p) - elements - end - - # Convert HTML structure to plain text structure - # rubocop:disable Metrics/BlockLength - def recursive_children(children, elements) - children.each do |elem| - if elem.class == Nokogiri::XML::Text - next if elem.text.strip == ' ' # Invisible symbol - - style = paragraph_styling(elem.parent) - type = (style[:align] && style[:align] != :justify) || style[:style] ? 'newline' : 'text' - - text = smart_annotation_check(elem) - - elements.push( - type: type, - value: text.strip.delete(' '), # Invisible symbol - style: style - ) - next - end - - if elem.name == 'br' - elements.push(type: 'br') - next - end - - if elem.name == 'img' - elements.push(img_element(elem)) - next - end - - if elem.name == 'a' - elements.push(link_element(elem)) - next - end - - if elem.name == 'table' - elements.push(tiny_mce_table_element(elem)) - next - end - - if %w(ul ol).include?(elem.name) - elements.push(list_element(elem)) - next - end - - elements = recursive_children(elem.children, elements) if elem.children - end - elements - end - # rubocop:enable Metrics/BlockLength - - def img_element(elem) - return unless elem.attributes['data-mce-token'] - - image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) - return unless image - - image_path = image_path(image.image) - dimension = FastImage.size(image_path) - - return unless dimension - - style = image_styling(elem, dimension) - - { type: 'image', data: image_path.split('&')[0], blob: image.blob, style: style } - end - - def 
link_element(elem) - text = elem.text - link = elem.attributes['href'].value if elem.attributes['href'] - if elem.attributes['class']&.value == 'record-info-link' - link = nil - text = "##{text}" - end - text = "##{text}" if elem.parent.attributes['class']&.value == 'atwho-inserted' - text = "@#{text}" if elem.attributes['class']&.value == 'atwho-user-popover' - { - type: 'a', - value: text, - link: link - } - end - - def list_element(list_element) - data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do |li_child| - li_child.children.map do |item| - if item.is_a? Nokogiri::XML::Text - item.text.chomp - elsif %w(ul ol).include?(item.name) - list_element(item) - elsif %w(a).include?(item.name) - link_element(item) - elsif %w(img).include?(item.name) - img_element(item).merge(bookmark_id: SecureRandom.hex) - elsif %w(table).include?(item.name) - tiny_mce_table_element(item).merge(bookmark_id: SecureRandom.hex) - end - end.reject(&:blank?) - end - { type: list_element.name, data: data_array } - end - - def smart_annotation_check(elem) - return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' - - elem.text - end - - # Prepare style for text - def paragraph_styling(elem) - style = elem.attributes['style'] - result = {} - result[:style] = elem.name if elem.name.include? 'h' - result[:bold] = true if elem.name == 'strong' - result[:italic] = true if elem.name == 'em' - style_keys = %w(text-align color) - - if style - style_keys.each do |key| - style_el = style.value.split(';').select { |i| (i.include? 
key) }[0] - next unless style_el - - value = style_el.split(':')[1].strip if style_el - if key == 'text-align' - result[:align] = value.to_sym - elsif key == 'color' && calculate_color_hsp(value) < 190 - result[:color] = value.delete('#') - end - end - end - result - end - - # Prepare style for images - def image_styling(elem, dimension) - dimension[0] = elem.attributes['width'].value.to_i if elem.attributes['width'] - dimension[1] = elem.attributes['height'].value.to_i if elem.attributes['height'] - - if elem.attributes['style'] - align = if elem.attributes['style'].value.include? 'margin-right' - :center - elsif elem.attributes['style'].value.include? 'float: right' - :right - else - :left - end - end - - margins = Constants::REPORT_DOCX_MARGIN_LEFT + Constants::REPORT_DOCX_MARGIN_RIGHT - max_width = (Constants::REPORT_DOCX_WIDTH - margins) / 20 - - if dimension[0] > max_width - x = max_width - y = dimension[1] * max_width / dimension[0] - else - x = dimension[0] - y = dimension[1] - end - - { - width: x, - height: y, - align: align, - max_width: max_width - } - end - - def asset_image_preparing(asset) - return unless asset - - image_path = image_path(asset.file) - - dimension = FastImage.size(image_path) - x = dimension[0] - y = dimension[1] - if x > 300 - y = y * 300 / x - x = 300 - end - @docx.img image_path.split('&')[0] do - data asset.blob.download - width x - height y - end - end - - def tiny_mce_table_element(table_element) - # array of elements - rows = table_element.css('tbody').first.children.map do |row| - next unless row.name == 'tr' - - cells = row.children.map do |cell| - next unless cell.name == 'td' - - # Parse cell content - formated_cell = recursive_children(cell.children, []) - - # Combine text elements to single paragraph - formated_cell = combine_docx_elements(formated_cell) - formated_cell - end.reject(&:blank?) - { type: 'tr', data: cells } - end.reject(&:blank?) 
- { type: 'table', data: rows } - end - - def image_path(attachment) - attachment.service_url - end - - def calculate_color_hsp(color) - return 255 if color.length != 7 - - color = color.delete('#').scan(/.{1,2}/) - rgb = color.map(&:hex) - Math.sqrt( - 0.299 * (rgb[0]**2) + - 0.587 * (rgb[1]**2) + - 0.114 * (rgb[2]**2) - ) - end -end diff --git a/app/services/reports/docx_renderer.rb b/app/services/reports/docx_renderer.rb new file mode 100644 index 000000000..ba73b2511 --- /dev/null +++ b/app/services/reports/docx_renderer.rb @@ -0,0 +1,158 @@ +# frozen_string_literal: true + +module Reports + class DocxRenderer + def self.render_p_element(docx, element, options = {}) + docx.p do + element[:children].each do |text_el| + if text_el[:type] == 'text' + style = text_el[:style] || {} + text text_el[:value], style + text ' ' if text_el[:value] != '' + elsif text_el[:type] == 'br' && !options[:skip_br] + br + elsif text_el[:type] == 'a' + Reports::DocxRenderer.render_link_element(self, text_el, options) + end + end + end + end + + def self.render_link_element(node, link_item, options = {}) + scinote_url = options[:scinote_url] + link_style = options[:link_style] + + if link_item[:link] + link_url = Reports::Utils.link_prepare(scinote_url, link_item[:link]) + node.link link_item[:value], link_url, link_style + else + node.text link_item[:value], link_style + end + node.text ' ' if link_item[:value] != '' + end + + def self.render_img_element(docx, element, options = {}) + style = element[:style] + + if options[:table] + max_width = (style[:max_width] / options[:table][:columns].to_f) + if style[:width] > max_width + style[:height] = (max_width / style[:width].to_f) * style[:height] + style[:width] = max_width + end + end + + docx.img element[:data] do + data element[:blob].download + width style[:width] + height style[:height] + align style[:align] || :left + end + end + + def self.render_list_element(docx, element, options = {}) + bookmark_items = 
Reports::DocxRenderer.recursive_list_items_renderer(docx, element) + + bookmark_items.each_with_index do |(key, item), index| + if item[:type] == 'image' + docx.bookmark_start id: index, name: key + docx.p do + br + text item[:blob]&.filename.to_s + end + Reports::DocxRenderer.render_img_element(docx, item) + docx.bookmark_end id: index + elsif item[:type] == 'table' + docx.bookmark_start id: index, name: key + + # Bookmark won't work with table only, empty p element added + docx.p do + br + text '' + end + Reports::DocxRenderer.render_table_element(docx, item, options) + docx.bookmark_end id: index + end + end + end + + # rubocop:disable Metrics/BlockLength + def self.recursive_list_items_renderer(node, element, bookmark_items: {}) + node.public_send(element[:type]) do + element[:data].each do |values_array| + li do + values_array.each do |item| + case item + when Hash + if %w(ul ol li).include?(item[:type]) + Reports::DocxRenderer.recursive_list_items_renderer(self, item, bookmark_items: bookmark_items) + elsif %w(a).include?(item[:type]) + Reports::DocxRenderer.render_link_element(self, item) + elsif %w(image).include?(item[:type]) + bookmark_items[item[:bookmark_id]] = item + link I18n.t('projects.reports.renderers.lists.appended_image', + name: item[:blob]&.filename), item[:bookmark_id] do + internal true + end + elsif %w(table).include?(item[:type]) + bookmark_items[item[:bookmark_id]] = item + link I18n.t('projects.reports.renderers.lists.appended_table'), item[:bookmark_id] do + internal true + end + end + else + text item + end + end + end + end + end + bookmark_items + end + # rubocop:enable Metrics/BlockLength + + def self.render_table_element(docx, element, options = {}) + docx_table = [] + element[:data].each do |row| + docx_row = [] + row[:data].each do |cell| + docx_cell = Caracal::Core::Models::TableCellModel.new do |c| + cell.each do |content| + if content[:type] == 'p' + Reports::DocxRenderer.render_p_element(c, content, options.merge({ skip_br: 
true })) + elsif content[:type] == 'table' + Reports::DocxRenderer.render_table_element(c, content, options) + elsif content[:type] == 'image' + Reports::DocxRenderer.render_img_element(c, content, table: { columns: row[:data].length }) + end + end + end + docx_row.push(docx_cell) + end + docx_table.push(docx_row) + end + docx.table docx_table, border_size: Constants::REPORT_DOCX_TABLE_BORDER_SIZE + end + + def self.render_asset_image(docx, asset) + return unless asset + + image_path = Reports::Utils.image_path(asset.file) + + dimension = FastImage.size(image_path) + return unless dimension + + x = dimension[0] + y = dimension[1] + if x > 300 + y = y * 300 / x + x = 300 + end + docx.img image_path.split('&')[0] do + data asset.blob.download + width x + height y + end + end + end +end diff --git a/app/services/reports/html_to_word_converter.rb b/app/services/reports/html_to_word_converter.rb new file mode 100644 index 000000000..8c0a43268 --- /dev/null +++ b/app/services/reports/html_to_word_converter.rb @@ -0,0 +1,253 @@ +# frozen_string_literal: true + +module Reports + class HtmlToWordConverter + def initialize(document) + @docx = document + end + + def html_to_word_converter(text) + html = Nokogiri::HTML(text) + raw_elements = recursive_children(html.css('body').children, []) + + # Combined raw text blocks in paragraphs + elements = combine_docx_elements(raw_elements) + + # Draw elements + elements.each do |elem| + if elem[:type] == 'p' + Reports::DocxRenderer.render_p_element(@docx, elem, scinote_url: @scinote_url, link_style: @link_style) + elsif elem[:type] == 'table' + Reports::DocxRenderer.render_table_element(@docx, elem) + elsif elem[:type] == 'newline' + style = elem[:style] || {} + # print heading if its heading + # Mixing heading with other style setting causes problems for Word + if %w(h1 h2 h3 h4 h5).include?(style[:style]) + @docx.public_send(style[:style], elem[:value]) + else + @docx.p elem[:value] do + align style[:align] + color 
style[:color] + bold style[:bold] + italic style[:italic] + end + end + elsif elem[:type] == 'image' + Reports::DocxRenderer.render_img_element(@docx, elem) + elsif %w(ul ol).include?(elem[:type]) + Reports::DocxRenderer.render_list_element(@docx, elem) + end + end + end + + private + + def combine_docx_elements(raw_elements) + # Word does not support some nested elements, move some elements to root level + elements = [] + temp_p = [] + raw_elements.each do |elem| + if %w(image newline table ol ul).include? elem[:type] + unless temp_p.empty? + elements.push(type: 'p', children: temp_p) + temp_p = [] + end + elements.push(elem) + elsif %w(br text a).include? elem[:type] + temp_p.push(elem) + end + end + elements.push(type: 'p', children: temp_p) + elements + end + + # Convert HTML structure to plain text structure + # rubocop:disable Metrics/BlockLength + def recursive_children(children, elements) + children.each do |elem| + if elem.class == Nokogiri::XML::Text + next if elem.text.strip == ' ' # Invisible symbol + + style = paragraph_styling(elem.parent) + type = (style[:align] && style[:align] != :justify) || style[:style] ? 
'newline' : 'text' + + text = smart_annotation_check(elem) + + elements.push( + type: type, + value: text.strip.delete(' '), # Invisible symbol + style: style + ) + next + end + + if elem.name == 'br' + elements.push(type: 'br') + next + end + + if elem.name == 'img' + elements.push(img_element(elem)) + next + end + + if elem.name == 'a' + elements.push(link_element(elem)) + next + end + + if elem.name == 'table' + elements.push(tiny_mce_table_element(elem)) + next + end + + if %w(ul ol).include?(elem.name) + elements.push(list_element(elem)) + next + end + + elements = recursive_children(elem.children, elements) if elem.children + end + elements + end + + # rubocop:enable Metrics/BlockLength + + def img_element(elem) + return unless elem.attributes['data-mce-token'] + + image = TinyMceAsset.find_by(id: Base62.decode(elem.attributes['data-mce-token'].value)) + return unless image + + image_path = Reports::Utils.image_path(image.image) + dimension = FastImage.size(image_path) + + return unless dimension + + style = image_styling(elem, dimension) + + { type: 'image', data: image_path.split('&')[0], blob: image.blob, style: style } + end + + def link_element(elem) + text = elem.text + link = elem.attributes['href'].value if elem.attributes['href'] + if elem.attributes['class']&.value == 'record-info-link' + link = nil + text = "##{text}" + end + text = "##{text}" if elem.parent.attributes['class']&.value == 'atwho-inserted' + text = "@#{text}" if elem.attributes['class']&.value == 'atwho-user-popover' + { + type: 'a', + value: text, + link: link + } + end + + def list_element(list_element) + data_array = list_element.children.select { |n| %w(li ul ol a img).include?(n.name) }.map do |li_child| + li_child.children.map do |item| + if item.is_a? 
Nokogiri::XML::Text + item.text.chomp + elsif %w(ul ol).include?(item.name) + list_element(item) + elsif %w(a).include?(item.name) + link_element(item) + elsif %w(img).include?(item.name) + img_element(item)&.merge(bookmark_id: SecureRandom.hex) + elsif %w(table).include?(item.name) + tiny_mce_table_element(item).merge(bookmark_id: SecureRandom.hex) + end + end.reject(&:blank?) + end + { type: list_element.name, data: data_array } + end + + def smart_annotation_check(elem) + return "[#{elem.text}]" if elem.parent.attributes['class']&.value == 'sa-type' + + elem.text + end + + # Prepare style for text + def paragraph_styling(elem) + style = elem.attributes['style'] + result = {} + result[:style] = elem.name if elem.name.include? 'h' + result[:bold] = true if elem.name == 'strong' + result[:italic] = true if elem.name == 'em' + style_keys = %w(text-align color) + + if style + style_keys.each do |key| + style_el = style.value.split(';').select { |i| (i.include? key) }[0] + next unless style_el + + value = style_el.split(':')[1].strip if style_el + if key == 'text-align' + result[:align] = value.to_sym + elsif key == 'color' && Reports::Utils.calculate_color_hsp(value) < 190 + result[:color] = value.delete('#') + end + end + end + result + end + + # Prepare style for images + def image_styling(elem, dimension) + dimension[0] = elem.attributes['width'].value.to_i if elem.attributes['width'] + dimension[1] = elem.attributes['height'].value.to_i if elem.attributes['height'] + + if elem.attributes['style'] + align = if elem.attributes['style'].value.include? 'margin-right' + :center + elsif elem.attributes['style'].value.include? 
'float: right' + :right + else + :left + end + end + + margins = Constants::REPORT_DOCX_MARGIN_LEFT + Constants::REPORT_DOCX_MARGIN_RIGHT + max_width = (Constants::REPORT_DOCX_WIDTH - margins) / 20 + + if dimension[0] > max_width + x = max_width + y = dimension[1] * max_width / dimension[0] + else + x = dimension[0] + y = dimension[1] + end + + { + width: x, + height: y, + align: align, + max_width: max_width + } + end + + def tiny_mce_table_element(table_element) + # array of elements + rows = table_element.css('tbody').first.children.map do |row| + next unless row.name == 'tr' + + cells = row.children.map do |cell| + next unless cell.name == 'td' + + # Parse cell content + formated_cell = recursive_children(cell.children, []).compact + + # Combine text elements to single paragraph + formated_cell = combine_docx_elements(formated_cell) + formated_cell + end.reject(&:blank?) + { type: 'tr', data: cells } + end.reject(&:blank?) + { type: 'table', data: rows } + end + end +end diff --git a/app/services/reports/utils.rb b/app/services/reports/utils.rb new file mode 100644 index 000000000..b61f1cf1c --- /dev/null +++ b/app/services/reports/utils.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Reports + class Utils + def self.link_prepare(scinote_url, link) + link[0] == '/' ? 
scinote_url + link : link + end + + def self.image_path(attachment) + attachment.service_url + end + + def self.calculate_color_hsp(color) + return 255 if color.length != 7 + + color = color.delete('#').scan(/.{1,2}/) + rgb = color.map(&:hex) + Math.sqrt( + 0.299 * (rgb[0]**2) + + 0.587 * (rgb[1]**2) + + 0.114 * (rgb[2]**2) + ) + end + end +end diff --git a/spec/services/reports/docx/docx_spec.rb b/spec/services/reports/html_to_word_converter_spec.rb similarity index 93% rename from spec/services/reports/docx/docx_spec.rb rename to spec/services/reports/html_to_word_converter_spec.rb index 16b1b3ac8..1393d7892 100644 --- a/spec/services/reports/docx/docx_spec.rb +++ b/spec/services/reports/html_to_word_converter_spec.rb @@ -2,11 +2,11 @@ require 'rails_helper' -describe Reports::Docx do +describe Reports::HtmlToWordConverter do let(:user) { create :user } let(:team) { create :team } let(:docx) { double('docx') } - let(:report) { described_class.new({}.to_json, docx, user: user, team: team, scinote_url: 'scinote.test') } + let(:report) { described_class.new(docx) } describe 'html_list' do let(:text) do From e748e1eff3b3fa94a08d2defd3a47ca00549e35c Mon Sep 17 00:00:00 2001 From: Urban Rotnik Date: Wed, 14 Oct 2020 16:09:49 +0200 Subject: [PATCH 7/7] Fix smart annotations and links styling --- app/services/reports/docx/draw_experiment.rb | 3 ++- app/services/reports/docx/draw_my_module.rb | 3 ++- app/services/reports/docx/draw_result_comments.rb | 3 ++- app/services/reports/docx/draw_result_text.rb | 3 ++- app/services/reports/docx/draw_step.rb | 3 ++- app/services/reports/docx/draw_step_comments.rb | 3 ++- app/services/reports/html_to_word_converter.rb | 7 ++++--- 7 files changed, 16 insertions(+), 9 deletions(-) diff --git a/app/services/reports/docx/draw_experiment.rb b/app/services/reports/docx/draw_experiment.rb index e43a08e97..b1aa74350 100644 --- a/app/services/reports/docx/draw_experiment.rb +++ b/app/services/reports/docx/draw_experiment.rb @@ -22,7 +22,8 @@ 
module Reports::Docx::DrawExperiment link_style end html = custom_auto_link(experiment.description, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: scinote_url, + link_style: link_style }).html_to_word_converter(html) @docx.p subject['children'].each do |child| public_send("draw_#{child['type_of']}", child, experiment) diff --git a/app/services/reports/docx/draw_my_module.rb b/app/services/reports/docx/draw_my_module.rb index 0cd9d512f..c18b2896b 100644 --- a/app/services/reports/docx/draw_my_module.rb +++ b/app/services/reports/docx/draw_my_module.rb @@ -66,7 +66,8 @@ module Reports::Docx::DrawMyModule if my_module.description.present? html = custom_auto_link(my_module.description, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: scinote_url, + link_style: link_style }).html_to_word_converter(html) else @docx.p I18n.t('projects.reports.elements.module.no_description') end diff --git a/app/services/reports/docx/draw_result_comments.rb b/app/services/reports/docx/draw_result_comments.rb index 67ddfee62..101290b24 100644 --- a/app/services/reports/docx/draw_result_comments.rb +++ b/app/services/reports/docx/draw_result_comments.rb @@ -17,7 +17,8 @@ module Reports::Docx::DrawResultComments date: I18n.l(comment_ts, format: :full_date), time: I18n.l(comment_ts, format: :time)), italic: true html = custom_auto_link(comment.message, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: @scinote_url, + link_style: @link_style }).html_to_word_converter(html) @docx.p end end diff --git a/app/services/reports/docx/draw_result_text.rb b/app/services/reports/docx/draw_result_text.rb index 5ecc27158..33abb9867 100644 --- a/app/services/reports/docx/draw_result_text.rb +++ 
b/app/services/reports/docx/draw_result_text.rb @@ -17,7 +17,8 @@ module Reports::Docx::DrawResultText timestamp: I18n.l(timestamp, format: :full), user: result.user.full_name), color: color[:gray] end html = custom_auto_link(result_text.text, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: @scinote_url, + link_style: @link_style }).html_to_word_converter(html) subject['children'].each do |child| public_send("draw_#{child['type_of']}", child, result) diff --git a/app/services/reports/docx/draw_step.rb b/app/services/reports/docx/draw_step.rb index 0bd9e924e..87ac65c7a 100644 --- a/app/services/reports/docx/draw_step.rb +++ b/app/services/reports/docx/draw_step.rb @@ -27,7 +27,8 @@ module Reports::Docx::DrawStep end if step.description.present? html = custom_auto_link(step.description, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: @scinote_url, + link_style: @link_style }).html_to_word_converter(html) else @docx.p I18n.t 'projects.reports.elements.step.no_description' end diff --git a/app/services/reports/docx/draw_step_comments.rb b/app/services/reports/docx/draw_step_comments.rb index 12e5ed5ff..40bee154d 100644 --- a/app/services/reports/docx/draw_step_comments.rb +++ b/app/services/reports/docx/draw_step_comments.rb @@ -17,7 +17,8 @@ module Reports::Docx::DrawStepComments date: I18n.l(comment_ts, format: :full_date), time: I18n.l(comment_ts, format: :time)), italic: true html = custom_auto_link(comment.message, team: @report_team) - Reports::HtmlToWordConverter.new(@docx).html_to_word_converter(html) + Reports::HtmlToWordConverter.new(@docx, { scinote_url: @scinote_url, + link_style: @link_style }).html_to_word_converter(html) @docx.p end end diff --git a/app/services/reports/html_to_word_converter.rb b/app/services/reports/html_to_word_converter.rb index 
8c0a43268..b5fa20d35 100644 --- a/app/services/reports/html_to_word_converter.rb +++ b/app/services/reports/html_to_word_converter.rb @@ -2,13 +2,15 @@ module Reports class HtmlToWordConverter - def initialize(document) + def initialize(document, options = {}) @docx = document + @scinote_url = options[:scinote_url] + @link_style = options[:link_style] end def html_to_word_converter(text) html = Nokogiri::HTML(text) - raw_elements = recursive_children(html.css('body').children, []) + raw_elements = recursive_children(html.css('body').children, []).compact # Combined raw text blocks in paragraphs elements = combine_docx_elements(raw_elements) @@ -106,7 +108,6 @@ module Reports elements.push(list_element(elem)) next end - elements = recursive_children(elem.children, elements) if elem.children end elements