mirror of
				https://github.com/scinote-eln/scinote-web.git
				synced 2025-11-01 00:56:05 +08:00 
			
		
		
		
	Add text extraction from MarvinJS for search [SCI-3643]
This commit is contained in:
		
							parent
							
								
									3b7f715e0b
								
							
						
					
					
						commit
						a2fe4bee94
					
				
					 2 changed files with 35 additions and 22 deletions
				
			
		|  | @ -119,7 +119,7 @@ class Asset < ApplicationRecord | |||
|                   assets_in_steps, assets_in_results, assets_in_inventories) | ||||
| 
 | ||||
|     new_query = Asset.left_outer_joins(:asset_text_datum) | ||||
|                      .left_outer_joins(file_attachment: :blob) | ||||
|                      .joins(file_attachment: :blob) | ||||
|                      .from(assets, 'assets') | ||||
| 
 | ||||
|     a_query = s_query = '' | ||||
|  | @ -266,6 +266,10 @@ class Asset < ApplicationRecord | |||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def marvinjs? | ||||
|     file.metadata[:asset_type] == 'marvinjs' | ||||
|   end | ||||
| 
 | ||||
|   def post_process_file(team = nil) | ||||
|     # Update self.empty | ||||
|     update(file_present: true) | ||||
|  | @ -277,6 +281,8 @@ class Asset < ApplicationRecord | |||
|       # estimated size calculation | ||||
|       Asset.delay(queue: :assets, run_at: 20.minutes.from_now) | ||||
|            .extract_asset_text_delayed(id, in_template) | ||||
|     elsif marvinjs? | ||||
|       extract_asset_text | ||||
|     else | ||||
|       # Update asset's estimated size immediately | ||||
|       update_estimated_size(team) | ||||
|  | @ -293,11 +299,17 @@ class Asset < ApplicationRecord | |||
|   def extract_asset_text(in_template = false) | ||||
|     self.in_template = in_template | ||||
| 
 | ||||
|     download_blob_to_tempfile do |tmp_file| | ||||
|     if marvinjs? | ||||
|       mjs_doc = Nokogiri::XML(file.metadata[:description]) | ||||
|       mjs_doc.remove_namespaces! | ||||
|       text_data = mjs_doc.search("//Field[@name='text']").collect(&:text).join(' ') | ||||
|     else | ||||
|       # Start Tika as a server | ||||
|       Yomu.server(:text) if !ENV['NO_TIKA_SERVER'] && Yomu.class_variable_get(:@@server_pid).nil? | ||||
| 
 | ||||
|       download_blob_to_tempfile do |tmp_file| | ||||
|         text_data = Yomu.new(tmp_file.path).text | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     if asset_text_datum.present? | ||||
|       # Update existing text datum if it exists | ||||
|  | @ -318,7 +330,6 @@ class Asset < ApplicationRecord | |||
|       "file #{file.blob.key}: #{e.message}" | ||||
|     ) | ||||
|   end | ||||
|   end | ||||
| 
 | ||||
|   # If team is provided, its space_taken | ||||
|   # is updated as well | ||||
|  |  | |||
|  | @ -24,6 +24,7 @@ class MarvinJsService | |||
|                           team_id: current_team.id) | ||||
|       attach_file(asset.file, file, params) | ||||
|       asset.save! | ||||
|       asset.post_process_file(current_team) | ||||
|       connect_asset(asset, params, current_user) | ||||
|     end | ||||
| 
 | ||||
|  | @ -39,6 +40,7 @@ class MarvinJsService | |||
| 
 | ||||
|       file = generate_image(params) | ||||
|       attach_file(attachment, file, params) | ||||
|       asset.post_process_file(current_team) | ||||
|       asset | ||||
|     end | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue