mirror of
https://github.com/scinote-eln/scinote-web.git
synced 2025-11-11 17:00:41 +08:00
Install tika text extractor directly from Dockerfiles, remove old yomu gem [SCI-11589] (#8257)
This commit is contained in:
parent
5d17d7d0a1
commit
063aa5a470
6 changed files with 16 additions and 3 deletions
|
|
@ -1,6 +1,9 @@
|
||||||
FROM ruby:3.2-bookworm
|
FROM ruby:3.2-bookworm
|
||||||
MAINTAINER SciNote <info@scinote.net>
|
MAINTAINER SciNote <info@scinote.net>
|
||||||
|
|
||||||
|
ARG TIKA_DIST_URL="https://dlcdn.apache.org/tika/2.9.3/tika-app-2.9.3.jar"
|
||||||
|
ENV TIKA_PATH=/usr/local/bin/tika-app.jar
|
||||||
|
|
||||||
# additional dependecies
|
# additional dependecies
|
||||||
# libreoffice for file preview generation
|
# libreoffice for file preview generation
|
||||||
RUN apt-get update -qq && \
|
RUN apt-get update -qq && \
|
||||||
|
|
@ -23,6 +26,8 @@ RUN apt-get update -qq && \
|
||||||
chromium \
|
chromium \
|
||||||
chromium-sandbox \
|
chromium-sandbox \
|
||||||
yarnpkg && \
|
yarnpkg && \
|
||||||
|
wget -O $TIKA_PATH $TIKA_DIST_URL && \
|
||||||
|
chmod +x $TIKA_PATH && \
|
||||||
ln -s /usr/lib/x86_64-linux-gnu/libvips.so.42 /usr/lib/x86_64-linux-gnu/libvips.so && \
|
ln -s /usr/lib/x86_64-linux-gnu/libvips.so.42 /usr/lib/x86_64-linux-gnu/libvips.so && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,9 @@ RUN \
|
||||||
FROM ruby:3.2-bookworm AS runner
|
FROM ruby:3.2-bookworm AS runner
|
||||||
MAINTAINER SciNote <info@scinote.net>
|
MAINTAINER SciNote <info@scinote.net>
|
||||||
|
|
||||||
|
ARG TIKA_DIST_URL="https://dlcdn.apache.org/tika/2.9.3/tika-app-2.9.3.jar"
|
||||||
|
ENV TIKA_PATH=/usr/local/bin/tika-app.jar
|
||||||
|
|
||||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||||
RUN \
|
RUN \
|
||||||
--mount=type=cache,target=/var/cache/apt,sharing=locked \
|
--mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
|
@ -79,6 +82,8 @@ RUN \
|
||||||
chromium-sandbox \
|
chromium-sandbox \
|
||||||
libfile-mimeinfo-perl \
|
libfile-mimeinfo-perl \
|
||||||
yarnpkg && \
|
yarnpkg && \
|
||||||
|
wget -O $TIKA_PATH $TIKA_DIST_URL && \
|
||||||
|
chmod +x $TIKA_PATH && \
|
||||||
/usr/share/nodejs/yarn/bin/yarn add puppeteer@npm:puppeteer-core@^22.15.0 && \
|
/usr/share/nodejs/yarn/bin/yarn add puppeteer@npm:puppeteer-core@^22.15.0 && \
|
||||||
apt-get install -y libreoffice && \
|
apt-get install -y libreoffice && \
|
||||||
ln -s /usr/lib/x86_64-linux-gnu/libvips.so.42 /usr/lib/x86_64-linux-gnu/libvips.so
|
ln -s /usr/lib/x86_64-linux-gnu/libvips.so.42 /usr/lib/x86_64-linux-gnu/libvips.so
|
||||||
|
|
|
||||||
1
Gemfile
1
Gemfile
|
|
@ -17,7 +17,6 @@ gem 'recaptcha'
|
||||||
gem 'sanitize'
|
gem 'sanitize'
|
||||||
gem 'sprockets-rails'
|
gem 'sprockets-rails'
|
||||||
gem 'view_component'
|
gem 'view_component'
|
||||||
gem 'yomu', git: 'https://github.com/scinote-eln/yomu', branch: 'master'
|
|
||||||
|
|
||||||
# Gems for OAuth2 subsystem
|
# Gems for OAuth2 subsystem
|
||||||
gem 'doorkeeper', '>= 4.6'
|
gem 'doorkeeper', '>= 4.6'
|
||||||
|
|
|
||||||
|
|
@ -893,7 +893,6 @@ DEPENDENCIES
|
||||||
webmock
|
webmock
|
||||||
whacamole
|
whacamole
|
||||||
wicked_pdf
|
wicked_pdf
|
||||||
yomu!
|
|
||||||
zip-zip
|
zip-zip
|
||||||
|
|
||||||
RUBY VERSION
|
RUBY VERSION
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'mime/types'
|
||||||
|
|
||||||
module TinyMceImages
|
module TinyMceImages
|
||||||
extend ActiveSupport::Concern
|
extend ActiveSupport::Concern
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
module ActiveStorage
|
module ActiveStorage
|
||||||
class Analyzer::TextExtractionAnalyzer < Analyzer
|
class Analyzer::TextExtractionAnalyzer < Analyzer
|
||||||
|
DEFAULT_TIKA_PATH = 'tika-app.jar'
|
||||||
|
|
||||||
def self.accept?(blob)
|
def self.accept?(blob)
|
||||||
blob.content_type.in?(Constants::TEXT_EXTRACT_FILE_TYPES) && blob.attachments.where(record_type: 'Asset').any?
|
blob.content_type.in?(Constants::TEXT_EXTRACT_FILE_TYPES) && blob.attachments.where(record_type: 'Asset').any?
|
||||||
end
|
end
|
||||||
|
|
@ -40,7 +42,8 @@ module ActiveStorage
|
||||||
end
|
end
|
||||||
|
|
||||||
def process_other(file)
|
def process_other(file)
|
||||||
text_data = Yomu.new(file.path).text
|
tika_path = ENV['TIKA_PATH'] || DEFAULT_TIKA_PATH
|
||||||
|
text_data = IO.popen(['java', '-Djava.awt.headless=true', '-jar', tika_path, '-t', file.path], 'r').read
|
||||||
create_or_update_text_data(text_data)
|
create_or_update_text_data(text_data)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue