diff --git a/README.md b/README.md
index c202332..377be55 100644
--- a/README.md
+++ b/README.md
@@ -342,6 +342,28 @@ Response:
 
     ... PDF document body ...
 
+### Detect language
+
+This is a foreground document language detection request. The detected language
+will be returned as the response body.
+
+    POST /detect-language
+
+Params *(suggest using `multipart/form-data`)*:
+
+* `file` - the file whose language should be detected
+
+#### Example:
+
+    POST /detect-language
+      file=... foo.docx ...
+
+Response:
+
+    Content-Type: text/plain
+
+    en
+
 ## Callbacks
 
 When a document conversion is completed, an attempt will be made to POST a
diff --git a/lib/app.rb b/lib/app.rb
index 7336bc8..c5328c5 100644
--- a/lib/app.rb
+++ b/lib/app.rb
@@ -207,6 +207,34 @@ class App < Sinatra::Base
     end
   end
 
+  #
+  # Detect document language
+  #
+  # POST params:
+  #   file - the uploaded file whose language is to be detected
+  #
+  # Responds 400 when the file parameter is missing or is not an upload;
+  # any other failure is reported through respond_with_error.
+  post '/detect-language' do
+    begin
+      unless params[:file]
+        return respond 400, "missing file parameter"
+      end
+
+      # Reject anything that is not hash-like with a readable :tempfile entry.
+      unless params[:file].respond_to?(:fetch) && params[:file].fetch(:tempfile, nil).respond_to?(:read)
+        return respond 400, "invalid file parameter"
+      end
+
+      body = params[:file][:tempfile].read
+      content = Converter.new(logger: @logger).convert_file('detect_language', body)
+      content_type content.mime_type
+      content
+    rescue StandardError => e
+      respond_with_error e
+    end
+  end
+
   # Legacy method to convert files
   # Brought over from Heathen
   #
diff --git a/lib/heathen/processor_methods/detect_language.rb b/lib/heathen/processor_methods/detect_language.rb
new file mode 100644
index 0000000..f65b6b4
--- /dev/null
+++ b/lib/heathen/processor_methods/detect_language.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Heathen
+  class Processor
+    # Detects the language of the job's content.
+    #
+    # Expects PDF content (see expect_mime_type), runs the executable configured
+    # at Colore::C_.tika_path with `--language` against the job's content file,
+    # and replaces the job content with the command's standard output.
+    #
+    # NOTE(review): the command's exit status is not checked here - confirm
+    # whether a failed run should raise instead of storing its output.
+    def detect_language
+      expect_mime_type 'application/pdf'
+
+      executioner.execute(
+        Colore::C_.tika_path,
+        '--language',
+        job.content_file,
+        binary: true
+      )
+
+      job.content = executioner.stdout
+    end
+  end
+end
diff --git a/spec/heathen/processor_methods/detect_language_spec.rb b/spec/heathen/processor_methods/detect_language_spec.rb
new file mode 100644
index 0000000..f872d45
--- /dev/null
+++ b/spec/heathen/processor_methods/detect_language_spec.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Heathen::Processor do
+  let(:content) { File.read(fixture('heathen/quickfox.pdf')) }
+  let(:job) { Heathen::Job.new 'foo', content, 'en' }
+  let(:processor) { described_class.new job: job, logger: Logger.new($stderr) }
+
+  after do
+    processor.clean_up
+  end
+
+  describe '#detect_language' do
+    it 'detects input file language' do
+      processor.detect_language
+      expect(job.content).to eq 'en'
+    end
+  end
+end