Class: Mindee::Parsing::Common::Ocr::OcrPage

Inherits:
Object
  • Object
show all
Defined in:
lib/mindee/parsing/common/ocr/ocr.rb

Overview

OCR extraction for a single page.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(prediction) ⇒ OcrPage

Returns a new instance of OcrPage.

Parameters:

  • prediction (Hash)


72
73
74
75
76
77
78
# File 'lib/mindee/parsing/common/ocr/ocr.rb', line 72

# Builds the page from a raw prediction hash.
#
# Every entry under the 'all_words' key is wrapped in an OcrWord. The line
# cache starts out empty.
#
# @param prediction [Hash]
def initialize(prediction)
  @lines = []
  @all_words = prediction['all_words'].map { |raw_word| OcrWord.new(raw_word) }
end

Instance Attribute Details

#all_words ⇒ Array<OcrWord> (readonly)

All the words on the page, in semi-random order.

Returns:



67
68
69
# File 'lib/mindee/parsing/common/ocr/ocr.rb', line 67

# All the words on the page, in semi-random order.
#
# @return [Array<OcrWord>] the array built by the constructor
def all_words
  @all_words
end

#lines ⇒ Array<OcrLine> (readonly)

Returns:



69
70
71
# File 'lib/mindee/parsing/common/ocr/ocr.rb', line 69

# Raw reader for the cached lines.
#
# The constructor sets this to an empty array; in the code visible here it is
# only filled in by #all_lines, so prefer #all_lines when the ordered lines
# are needed.
#
# @return [Array<OcrLine>]
def lines
  @lines
end

Instance Method Details

#all_lines ⇒ Array<OcrLine>

All the words on the page, ordered in lines.

Returns:



82
83
84
85
# File 'lib/mindee/parsing/common/ocr/ocr.rb', line 82

# All the words on the page, ordered in lines.
#
# The lines are computed with #to_lines on first use and kept in @lines.
# While the cached array is empty, #to_lines is run again on every call.
#
# @return [Array<OcrLine>]
def all_lines
  return @lines unless @lines.empty?

  @lines = to_lines
end

#to_s ⇒ String

Returns:

  • (String)


88
89
90
91
92
93
94
95
96
97
# File 'lib/mindee/parsing/common/ocr/ocr.rb', line 88

# Renders the page as plain text, one OCR line per text line.
#
# Lines whose string form is blank are skipped, and the result carries no
# leading or trailing whitespace. An empty string is returned when there is
# nothing to print.
#
# @return [String]
def to_s
  printable = all_lines.map(&:to_s).reject { |text| text.strip.empty? }
  return '' if printable.empty?

  # Join instead of appending to a `String.new` buffer: that buffer starts as
  # ASCII-8BIT, so ASCII-only pages used to come back binary-encoded while
  # pages with non-ASCII text came back in the lines' own encoding.
  printable.join("\n").strip
end