Module: Mindee::PDF::PDFProcessor
- Defined in:
- lib/mindee/pdf/pdf_processor.rb
Overview
PDF document processing
Class Method Summary
- .get_page(pdf_doc, page_id) ⇒ StringIO
Retrieves a PDF document’s page.
- .indexes_from_keep(page_indexes, all_pages) ⇒ Object
- .indexes_from_remove(page_indexes, all_pages) ⇒ Object
- .open_pdf(io_stream) ⇒ Origami::PDF
- .parse(io_stream, options) ⇒ StringIO
Class Method Details
.get_page(pdf_doc, page_id) ⇒ StringIO
Retrieves a PDF document’s page.
74 75 76 77 78 79 80 81 82 83 |
# File 'lib/mindee/pdf/pdf_processor.rb', line 74
# Retrieves a PDF document's page.
#
# @param pdf_doc [Object] PDF handle; only `#save` is called on it here
#   (presumably an Origami::PDF -- confirm against callers).
# @param page_id [Integer] page number. It is decremented before being used as
#   a page index, so it is presumably 1-based -- confirm against callers.
# @return [StringIO] the result of `parse` for the single requested page.
def self.get_page(pdf_doc, page_id)
  # Serialize the document into an in-memory stream so `parse` can re-open it.
  stream = StringIO.new
  pdf_doc.save(stream)

  # NOTE(review): no operation is passed, so this relies on the PageOptions
  # default operation -- confirm that it keeps only the listed pages.
  options = PageOptions.new(params: {
                              page_indexes: [page_id - 1],
                            })

  parse(stream, options)
end
.indexes_from_keep(page_indexes, all_pages) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/mindee/pdf/pdf_processor.rb', line 36
# Computes the pages to delete when only the pages at `page_indexes` are kept.
#
# @param page_indexes [Array<Integer>] indexes of the pages to keep. Negative
#   values count from the end (-1 is the last page, -2 the one before it).
#   Indexes that fall outside `all_pages` are ignored.
# @param all_pages [Array<Integer>] every page index of the document.
# @return [Set<Integer>] the entries of `all_pages` that are not kept.
def self.indexes_from_keep(page_indexes, all_pages)
  # Array#[] already resolves negative indexes from the end and yields nil when
  # out of range, so every negative offset is handled, not only -1.
  pages_to_keep = page_indexes.map { |idx| all_pages[idx] }.compact.to_set
  all_pages.to_set - pages_to_keep
end
.indexes_from_remove(page_indexes, all_pages) ⇒ Object
50 51 52 53 54 55 56 57 58 59 |
# File 'lib/mindee/pdf/pdf_processor.rb', line 50
# Resolves the requested removal indexes to actual entries of `all_pages`.
#
# @param page_indexes [Array<Integer>] indexes of the pages to remove. Negative
#   values count from the end (-1 is the last page, -2 the one before it).
#   Indexes that fall outside `all_pages` are ignored.
# @param all_pages [Array<Integer>] every page index of the document.
# @return [Set<Integer>] the resolved, de-duplicated pages to remove.
def self.indexes_from_remove(page_indexes, all_pages)
  # Return the resolved set itself. Ending the method on `page_indexes.each`
  # would hand the caller the raw, unvalidated input instead.
  # Array#[] resolves negative indexes from the end and yields nil when out of
  # range, so every negative offset is handled, not only -1.
  page_indexes.map { |idx| all_pages[idx] }.compact.to_set
end
.open_pdf(io_stream) ⇒ Origami::PDF
63 64 65 66 67 |
# File 'lib/mindee/pdf/pdf_processor.rb', line 63
# Parses the content of `io_stream` into a PDF object using Origami's linear
# parser with quiet verbosity.
#
# @param io_stream [#seek] stream holding the PDF; it is repositioned to
#   offset 0 before parsing.
# @return [Origami::PDF] the parsed document.
def self.open_pdf(io_stream)
  # Start from the beginning regardless of where earlier reads left the cursor.
  io_stream.seek(0)
  Origami::PDF::LinearParser
    .new({ verbosity: Origami::Parser::VERBOSE_QUIET })
    .parse(io_stream)
end
.parse(io_stream, options) ⇒ StringIO
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/mindee/pdf/pdf_processor.rb', line 15
# Opens the PDF held in `io_stream` and deletes pages according to `options`.
#
# @param io_stream [#seek] stream holding the PDF; handed to `open_pdf`.
# @param options [PageOptions] page options; `on_min_pages`, `operation` and
#   `page_indexes` are read from it.
# @return [StringIO] the resulting document as an IO stream.
# @raise [ArgumentError] if `options.operation` is neither :KEEP_ONLY nor
#   :REMOVE.
def self.parse(io_stream, options)
  current_pdf = open_pdf(io_stream)
  pages_count = current_pdf.pages.size
  # Return the document unmodified when it has fewer pages than on_min_pages.
  return current_pdf.to_io_stream if options.on_min_pages.to_i > pages_count

  all_pages = (0..pages_count - 1).to_a

  if options.operation == :KEEP_ONLY
    pages_to_remove = indexes_from_keep(options.page_indexes, all_pages)
  elsif options.operation == :REMOVE
    pages_to_remove = indexes_from_remove(options.page_indexes, all_pages)
  else
    raise ArgumentError, "operation must be one of :KEEP_ONLY or :REMOVE, sent '#{options.operation}'"
  end

  # Skip the deletion when the removal list is exactly the full page list.
  current_pdf.delete_pages_at(pages_to_remove) if pages_to_remove.to_a != all_pages.to_a
  current_pdf.to_io_stream
end