Skip to content

Commit

Permalink
#75 allow Roo::Excelx to open streams
Browse files Browse the repository at this point in the history
  • Loading branch information
Tony Novak committed May 1, 2015
1 parent e7216e2 commit 3c0dda4
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 15 deletions.
1 change: 1 addition & 0 deletions lib/roo/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ def search_or_set_header(options)
end

def local_filename(filename, tmpdir, packed)
return unless filename.is_a?(String)
filename = download_uri(filename, tmpdir) if uri?(filename)
filename = unzip(filename, tmpdir) if packed == :zip
unless File.file?(filename)
Expand Down
47 changes: 33 additions & 14 deletions lib/roo/excelx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,20 +250,23 @@ def pad_cells(cell, last_column)
# values for packed: :zip
# optional cell_max (int) parameter for early aborting attempts to parse
# enormous documents.
def initialize(filename, options = {})
def initialize(filename_or_stream, options = {})
packed = options[:packed]
file_warning = options.fetch(:file_warning, :error)
cell_max = options.delete(:cell_max)
sheet_options = {}
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)

file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
if filename_or_stream.is_a? String
file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
basename = filename_or_stream.split('/').last
end

@tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
@filename = local_filename(filename, @tmpdir, packed)
@tmpdir = make_tmpdir(basename, options[:tmpdir_root])
@filename = local_filename(filename_or_stream, @tmpdir, packed)
@comments_files = []
@rels_files = []
process_zipfile(@tmpdir, @filename)
process_zipfile(@filename || filename_or_stream)

@sheet_names = workbook.sheets.map do |sheet|
unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
Expand Down Expand Up @@ -574,10 +577,26 @@ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
end

# Extracts all needed files from the zip file.
#
# Takes either a String, which is opened with Zip::File.open, or any other
# object, which is handed to Zip::InputStream.open. In both cases the
# collected entries are passed on to process_zipfile_entries.
#
# NOTE(review): this hunk shows two `def` lines and an `entries = ...` line
# that still refers to `zipfilename`. They look like the removed side of a
# diff rendered without +/- markers -- confirm before treating this text as
# runnable code.
def process_zipfile(tmpdir, zipfilename)
def process_zipfile(zipfilename_or_stream)
@sheet_files = []
entries = Zip::File.open(zipfilename).to_a.sort_by(&:name)

if zipfilename_or_stream.is_a?(String)
# String argument: list the entries through Zip::File, sorted by entry name.
process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
else
# Any other argument: walk the entries one by one through Zip::InputStream.
# Unlike the String branch, these entries are passed on in the order they
# are read; no sort_by(&:name) is applied here.
stream = Zip::InputStream.open zipfilename_or_stream
begin
entries = []
# The loop ends as soon as get_next_entry returns a falsy value.
while entry = stream.get_next_entry
entries << entry
end
# The entries are processed while the stream is still open; it is only
# closed in the ensure clause below, even if processing raises.
process_zipfile_entries entries
ensure
stream.close
end
end
end

def process_zipfile_entries entries
# NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
# are not in order. With Numbers 3.1, the first sheet is always
# sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
Expand All @@ -593,31 +612,31 @@ def process_zipfile(tmpdir, zipfilename)
# workbook.xml.rel:
# <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
# <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
sheet_ids = extract_worksheet_ids(entries, "#{tmpdir}/roo_workbook.xml")
sheets = extract_worksheet_rels(entries, "#{tmpdir}/roo_workbook.xml.rels")
extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)

entries.each do |entry|
path =
case entry.name.downcase
when /sharedstrings.xml$/
"#{tmpdir}/roo_sharedStrings.xml"
"#{@tmpdir}/roo_sharedStrings.xml"
when /styles.xml$/
"#{tmpdir}/roo_styles.xml"
"#{@tmpdir}/roo_styles.xml"
when /comments([0-9]+).xml$/
# FIXME: Most of the time, the order of the comment files is the same as
# the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
# In some situations, this isn't true. The true location of a
# sheet's comment file is in the sheet1.xml.rels file. SEE
# ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
nr = Regexp.last_match[1].to_i
@comments_files[nr - 1] = "#{tmpdir}/roo_comments#{nr}"
@comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
when /sheet([0-9]+).xml.rels$/
# FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
# it also stores the location for sharedStrings, comments,
# drawings, etc.
nr = Regexp.last_match[1].to_i
@rels_files[nr - 1] = "#{tmpdir}/roo_rels#{nr}"
@rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
end

entry.extract(path) if path
Expand Down
9 changes: 8 additions & 1 deletion test/test_roo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#STDERR.reopen "/dev/null","w"

require 'test_helper'
require 'stringio'

class TestRoo < Minitest::Test

Expand Down Expand Up @@ -2063,5 +2064,11 @@ def test_noexpand_merged_range
end
end


# Checks that Roo::Excelx can be built from an in-memory IO object (a
# StringIO) instead of a file path, and that the expected sheet names are
# reported for the :numbers1 fixture.
def test_open_stream
  return unless EXCELX

  # An .xlsx file is a zip archive, so read it in binary mode: text-mode
  # File.read may translate newlines on some platforms and tags the string
  # with a text encoding, neither of which is wanted for raw archive bytes.
  file_contents = File.binread(File.join(TESTDIR, fixture_filename(:numbers1, :excelx)))
  stream = StringIO.new(file_contents)
  xlsx = Roo::Excelx.new(stream)
  assert_equal ["Tabelle1","Name of Sheet 2","Sheet3","Sheet4","Sheet5"], xlsx.sheets
end
end # class

0 comments on commit 3c0dda4

Please sign in to comment.