diff --git a/.gitignore b/.gitignore index 1ec2ce82..e086f14f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /pkg/ /log/ .ruby-version +.project diff --git a/CHANGELOG b/CHANGELOG index c0af37c0..6526b046 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,9 @@ * enhancements * Extend Roo::Spreadsheet.open to accept Tempfiles and other arguments responding to #path. Note they require an :extension option to be declared, as the tempfile mangles the extension. #84. +* bugfixes + * Fix that paths with spaces in them would fail with URI::InvalidURIError. #121. + == 1.13.2 2013-12-23 * bugfixes diff --git a/Gemfile b/Gemfile index 13ad8d6c..1db61cdd 100644 --- a/Gemfile +++ b/Gemfile @@ -16,3 +16,8 @@ group :test do gem 'rspec', '>= 2.14' gem 'vcr' end + +group :development, :test do + gem 'pry' + gem 'pry-nav' +end diff --git a/Gemfile.lock b/Gemfile.lock index 9218ad17..11a16c8e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,9 +2,10 @@ GEM remote: http://rubygems.org/ specs: addressable (2.3.5) + coderay (1.0.9) crack (0.4.1) safe_yaml (~> 0.9.0) - diff-lcs (1.2.4) + diff-lcs (1.2.5) faraday (0.8.7) multipart-post (~> 1.1) git (1.2.5) @@ -22,6 +23,7 @@ GEM json (1.7.7-java) jwt (0.1.8) multi_json (>= 1.5) + method_source (0.8.2) multi_json (1.7.3) multi_xml (0.5.3) multipart-post (1.2.0) @@ -35,6 +37,12 @@ GEM multi_json (~> 1.0) multi_xml (~> 0.5) rack (~> 1.2) + pry (0.9.12.2) + coderay (~> 1.0.5) + method_source (~> 0.8) + slop (~> 3.4) + pry-nav (0.2.3) + pry (~> 0.9.10) rack (1.5.2) rake (0.9.2.2) rdoc (3.12.2) @@ -43,10 +51,10 @@ GEM rspec-core (~> 2.14.0) rspec-expectations (~> 2.14.0) rspec-mocks (~> 2.14.0) - rspec-core (2.14.5) - rspec-expectations (2.14.3) + rspec-core (2.14.7) + rspec-expectations (2.14.4) diff-lcs (>= 1.1.3, < 2.0) - rspec-mocks (2.14.3) + rspec-mocks (2.14.4) ruby-ole (1.2.11.6) rubyzip (1.0.0) safe_yaml (0.9.4) @@ -55,6 +63,7 @@ GEM shoulda-matchers (~> 1.0.0) shoulda-context (1.0.0) shoulda-matchers (1.0.0) + slop (3.4.6) 
spreadsheet (0.8.2) ruby-ole (>= 1.0) vcr (2.5.0) @@ -70,6 +79,8 @@ DEPENDENCIES google_drive jeweler nokogiri + pry + pry-nav rspec (>= 2.14) rubyzip shoulda diff --git a/README.markdown b/README.markdown index 91ca5798..d58a6bea 100644 --- a/README.markdown +++ b/README.markdown @@ -1,4 +1,4 @@ -# README for Roo +# README Roo implements read access for all spreadsheet types and read/write access for Google spreadsheets. It can handle @@ -11,6 +11,17 @@ Google spreadsheets. It can handle ## Notes +### Help Maintain Roo! + +I've worked a bit over 2012-2013 to refine and improve Roo, but now I'm off to +other things so the library needs your help! + +Specifically, we need someone(s) who can take over stewardship of the project +and see that it continues to progress. + +If you're interested in helping out with more than a pull request, just contact me. +In the meantime I'll continue to contribute in what spare moments I have. + ### XLS There is no support for formulas in Roo for .xls files - you can get the result @@ -70,6 +81,9 @@ s.font(1,1).italic? s.font(1,1).underline? 
+# Roo::Spreadsheet requires spreadsheet gem +require 'spreadsheet' + # Spreadsheet.open can accept both files and paths xls = Roo::Spreadsheet.open('./new_prices.xls') diff --git a/lib/roo/base.rb b/lib/roo/base.rb index a7531ece..eb76de1c 100644 --- a/lib/roo/base.rb +++ b/lib/roo/base.rb @@ -17,34 +17,65 @@ class Roo::Base include Enumerable TEMP_PREFIX = "oo_" + LETTERS = %w{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z} attr_reader :default_sheet, :headers # sets the line with attribute names (default: 1) attr_accessor :header_line - protected + class << self + def split_coordinate(str) + letter,number = split_coord(str) + x = letter_to_number(letter) + y = number + return y, x + end - def self.split_coordinate(str) - letter,number = Roo::Base.split_coord(str) - x = letter_to_number(letter) - y = number - return y, x - end + def split_coord(s) + if s =~ /([a-zA-Z]+)([0-9]+)/ + letter = $1 + number = $2.to_i + else + raise ArgumentError + end + return letter, number + end - def self.split_coord(s) - if s =~ /([a-zA-Z]+)([0-9]+)/ - letter = $1 - number = $2.to_i - else - raise ArgumentError + # convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...) + def number_to_letter(n) + letters="" + if n > 26 + while n % 26 == 0 && n != 0 + letters << 'Z' + n = (n - 26) / 26 + end + while n > 0 + num = n%26 + letters = LETTERS[num-1] + letters + n = (n / 26) + end + else + letters = LETTERS[n-1] + end + letters + end + + # convert letters like 'AB' to a number ('A' => 1, 'B' => 2, ...) 
+ def letter_to_number(letters) + result = 0 + while letters && letters.length > 0 + character = letters[0,1].upcase + num = LETTERS.index(character) + raise ArgumentError, "invalid column character '#{letters[0,1]}'" if num == nil + num += 1 + result = result * 26 + num + letters = letters[1..-1] + end + result end - return letter, number end - - public - def initialize(filename, options={}, file_warning=:error, tmpdir=nil) @filename = filename @options = options @@ -73,84 +104,72 @@ def default_sheet=(sheet) # first non-empty column as a letter def first_column_as_letter(sheet=nil) - Roo::Base.number_to_letter(first_column(sheet)) + self.class.number_to_letter(first_column(sheet)) end # last non-empty column as a letter def last_column_as_letter(sheet=nil) - Roo::Base.number_to_letter(last_column(sheet)) + self.class.number_to_letter(last_column(sheet)) end # returns the number of the first non-empty row def first_row(sheet=nil) sheet ||= @default_sheet read_cells(sheet) - if @first_row[sheet] - return @first_row[sheet] - end - impossible_value = 999_999 # more than a spreadsheet can hold - result = impossible_value - @cell[sheet].each_pair {|key,value| - y = key.first.to_i # _to_string(key).split(',') - result = [result, y].min if value - } if @cell[sheet] - result = nil if result == impossible_value - @first_row[sheet] = result - result + @first_row[sheet] ||= + begin + impossible_value = 999_999 # more than a spreadsheet can hold + result = impossible_value + @cell[sheet].each_pair {|key,value| + result = [result, key.first.to_i].min if value + } if @cell[sheet] + result unless result == impossible_value + end end # returns the number of the last non-empty row def last_row(sheet=nil) sheet ||= @default_sheet read_cells(sheet) - if @last_row[sheet] - return @last_row[sheet] - end - impossible_value = 0 - result = impossible_value - @cell[sheet].each_pair {|key,value| - y = key.first.to_i # _to_string(key).split(',') - result = [result, y].max if value - } if 
@cell[sheet] - result = nil if result == impossible_value - @last_row[sheet] = result - result + @last_row[sheet] ||= + begin + impossible_value = 0 + result = impossible_value + @cell[sheet].each_pair {|key,value| + result = [result, key.first.to_i].max if value + } if @cell[sheet] + result unless result == impossible_value + end end # returns the number of the first non-empty column def first_column(sheet=nil) sheet ||= @default_sheet read_cells(sheet) - if @first_column[sheet] - return @first_column[sheet] - end - impossible_value = 999_999 # more than a spreadsheet can hold - result = impossible_value - @cell[sheet].each_pair {|key,value| - x = key.last.to_i # _to_string(key).split(',') - result = [result, x].min if value - } if @cell[sheet] - result = nil if result == impossible_value - @first_column[sheet] = result - result + @first_column[sheet] ||= + begin + impossible_value = 999_999 # more than a spreadsheet can hold + result = impossible_value + @cell[sheet].each_pair {|key,value| + result = [result, key.last.to_i].min if value + } if @cell[sheet] + result unless result == impossible_value + end end # returns the number of the last non-empty column def last_column(sheet=nil) sheet ||= @default_sheet read_cells(sheet) - if @last_column[sheet] - return @last_column[sheet] - end - impossible_value = 0 - result = impossible_value - @cell[sheet].each_pair {|key,value| - x = key.last.to_i # _to_string(key).split(',') - result = [result, x].max if value - } if @cell[sheet] - result = nil if result == impossible_value - @last_column[sheet] = result - result + @last_column[sheet] ||= + begin + impossible_value = 0 + result = impossible_value + @cell[sheet].each_pair {|key,value| + result = [result, key.last.to_i].max if value + } if @cell[sheet] + result unless result == impossible_value + end end # returns a rectangular area (default: all cells) as yaml-output @@ -158,11 +177,16 @@ def last_column(sheet=nil) # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" 
=> "1"}) def to_yaml(prefix={}, from_row=nil, from_column=nil, to_row=nil, to_column=nil,sheet=nil) sheet ||= @default_sheet - result = "--- \n" return '' unless first_row # empty result if there is no first_row in a sheet - (from_row||first_row(sheet)).upto(to_row||last_row(sheet)) do |row| - (from_column||first_column(sheet)).upto(to_column||last_column(sheet)) do |col| + from_row ||= first_row(sheet) + to_row ||= last_row(sheet) + from_column ||= first_column(sheet) + to_column ||= last_column(sheet) + + result = "--- \n" + from_row.upto(to_row) do |row| + from_column.upto(to_column) do |col| unless empty?(row,col,sheet) result << "cell_#{row}_#{col}: \n" prefix.each {|k,v| @@ -171,11 +195,11 @@ def to_yaml(prefix={}, from_row=nil, from_column=nil, to_row=nil, to_column=nil, result << " row: #{row} \n" result << " col: #{col} \n" result << " celltype: #{self.celltype(row,col,sheet)} \n" - if self.celltype(row,col,sheet) == :time - result << " value: #{Roo::Base.integer_to_timestring( self.cell(row,col,sheet))} \n" - else - result << " value: #{self.cell(row,col,sheet)} \n" + value = cell(row,col,sheet) + if celltype(row,col,sheet) == :time + value = integer_to_timestring(value) end + result << " value: #{value} \n" end end end @@ -205,23 +229,36 @@ def to_matrix(from_row=nil, from_column=nil, to_row=nil, to_column=nil,sheet=nil sheet ||= @default_sheet return Matrix.empty unless first_row - Matrix.rows((from_row||first_row(sheet)).upto(to_row||last_row(sheet)).map do |row| - (from_column||first_column(sheet)).upto(to_column||last_column(sheet)).map do |col| + from_row ||= first_row(sheet) + to_row ||= last_row(sheet) + from_column ||= first_column(sheet) + to_column ||= last_column(sheet) + + Matrix.rows(from_row.upto(to_row).map do |row| + from_column.upto(to_column).map do |col| cell(row,col,sheet) end end) end + # call to_s method defined on subclasses + def inspect + to_s + end + # find a row either by row number or a condition # Caution: this works only 
within the default sheet -> set default_sheet before you call this method # (experimental. see examples in the test_roo.rb file) def find(*args) # :nodoc options = (args.last.is_a?(Hash) ? args.pop : {}) - if args[0].class == Fixnum - find_by_row(args) - elsif args[0] == :all + case args[0] + when Fixnum + find_by_row(args[0]) + when :all find_by_conditions(options) + else + raise ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all" end end @@ -254,12 +291,13 @@ def set(row,col,value,sheet=nil) #:nodoc: sheet ||= @default_sheet read_cells(sheet) row, col = normalize(row,col) - cell_type = case value - when Fixnum then :float - when String, Float then :string - else - raise ArgumentError, "Type for #{value} not set" - end + cell_type = + case value + when Fixnum then :float + when String, Float then :string + else + raise ArgumentError, "Type for #{value} not set" + end set_value(row,col,value,sheet) set_type(row,col,cell_type,sheet) @@ -298,8 +336,8 @@ def info else result << " First row: #{first_row}\n" result << " Last row: #{last_row}\n" - result << " First column: #{Roo::Base.number_to_letter(first_column)}\n" - result << " Last column: #{Roo::Base.number_to_letter(last_column)}" + result << " First column: #{self.class.number_to_letter(first_column)}\n" + result << " Last column: #{self.class.number_to_letter(last_column)}" end result << "\n" if sheet != sheets.last n += 1 @@ -340,7 +378,7 @@ def method_missing(m, *args) # #aa42 => #cell('aa',42) # #aa42('Sheet1') => #cell('aa',42,'Sheet1') if m =~ /^([a-z]+)(\d)$/ - col = Roo::Base.letter_to_number($1) + col = self.class.letter_to_number($1) row = $2.to_i if args.empty? 
cell(row,col) @@ -516,35 +554,33 @@ def key_to_string(arr) private - def find_by_row(args) - rownum = args[0] - current_row = rownum - current_row += header_line - 1 if @header_line + def find_by_row(row_index) + row_index += (header_line - 1) if @header_line - self.row(current_row).size.times.map do |j| - cell(current_row, j + 1) + row(row_index).size.times.map do |cell_index| + cell(row_index, cell_index + 1) end end def find_by_conditions(options) rows = first_row.upto(last_row) - result_array = options[:array] header_for = Hash[1.upto(last_column).map do |col| [col, cell(@header_line,col)] end] # are all conditions met? - if (conditions = options[:conditions]) && !conditions.empty? + conditions = options[:conditions] + if conditions && !conditions.empty? column_with = header_for.invert rows = rows.select do |i| conditions.all? { |key,val| cell(i,column_with[key]) == val } end end - rows.map do |i| - if result_array - self.row(i) - else + if options[:array] + rows.map {|i| self.row(i) } + else + rows.map do |i| Hash[1.upto(self.row(i).size).map do |j| [header_for.fetch(j), cell(i,j)] end] @@ -617,7 +653,7 @@ def normalize(row,col) end end if col.class == String - col = Roo::Base.letter_to_number(col) + col = self.class.letter_to_number(col) end return row,col end @@ -650,41 +686,6 @@ def open_from_stream(stream, tmpdir) File.join(tmpdir, "spreadsheet") end - LETTERS = %w{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z} - - # convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...) - def self.number_to_letter(n) - letters="" - if n > 26 - while n % 26 == 0 && n != 0 - letters << 'Z' - n = (n - 26) / 26 - end - while n > 0 - num = n%26 - letters = LETTERS[num-1] + letters - n = (n / 26) - end - else - letters = LETTERS[n-1] - end - letters - end - - # convert letters like 'AB' to a number ('A' => 1, 'B' => 2, ...) 
- def self.letter_to_number(letters) - result = 0 - while letters && letters.length > 0 - character = letters[0,1].upcase - num = LETTERS.index(character) - raise ArgumentError, "invalid column character '#{letters[0,1]}'" if num == nil - num += 1 - result = result * 26 + num - letters = letters[1..-1] - end - result - end - def unzip(filename, tmpdir) Roo::ZipFile.open(filename) do |zip| process_zipfile_packed(zip, tmpdir) @@ -749,7 +750,7 @@ def cell_to_csv(row, col, sheet) onecell = cell(row,col,sheet) case celltype(row,col,sheet) - when :string + when :string, :link unless onecell.empty? %{"#{onecell.gsub(/"/,'""')}"} end @@ -781,7 +782,7 @@ def cell_to_csv(row, col, sheet) when :date, :datetime onecell.to_s when :time - Roo::Base.integer_to_timestring(onecell) + integer_to_timestring(onecell) when :link %{"#{onecell.url.gsub(/"/,'""')}"} else @@ -790,8 +791,10 @@ def cell_to_csv(row, col, sheet) end end + private + # converts an integer value to a time string like '02:05:06' - def self.integer_to_timestring(content) + def integer_to_timestring(content) h = (content/3600.0).floor content = content - h*3600 m = (content/60.0).floor diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index d3994ec7..4a6d37c5 100644 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -46,7 +46,7 @@ def to_type(format) type elsif format.include?('#') :float - elsif format.include?('d') || format.include?('y') + elsif !format.match(/d+(?![\]])/).nil? 
|| format.include?('y') if format.include?('h') || format.include?('s') :datetime else @@ -84,8 +84,8 @@ def initialize(filename, options = {}, deprecated_file_warning = :error) unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end - @comments_files = Array.new - @rels_files = Array.new + @comments_files = [] + @rels_files = [] extract_content(tmpdir, @filename) @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml")) @shared_table = [] @@ -94,7 +94,7 @@ def initialize(filename, options = {}, deprecated_file_warning = :error) read_shared_strings(@sharedstring_doc) end @styles_table = [] - @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} } + @style_definitions = [] # TODO: ??? { |h,k| h[k] = {} } if File.exist?(File.join(tmpdir, 'roo_styles.xml')) @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml')) read_styles(@styles_doc) @@ -104,14 +104,14 @@ def initialize(filename, options = {}, deprecated_file_warning = :error) @rels_doc = load_xmls(@rels_files) end super(filename, options) - @formula = Hash.new - @excelx_type = Hash.new - @excelx_value = Hash.new - @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt - @comment = Hash.new - @comments_read = Hash.new - @hyperlink = Hash.new - @hyperlinks_read = Hash.new + @formula = {} + @excelx_type = {} + @excelx_value = {} + @s_attribute = {} # TODO: ggf. 
wieder entfernen nur lokal benoetigt + @comment = {} + @comments_read = {} + @hyperlink = {} + @hyperlinks_read = {} end def method_missing(m,*args) @@ -138,14 +138,12 @@ def cell(row, col, sheet=nil) row,col = normalize(row,col) if celltype(row,col,sheet) == :date yyyy,mm,dd = @cell[sheet][[row,col]].split('-') - return Date.new(yyyy.to_i,mm.to_i,dd.to_i) + Date.new(yyyy.to_i,mm.to_i,dd.to_i) elsif celltype(row,col,sheet) == :datetime - date_part,time_part = @cell[sheet][[row,col]].split(' ') - yyyy,mm,dd = date_part.split('-') - hh,mi,ss = time_part.split(':') - return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) + create_datetime_from( @cell[sheet][[row,col]] ) + else + @cell[sheet][[row,col]] end - @cell[sheet][[row,col]] end # Returns the formula at (row,col). @@ -165,8 +163,8 @@ def formulas(sheet=nil) sheet ||= @default_sheet read_cells(sheet) if @formula[sheet] - @formula[sheet].each.collect do |elem| - [elem[0][0], elem[0][1], elem[1]] + @formula[sheet].map do |coord, formula| + [coord[0], coord[1], formula] end else [] @@ -213,7 +211,7 @@ def celltype(row,col,sheet=nil) read_cells(sheet) row,col = normalize(row,col) if @formula[sheet][[row,col]] - return :formula + :formula else @cell_type[sheet][[row,col]] end @@ -227,7 +225,7 @@ def excelx_type(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) - return @excelx_type[sheet][[row,col]] + @excelx_type[sheet][[row,col]] end # returns the internal value of an excelx cell @@ -236,7 +234,7 @@ def excelx_value(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) - return @excelx_value[sheet][[row,col]] + @excelx_value[sheet][[row,col]] end # returns the internal format of an excel cell @@ -268,11 +266,11 @@ def to_s(sheet=nil) def label(labelname) read_labels if @label.empty? 
|| !@label.has_key?(labelname) - return nil,nil,nil + [nil,nil,nil] else - return @label[labelname][1].to_i, - Roo::Base.letter_to_number(@label[labelname][2]), - @label[labelname][0] + [@label[labelname][1].to_i, + self.class.letter_to_number(@label[labelname][2]), + @label[labelname][0]] end end @@ -285,7 +283,7 @@ def labels @label.map do |label| [ label[0], # name [ label[1][1].to_i, # row - Roo::Base.letter_to_number(label[1][2]), # column + self.class.letter_to_number(label[1][2]), # column label[1][0], # sheet ] ] end @@ -301,8 +299,7 @@ def hyperlink(row,col,sheet=nil) sheet ||= @default_sheet read_hyperlinks(sheet) unless @hyperlinks_read[sheet] row,col = normalize(row,col) - return nil unless @hyperlink[sheet] - @hyperlink[sheet][[row,col]] + @hyperlink[sheet] && @hyperlink[sheet][[row,col]] end # returns the comment at (row/col) @@ -312,17 +309,12 @@ def comment(row,col,sheet=nil) #read_cells(sheet) read_comments(sheet) unless @comments_read[sheet] row,col = normalize(row,col) - return nil unless @comment[sheet] - @comment[sheet][[row,col]] + @comment[sheet] && @comment[sheet][[row,col]] end # true, if there is a comment def comment?(row,col,sheet=nil) - sheet ||= @default_sheet - # read_cells(sheet) - read_comments(sheet) unless @comments_read[sheet] - row,col = normalize(row,col) - comment(row,col) != nil + comment(row,col,sheet) != nil end # returns each comment in the selected sheet as an array of elements @@ -356,7 +348,7 @@ def set_cell_values(sheet,x,y,i,v,value_type,formula, @cell_type[sheet] ||= {} @cell_type[sheet][key] = value_type @formula[sheet] ||= {} - @formula[sheet][key] = formula if formula + @formula[sheet][key] = formula if formula @cell[sheet] ||= {} @cell[sheet][key] = case @cell_type[sheet][key] @@ -367,7 +359,7 @@ def set_cell_values(sheet,x,y,i,v,value_type,formula, when :date (base_date+v.to_i).strftime("%Y-%m-%d") when :datetime - (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S") + (base_date+v.to_f.round(6)).strftime("%Y-%m-%d 
%H:%M:%S.%N") when :percentage v.to_f when :time @@ -424,7 +416,7 @@ def read_cells(sheet=nil) value_type = :string v = inlinestr_content excelx_type = :string - y, x = Roo::Base.split_coordinate(c['r']) + y, x = self.class.split_coordinate(c['r']) excelx_value = inlinestr_content #cell.content set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end @@ -465,7 +457,7 @@ def read_cells(sheet=nil) value_type = :float cell.content end - y, x = Roo::Base.split_coordinate(c['r']) + y, x = self.class.split_coordinate(c['r']) set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end end @@ -523,7 +515,7 @@ def read_comments(sheet=nil) return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment| ref = comment.attributes['ref'].to_s - row,col = Roo::Base.split_coordinate(ref) + row,col = self.class.split_coordinate(ref) comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text| @comment[sheet] ||= {} @comment[sheet][[row,col]] = text.text @@ -541,9 +533,9 @@ def read_hyperlinks(sheet=nil) rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r| [r.attribute('Id').text, r] end] - @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h| + @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink[id]").each do |h| if rel_element = rels[h.attribute('id').text] - row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s) + row,col = self.class.split_coordinate(h.attributes['ref'].to_s) @hyperlink[sheet] ||= {} @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text end @@ -575,9 +567,23 @@ def process_zipfile(tmpdir, zipfilename, zip, path='') "#{tmpdir}/roo_sharedStrings.xml" elsif entry_name.end_with?('styles.xml') "#{tmpdir}/roo_styles.xml" - elsif entry_name =~ /sheet([0-9]+).xml$/ + elsif entry_name =~ /sheet([0-9]+)?.xml$/ 
nr = $1 - @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}" + path = "#{tmpdir}/roo_sheet#{nr.to_i}" + + # Numbers 3.1 exports first sheet without sheet number. Such sheets + # are always added to the beginning of the array which, naturally, + # causes other sheets to be pushed to the next index which could + # lead to sheet references getting overwritten, so we need to + # handle that case specifically. + if nr + sheet_files_index = nr.to_i - 1 + sheet_files_index += 1 if @sheet_files[sheet_files_index] + @sheet_files[sheet_files_index] = path + else + @sheet_files.unshift path + path + end elsif entry_name =~ /comments([0-9]+).xml$/ nr = $1 @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}" @@ -593,7 +599,7 @@ def process_zipfile(tmpdir, zipfilename, zip, path='') end def extract_file(source_zip, entry, destination_path) - open(destination_path,'wb') {|f| + File.open(destination_path,'wb') {|f| f << source_zip.read(entry) } end @@ -655,20 +661,31 @@ def attribute2format(s) end def base_date - @base_date ||= read_base_date + @base_date ||= + begin + # Default to 1900 (minus one day due to excel quirk) but use 1904 if + # it's set in the Workbook's workbookPr + # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx + @workbook_doc.xpath("//xmlns:workbookPr[date1904]").each do |workbookPr| + if workbookPr["date1904"] =~ /true|1/i + return Date.new(1904,01,01) + end + end + Date.new(1899,12,30) + end end - # Default to 1900 (minus one day due to excel quirk) but use 1904 if - # it's set in the Workbook's workbookPr - # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx - def read_base_date - base_date = Date.new(1899,12,30) - @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr| - if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i - base_date = Date.new(1904,01,01) - end - end - base_date + def create_datetime_from(datetime_string) + date_part,time_part = round_time_from(datetime_string).split(' ') + 
yyyy,mm,dd = date_part.split('-') + hh,mi,ss = time_part.split(':') + DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) end -end # class + def round_time_from(datetime_string) + date_part,time_part = datetime_string.split(' ') + yyyy,mm,dd = date_part.split('-') + hh,mi,ss = time_part.split(':') + Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S") + end +end diff --git a/lib/roo/openoffice.rb b/lib/roo/openoffice.rb index dc0c657d..1e5df6a5 100644 --- a/lib/roo/openoffice.rb +++ b/lib/roo/openoffice.rb @@ -189,7 +189,7 @@ def label(labelname) end if @label.has_key? labelname return @label[labelname][1].to_i, - Roo::Base.letter_to_number(@label[labelname][2]), + self.class.letter_to_number(@label[labelname][2]), @label[labelname][0] else return nil,nil,nil @@ -203,7 +203,7 @@ def labels(sheet=nil) @label.map do |label| [ label[0], # name [ label[1][1].to_i, # row - Roo::Base.letter_to_number(label[1][2]), # column + self.class.letter_to_number(label[1][2]), # column label[1][0], # sheet ] ] end diff --git a/lib/roo/spreadsheet.rb b/lib/roo/spreadsheet.rb index 594dc538..822d0c8e 100644 --- a/lib/roo/spreadsheet.rb +++ b/lib/roo/spreadsheet.rb @@ -1,33 +1,33 @@ module Roo class Spreadsheet class << self - def open(file, options = {}) - file = file.respond_to?(:path) ? file.path : file + def open(path, options = {}) + path = path.respond_to?(:path) ? path.path : path extension = if options[:extension] options[:file_warning] = :ignore ".#{options.delete(:extension)}".gsub(/[.]+/, ".") else - File.extname(URI.decode(URI.parse(URI.encode(file)).path)) + File.extname((path =~ URI::regexp) ? 
URI.parse(URI.encode(path)).path : path) end case extension.downcase when '.xls' - Roo::Excel.new(file, options) + Roo::Excel.new(path, options) when '.xlsx' - Roo::Excelx.new(file, options) + Roo::Excelx.new(path, options) when '.ods' - Roo::OpenOffice.new(file, options) + Roo::OpenOffice.new(path, options) when '.xml' - Roo::Excel2003XML.new(file, options) + Roo::Excel2003XML.new(path, options) when '' - Roo::Google.new(file, options) + Roo::Google.new(path, options) when '.csv' - Roo::CSV.new(file, options) + Roo::CSV.new(path, options) else raise ArgumentError, - "Can't detect the type of #{file} - please use the :extension option to declare its type." + "Can't detect the type of #{path} - please use the :extension option to declare its type." end end end diff --git a/spec/lib/roo/excelx/format_spec.rb b/spec/lib/roo/excelx/format_spec.rb index 815fbc26..488d2379 100644 --- a/spec/lib/roo/excelx/format_spec.rb +++ b/spec/lib/roo/excelx/format_spec.rb @@ -40,7 +40,8 @@ 'dd/mmm/yy\\ hh:mm' => :datetime, 'dd/mmm/yy' => :date, # 2011-05-21 'yyyy-mm-dd' => :date, # 2011-09-16 - 'yyyy-mm-dd;@' => :date + 'yyyy-mm-dd;@' => :date, + '#0_);[Red]\(0\)' => :float }.each do |format, type| it "translates #{format} to #{type}" do Roo::Excelx::Format.to_type(format).should == type diff --git a/spec/lib/roo/spreadsheet_spec.rb b/spec/lib/roo/spreadsheet_spec.rb index ba58cd0e..39d1af21 100644 --- a/spec/lib/roo/spreadsheet_spec.rb +++ b/spec/lib/roo/spreadsheet_spec.rb @@ -2,6 +2,15 @@ describe Roo::Spreadsheet do describe '.open' do + context 'when the file name includes a space' do + let(:filename) { 'great scott.xls' } + + it 'loads the proper type' do + expect(Roo::Excel).to receive(:new).with(filename, {}) + Roo::Spreadsheet.open(filename) + end + end + context 'when the file extension is uppercase' do let(:filename) { 'file.XLS' } diff --git a/test/files/datetime.ods b/test/files/datetime.ods index c46784c6..2c515dbd 100644 Binary files a/test/files/datetime.ods and 
b/test/files/datetime.ods differ diff --git a/test/files/datetime.xls b/test/files/datetime.xls index 15141050..61158a06 100644 Binary files a/test/files/datetime.xls and b/test/files/datetime.xls differ diff --git a/test/files/datetime.xlsx b/test/files/datetime.xlsx index 38415ee7..6bef0c7a 100644 Binary files a/test/files/datetime.xlsx and b/test/files/datetime.xlsx differ diff --git a/test/files/datetime.xml b/test/files/datetime.xml index fe4cae6f..f61ca3a3 100755 --- a/test/files/datetime.xml +++ b/test/files/datetime.xml @@ -28,6 +28,9 @@ + @@ -36,7 +39,7 @@ - @@ -69,6 +72,11 @@ 1961-11-21T00:00:00.000 1961-11-21T00:00:00.000 + + 2013-11-05T11:45:00.000 + 2013-11-05T11:45:00.000 + 2013-11-05T11:45:00.000 +
diff --git a/test/files/numbers-export.xlsx b/test/files/numbers-export.xlsx new file mode 100644 index 00000000..472471e8 Binary files /dev/null and b/test/files/numbers-export.xlsx differ diff --git a/test/test_helper.rb b/test/test_helper.rb index d9b7ff7d..c5f8823b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -8,6 +8,8 @@ require 'logger' require 'date' require 'webmock/test_unit' +require 'pry' +require 'pry-nav' # require gem files require File.dirname(__FILE__) + '/../lib/roo' @@ -47,7 +49,7 @@ def file_diff(fn1,fn2) if f2.eof? == false while f2.eof? == false line2 = f2.gets - result ">#{line2}\n" + result << ">#{line2}\n" end end end diff --git a/test/test_roo.rb b/test/test_roo.rb index 3f441ec0..d13aa453 100644 --- a/test/test_roo.rb +++ b/test/test_roo.rb @@ -1413,6 +1413,9 @@ def test_datetime assert_equal Date.new(1961,11,21), oo.cell('a',7) assert_equal Date.new(1961,11,21), oo.cell('b',7) assert_equal Date.new(1961,11,21), oo.cell('c',7) + assert_equal DateTime.new(2013,11,5,11,45,00), oo.cell('a',8) + assert_equal DateTime.new(2013,11,5,11,45,00), oo.cell('b',8) + assert_equal DateTime.new(2013,11,5,11,45,00), oo.cell('c',8) end end @@ -2315,4 +2318,19 @@ def test_bug_numbered_sheet_names end end + def test_parsing_xslx_from_numbers + return unless EXCELX + xlsx = Roo::Excelx.new(File.join(TESTDIR, "numbers-export.xlsx")) + + xlsx.default_sheet = xlsx.sheets.first + assert_equal 'Sheet 1', xlsx.cell('a',1) + + # Another buggy behavior of Numbers 3.1: if a workbook has more than a + # single sheet, all sheets except the first one will have an extra row and + # column added to the beginning. That's why we assert against cell B2 and + # not A1 + xlsx.default_sheet = xlsx.sheets.last + assert_equal 'Sheet 2', xlsx.cell('b',2) + end + end # class