diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a099c76..86a9484a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## [Unreleased][unreleased] +## [2.1.0] - 2015-07-18 ### Added - Added support for Excel 2007 `xlsm` files. [#232](https://github.com/roo-rb/roo/pull/232) - Roo::Excelx returns an enumerator when calling each_row_streaming without a block. [#224](https://github.com/roo-rb/roo/pull/224) diff --git a/README.md b/README.md index 1153eee8..2f091709 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Install as a gem Or add it to your Gemfile ```ruby -gem 'roo', '~> 2.0.0' +gem 'roo', '~> 2.1.0' ``` ## Usage @@ -143,7 +143,7 @@ sheet.to_xml sheet.to_yaml ``` -### Excel (xlsx) Support +### Excel (xlsx and xlsm) Support Stream rows from an Excelx spreadsheet. diff --git a/lib/roo.rb b/lib/roo.rb index 9b2e0c8e..bedcd5be 100644 --- a/lib/roo.rb +++ b/lib/roo.rb @@ -1,3 +1,4 @@ +require 'roo/constants' require 'roo/spreadsheet' require 'roo/base' @@ -17,11 +18,11 @@ module Roo def self.const_missing(const_name) case const_name when :Excel - raise "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel." + raise ROO_EXCEL_NOTICE when :Excel2003XML - raise "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML." + raise ROO_EXCELML_NOTICE when :Google - raise "Google support has been extracted to roo-google. Install roo-google to use Roo::Google." + raise ROO_GOOGLE_NOTICE else super end diff --git a/lib/roo/constants.rb b/lib/roo/constants.rb new file mode 100644 index 00000000..90d54eec --- /dev/null +++ b/lib/roo/constants.rb @@ -0,0 +1,5 @@ +module Roo + ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze + ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze + ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze +end \ No newline at end of file diff --git a/lib/roo/libre_office.rb b/lib/roo/libre_office.rb index 5dd0b2d2..6aa2332a 100644 --- a/lib/roo/libre_office.rb +++ b/lib/roo/libre_office.rb @@ -1,5 +1,4 @@ require 'roo/open_office' # LibreOffice is just an alias for Roo::OpenOffice class -class Roo::LibreOffice < Roo::OpenOffice -end +Roo::LibreOffice = Roo::OpenOffice diff --git a/lib/roo/open_office.rb b/lib/roo/open_office.rb index 183b03aa..7d5bfc20 100644 --- a/lib/roo/open_office.rb +++ b/lib/roo/open_office.rb @@ -4,318 +4,306 @@ require 'zip/filesystem' require 'roo/font' -class Roo::OpenOffice < Roo::Base - # initialization and opening of a spreadsheet file - # values for packed: :zip - def initialize(filename, options={}) - packed = options[:packed] - file_warning = options[:file_warning] || :error - - @only_visible_sheets = options[:only_visible_sheets] - file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed) - @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root]) - @filename = local_filename(filename, @tmpdir, packed) - #TODO: @cells_read[:default] = false - Zip::File.open(@filename) do |zip_file| - if content_entry = zip_file.glob("content.xml").first - roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml') +module Roo + class OpenOffice < Roo::Base + ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze + XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze + XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze + + # initialization and opening of a spreadsheet file + # values for packed: :zip + def initialize(filename, options = {}) + packed = options[:packed] + file_warning = options[:file_warning] || :error + + @only_visible_sheets = options[:only_visible_sheets] + file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed) + @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root]) + @filename = local_filename(filename, @tmpdir, packed) + # TODO: @cells_read[:default] = false + open_oo_file(options) + super(filename, options) + initialize_default_variables + rescue => e # clean up any temp files, but only if an error was raised + close + raise e + end + + def open_oo_file(options) + Zip::File.open(@filename) do |zip_file| + content_entry = zip_file.glob('content.xml').first + fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry + + roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml') content_entry.extract(roo_content_xml_path) - decrypt_if_necessary( - zip_file, - content_entry, - roo_content_xml_path, - options - ) - else - raise ArgumentError, 'file missing required content.xml' + decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options) end end - super(filename, options) - @formula = Hash.new - @style = Hash.new - @style_defaults = Hash.new { |h,k| h[k] = [] } - @table_display = Hash.new { |h,k| h[k] = true } - @font_style_definitions = Hash.new - @comment = Hash.new - @comments_read = Hash.new - rescue => e # clean up any temp files, but only if an error was raised - close - raise e - end - def method_missing(m,*args) - read_labels - # is method name a label name - if @label.has_key?(m.to_s) - row,col = label(m.to_s) - cell(row,col) - else - # call super for methods like #a1 - super + def initialize_default_variables + @formula = {} + @style = {} + @style_defaults = Hash.new { |h, k| h[k] = [] } + @table_display = Hash.new { |h, k| h[k] = true } + @font_style_definitions = {} + @comment = {} + @comments_read = {} end - end - # Returns the content of a spreadsheet-cell. - # (1,1) is the upper left corner. - # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the - # cell at the first line and first row. - def cell(row, col, sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - if celltype(row,col,sheet) == :date - yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-') - return Date.new(yyyy.to_i,mm.to_i,dd.to_i) - end - @cell[sheet][[row,col]] - end + def method_missing(m, *args) + read_labels + # is method name a label name + if @label.key?(m.to_s) + row, col = label(m.to_s) + cell(row, col) + else + # call super for methods like #a1 + super + end + end - # Returns the formula at (row,col). - # Returns nil if there is no formula. - # The method #formula? checks if there is a formula. - def formula(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - @formula[sheet][[row,col]] - end + # Returns the content of a spreadsheet-cell. + # (1,1) is the upper left corner. + # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the + # cell at the first line and first row. + def cell(row, col, sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + row, col = normalize(row, col) + if celltype(row, col, sheet) == :date + yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-') + return Date.new(yyyy.to_i, mm.to_i, dd.to_i) + end - # Predicate methods really should return a boolean - # value. Hopefully no one was relying on the fact that this - # previously returned either nil/formula - def formula?(*args) - !!formula(*args) - end + @cell[sheet][[row, col]] + end + + # Returns the formula at (row,col). + # Returns nil if there is no formula. + # The method #formula? checks if there is a formula. + def formula(row, col, sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + row, col = normalize(row, col) + @formula[sheet][[row, col]] + end - # returns each formula in the selected sheet as an array of elements - # [row, col, formula] - def formulas(sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - if @formula[sheet] + # Predicate methods really should return a boolean + # value. Hopefully no one was relying on the fact that this + # previously returned either nil/formula + def formula?(*args) + !!formula(*args) + end + + # returns each formula in the selected sheet as an array of elements + # [row, col, formula] + def formulas(sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + return [] unless @formula[sheet] @formula[sheet].each.collect do |elem| [elem[0][0], elem[0][1], elem[1]] end - else - [] end - end - # Given a cell, return the cell's style - def font(row, col, sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default' - @font_style_definitions[style_name] - end + # Given a cell, return the cell's style + def font(row, col, sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + row, col = normalize(row, col) + style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default' + @font_style_definitions[style_name] + end - # returns the type of a cell: - # * :float - # * :string - # * :date - # * :percentage - # * :formula - # * :time - # * :datetime - def celltype(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - if @formula[sheet][[row,col]] - return :formula - else - @cell_type[sheet][[row,col]] + # returns the type of a cell: + # * :float + # * :string + # * :date + # * :percentage + # * :formula + # * :time + # * :datetime + def celltype(row, col, sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + row, col = normalize(row, col) + @formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]] end - end - def sheets - unless @table_display.any? - doc.xpath("//*[local-name()='automatic-styles']").each do |style| - read_table_styles(style) + def sheets + unless @table_display.any? + doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style| + read_table_styles(style) + end end + doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet| + if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')] + sheet.attributes['name'].value + end + end.compact end - doc.xpath("//*[local-name()='table']").map do |sheet| - if !@only_visible_sheets || @table_display[attr(sheet,'style-name')] - sheet.attributes["name"].value - end - end.compact - end - # version of the Roo::OpenOffice document - # at 2007 this is always "1.0" - def officeversion - oo_version - @officeversion - end - - # shows the internal representation of all cells - # mainly for debugging purposes - def to_s(sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - @cell[sheet].inspect - end + # version of the Roo::OpenOffice document + # at 2007 this is always "1.0" + def officeversion + oo_version + @officeversion + end - # returns the row,col values of the labelled cell - # (nil,nil) if label is not defined - def label(labelname) - read_labels - unless @label.size > 0 - return nil,nil,nil + # shows the internal representation of all cells + # mainly for debugging purposes + def to_s(sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + @cell[sheet].inspect end - if @label.has_key? labelname - return @label[labelname][1].to_i, + + # returns the row,col values of the labelled cell + # (nil,nil) if label is not defined + def label(labelname) + read_labels + return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname) + [ + @label[labelname][1].to_i, ::Roo::Utils.letter_to_number(@label[labelname][2]), @label[labelname][0] - else - return nil,nil,nil + ] end - end - # Returns an array which all labels. Each element is an array with - # [labelname, [row,col,sheetname]] - def labels(sheet=nil) - read_labels - @label.map do |label| - [ label[0], # name - [ label[1][1].to_i, # row + # Returns an array which all labels. Each element is an array with + # [labelname, [row,col,sheetname]] + def labels(_sheet = nil) + read_labels + @label.map do |label| + [label[0], # name + [label[1][1].to_i, # row ::Roo::Utils.letter_to_number(label[1][2]), # column label[1][0], # sheet - ] ] + ]] + end end - end - # returns the comment at (row/col) - # nil if there is no comment - def comment(row,col,sheet=nil) - sheet ||= default_sheet - read_cells(sheet) - row,col = normalize(row,col) - return nil unless @comment[sheet] - @comment[sheet][[row,col]] - end + # returns the comment at (row/col) + # nil if there is no comment + def comment(row, col, sheet = nil) + sheet ||= default_sheet + read_cells(sheet) + row, col = normalize(row, col) + return nil unless @comment[sheet] + @comment[sheet][[row, col]] + end - # returns each comment in the selected sheet as an array of elements - # [row, col, comment] - def comments(sheet=nil) - sheet ||= default_sheet - read_comments(sheet) unless @comments_read[sheet] - if @comment[sheet] + # returns each comment in the selected sheet as an array of elements + # [row, col, comment] + def comments(sheet = nil) + sheet ||= default_sheet + read_comments(sheet) unless @comments_read[sheet] + return [] unless @comment[sheet] @comment[sheet].each.collect do |elem| - [elem[0][0],elem[0][1],elem[1]] + [elem[0][0], elem[0][1], elem[1]] end - else - [] end - end - private - - # If the ODS file has an encryption-data element, then try to decrypt. - # If successful, the temporary content.xml will be overwritten with - # decrypted contents. - def decrypt_if_necessary( - zip_file, - content_entry, - roo_content_xml_path, options - ) - # Check if content.xml is encrypted by extracting manifest.xml - # and searching for a manifest:encryption-data element - - if manifest_entry = zip_file.glob("META-INF/manifest.xml").first - roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml") - manifest_entry.extract(roo_manifest_xml_path) - manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path) - - # XPath search for manifest:encryption-data only for the content.xml - # file - - encryption_data = manifest.xpath( - "//manifest:file-entry[@manifest:full-path='content.xml']"\ + private + + # If the ODS file has an encryption-data element, then try to decrypt. + # If successful, the temporary content.xml will be overwritten with + # decrypted contents. + def decrypt_if_necessary( + zip_file, + content_entry, + roo_content_xml_path, options + ) + # Check if content.xml is encrypted by extracting manifest.xml + # and searching for a manifest:encryption-data element + + if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first) + roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml') + manifest_entry.extract(roo_manifest_xml_path) + manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path) + + # XPath search for manifest:encryption-data only for the content.xml + # file + + encryption_data = manifest.xpath( + "//manifest:file-entry[@manifest:full-path='content.xml']"\ "/manifest:encryption-data" - ).first + ).first - # If XPath returns a node, then we know content.xml is encrypted + # If XPath returns a node, then we know content.xml is encrypted - if !encryption_data.nil? + unless encryption_data.nil? - # Since we know it's encrypted, we check for the password option - # and if it doesn't exist, raise an argument error + # Since we know it's encrypted, we check for the password option + # and if it doesn't exist, raise an argument error - password = options[:password] - if !password.nil? - perform_decryption( - encryption_data, - password, - content_entry, - roo_content_xml_path - ) - else - raise ArgumentError, - 'file is encrypted but password was not supplied' + password = options[:password] + if !password.nil? + perform_decryption( + encryption_data, + password, + content_entry, + roo_content_xml_path + ) + else + fail ArgumentError, 'file is encrypted but password was not supplied' + end end + else + fail ArgumentError, 'file missing required META-INF/manifest.xml' end - else - raise ArgumentError, 'file missing required META-INF/manifest.xml' end - end - # Process the ODS encryption manifest and perform the decryption - def perform_decryption( - encryption_data, - password, - content_entry, - roo_content_xml_path - ) - # Extract various expected attributes from the manifest that - # describe the encryption - - algorithm_node = encryption_data.xpath("manifest:algorithm").first - key_derivation_node = - encryption_data.xpath("manifest:key-derivation").first - start_key_generation_node = - encryption_data.xpath("manifest:start-key-generation").first - - # If we have all the expected elements, then we can perform - # the decryption. - - if !algorithm_node.nil? && !key_derivation_node.nil? && - !start_key_generation_node.nil? - - # The algorithm is a URI describing the algorithm used - algorithm = algorithm_node['manifest:algorithm-name'] - - # The initialization vector is base-64 encoded - iv = Base64.decode64( - algorithm_node['manifest:initialisation-vector'] - ) - key_derivation_name = - key_derivation_node['manifest:key-derivation-name'] - key_size = key_derivation_node['manifest:key-size'].to_i - iteration_count = - key_derivation_node['manifest:iteration-count'].to_i - salt = Base64.decode64(key_derivation_node['manifest:salt']) - - # The key is hashed with an algorithm represented by this URI - key_generation_name = - start_key_generation_node[ - 'manifest:start-key-generation-name' - ] - key_generation_size = - start_key_generation_node['manifest:key-size'].to_i + # Process the ODS encryption manifest and perform the decryption + def perform_decryption( + encryption_data, + password, + content_entry, + roo_content_xml_path + ) + # Extract various expected attributes from the manifest that + # describe the encryption + + algorithm_node = encryption_data.xpath('manifest:algorithm').first + key_derivation_node = + encryption_data.xpath('manifest:key-derivation').first + start_key_generation_node = + encryption_data.xpath('manifest:start-key-generation').first + + # If we have all the expected elements, then we can perform + # the decryption. + + if !algorithm_node.nil? && !key_derivation_node.nil? && + !start_key_generation_node.nil? + + # The algorithm is a URI describing the algorithm used + algorithm = algorithm_node['manifest:algorithm-name'] + + # The initialization vector is base-64 encoded + iv = Base64.decode64( + algorithm_node['manifest:initialisation-vector'] + ) + key_derivation_name = key_derivation_node['manifest:key-derivation-name'] + key_size = key_derivation_node['manifest:key-size'].to_i + iteration_count = key_derivation_node['manifest:iteration-count'].to_i + salt = Base64.decode64(key_derivation_node['manifest:salt']) + + # The key is hashed with an algorithm represented by this URI + key_generation_name = + start_key_generation_node[ + 'manifest:start-key-generation-name' + ] + key_generation_size = start_key_generation_node['manifest:key-size'].to_i hashed_password = password - key = nil + key = nil - if key_generation_name.eql?( - "http://www.w3.org/2000/09/xmldsig#sha256" - ) - hashed_password = Digest::SHA256.digest(password) - else - raise ArgumentError, 'Unknown key generation algorithm ' + - key_generation_name - end + if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256' + + hashed_password = Digest::SHA256.digest(password) + else + fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}" + end cipher = find_cipher( algorithm, @@ -336,210 +324,177 @@ def perform_decryption( Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted) ) rescue StandardError => error - raise ArgumentError, - 'Invalid password or other data error: ' + error.to_s + raise ArgumentError, "Invalid password or other data error: #{error}" end - else - raise ArgumentError, - 'manifest.xml missing encryption-data elements' + else + fail ArgumentError, 'manifest.xml missing encryption-data elements' + end end - end - # Create a cipher based on an ODS algorithm URI from manifest.xml - def find_cipher( - algorithm, - key_derivation_name, - hashed_password, - salt, - iteration_count, - iv - ) - cipher = nil - if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc" + # Create a cipher based on an ODS algorithm URI from manifest.xml + # params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv + def find_cipher(*args) + fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc' + cipher = OpenSSL::Cipher.new('AES-256-CBC') cipher.decrypt cipher.padding = 0 - cipher.key = find_cipher_key( - cipher, - key_derivation_name, - hashed_password, - salt, - iteration_count - ) - cipher.iv = iv - else - raise ArgumentError, 'Unknown algorithm ' + algorithm - end - cipher - end + cipher.key = find_cipher_key(cipher, *args[1..4]) + cipher.iv = args[5] - # Create a cipher key based on an ODS algorithm string from manifest.xml - def find_cipher_key( - cipher, - key_derivation_name, - hashed_password, - salt, - iteration_count - ) - if key_derivation_name.eql? "PBKDF2" - key = OpenSSL::PKCS5.pbkdf2_hmac_sha1( - hashed_password, - salt, - iteration_count, - cipher.key_len - ) - else - raise ArgumentError, 'Unknown key derivation name ' + - key_derivation_name - end - key - end + cipher + end - # Block decrypt raw bytes from the zip file based on the cipher - def decrypt(content_entry, cipher) - # Zip::Entry.extract writes a 0-length file when trying - # to extract an encrypted stream, so we read the - # raw bytes based on the offset and lengths - decrypted = "" - File.open(@filename, "rb") do |zipfile| - zipfile.seek( - content_entry.local_header_offset + - content_entry.calculate_local_header_size - ) - total_to_read = content_entry.compressed_size - - block_size = 4096 - block_size = total_to_read if block_size > total_to_read - - while buffer = zipfile.read(block_size) - decrypted += cipher.update(buffer) - total_to_read -= buffer.length - - break if total_to_read == 0 - - block_size = total_to_read if block_size > total_to_read - end + # Create a cipher key based on an ODS algorithm string from manifest.xml + def find_cipher_key(*args) + fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2' + + OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len) end - decrypted + cipher.final - end + # Block decrypt raw bytes from the zip file based on the cipher + def decrypt(content_entry, cipher) + # Zip::Entry.extract writes a 0-length file when trying + # to extract an encrypted stream, so we read the + # raw bytes based on the offset and lengths + decrypted = '' + File.open(@filename, 'rb') do |zipfile| + zipfile.seek( + content_entry.local_header_offset + + content_entry.calculate_local_header_size + ) + total_to_read = content_entry.compressed_size - def doc - @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml")) - end + block_size = 4096 + block_size = total_to_read if block_size > total_to_read + + while (buffer = zipfile.read(block_size)) + decrypted += cipher.update(buffer) + total_to_read -= buffer.length - # read the version of the OO-Version - def oo_version - doc.xpath("//*[local-name()='document-content']").each do |office| - @officeversion = attr(office,'version') + break if total_to_read == 0 + + block_size = total_to_read if block_size > total_to_read + end + end + + decrypted + cipher.final + end + + def doc + @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml')) + end + + # read the version of the OO-Version + def oo_version + doc.xpath("//*[local-name()='document-content']").each do |office| + @officeversion = attribute(office, 'version') + end end - end - # helper function to set the internal representation of cells - def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name) - key = [y,x+i] - @cell_type[sheet] = {} unless @cell_type[sheet] - @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type) - @formula[sheet] = {} unless @formula[sheet] - if formula - ['of:', 'oooc:'].each do |prefix| - if formula[0,prefix.length] == prefix - formula = formula[prefix.length..-1] + # helper function to set the internal representation of cells + def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name) + key = [y, x + i] + @cell_type[sheet] ||= {} + @cell_type[sheet][key] = value_type.to_sym if value_type + @formula[sheet] ||= {} + if formula + ['of:', 'oooc:'].each do |prefix| + if formula[0, prefix.length] == prefix + formula = formula[prefix.length..-1] + end end + @formula[sheet][key] = formula end - @formula[sheet][key] = formula - end - @cell[sheet] = {} unless @cell[sheet] - @style[sheet] = {} unless @style[sheet] - @style[sheet][key] = style_name - case @cell_type[sheet][key] - when :float - @cell[sheet][key] = v.to_f - when :string - @cell[sheet][key] = str_v - when :date - #TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size - if attr(table_cell,'date-value').size != "XXXX-XX-XX".size - #-- dann ist noch eine Uhrzeit vorhanden - #-- "1961-11-21T12:17:18" - @cell[sheet][key] = DateTime.parse(attr(table_cell,'date-value').to_s) - @cell_type[sheet][key] = :datetime + @cell[sheet] ||= {} + @style[sheet] ||= {} + @style[sheet][key] = style_name + case @cell_type[sheet][key] + when :float, :percentage + @cell[sheet][key] = v.to_f + when :string + @cell[sheet][key] = str_v + when :date + # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size + if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size + #-- dann ist noch eine Uhrzeit vorhanden + #-- "1961-11-21T12:17:18" + @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s) + @cell_type[sheet][key] = :datetime + else + @cell[sheet][key] = table_cell.attributes['date-value'] + end + when :time + hms = v.split(':') + @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i else - @cell[sheet][key] = table_cell.attributes['date-value'] + @cell[sheet][key] = v end - when :percentage - @cell[sheet][key] = v.to_f - when :time - hms = v.split(':') - @cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i - else - @cell[sheet][key] = v end - end - # read all cells in the selected sheet - #-- - # the following construct means '4 blanks' - # some content - #++ - def read_cells(sheet = default_sheet) - validate_sheet!(sheet) - return if @cells_read[sheet] - - sheet_found = false - doc.xpath("//*[local-name()='table']").each do |ws| - if sheet == attr(ws,'name') + # read all cells in the selected sheet + #-- + # the following construct means '4 blanks' + # some content + #++ + def read_cells(sheet = default_sheet) + validate_sheet!(sheet) + return if @cells_read[sheet] + + sheet_found = false + doc.xpath("//*[local-name()='table']").each do |ws| + next unless sheet == attribute(ws, 'name') + sheet_found = true - col = 1 - row = 1 + col = 1 + row = 1 ws.children.each do |table_element| case table_element.name when 'table-column' @style_defaults[sheet] << table_element.attributes['default-cell-style-name'] when 'table-row' if table_element.attributes['number-rows-repeated'] - skip_row = attr(table_element,'number-rows-repeated').to_s.to_i - row = row + skip_row - 1 + skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i + row = row + skip_row - 1 end table_element.children.each do |cell| - skip_col = attr(cell, 'number-columns-repeated') - formula = attr(cell,'formula') - value_type = attr(cell,'value-type') - v = attr(cell,'value') - style_name = attr(cell,'style-name') + skip_col = attribute(cell, 'number-columns-repeated') + formula = attribute(cell, 'formula') + value_type = attribute(cell, 'value-type') + v = attribute(cell, 'value') + style_name = attribute(cell, 'style-name') case value_type when 'string' - str_v = '' + str_v = '' # insert \n if there is more than one paragraph para_count = 0 cell.children.each do |str| # begin comments -=begin -- - - - 2011-09-20T00:00:00 - Kommentar fuer B4 - - B4 (mit Kommentar) - -=end + #=begin + #- + # - + # 2011-09-20T00:00:00 + # Kommentar fuer B4 + # + # B4 (mit Kommentar) + # + #=end if str.name == 'annotation' str.children.each do |annotation| - if annotation.name == 'p' - # @comment ist ein Hash mit Sheet als Key (wie bei @cell) - # innerhalb eines Elements besteht ein Eintrag aus einem - # weiteren Hash mit Key [row,col] und dem eigentlichen - # Kommentartext als Inhalt - @comment[sheet] = Hash.new unless @comment[sheet] - key = [row,col] - @comment[sheet][key] = annotation.text - end + next unless annotation.name == 'p' + # @comment ist ein Hash mit Sheet als Key (wie bei @cell) + # innerhalb eines Elements besteht ein Eintrag aus einem + # weiteren Hash mit Key [row,col] und dem eigentlichen + # Kommentartext als Inhalt + @comment[sheet] = Hash.new unless @comment[sheet] + key = [row, col] + @comment[sheet][key] = annotation.text end end # end comments if str.name == 'p' - v = str.content - str_v += "\n" if para_count > 0 + v = str.content + str_v += "\n" if para_count > 0 para_count += 1 if str.children.size > 1 str_v += children_to_string(str.children) @@ -548,32 +503,28 @@ def read_cells(sheet = default_sheet) str_v += child.content #.text end end - str_v.gsub!(/'/,"'") # special case not supported by unescapeHTML + str_v.gsub!(/'/, "'") # special case not supported by unescapeHTML str_v = CGI.unescapeHTML(str_v) end # == 'p' end when 'time' cell.children.each do |str| - if str.name == 'p' - v = str.content - end + v = str.content if str.name == 'p' end when '', nil, 'date', 'percentage', 'float' # when 'boolean' - v = attr(cell,'boolean-value').to_s - else - # raise "unknown type #{value_type}" + v = attribute(cell, 'boolean-value').to_s end if skip_col - if v != nil or cell.attributes['date-value'] - 0.upto(skip_col.to_i-1) do |i| - set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name) + if !v.nil? || cell.attributes['date-value'] + 0.upto(skip_col.to_i - 1) do |i| + set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name) end end col += (skip_col.to_i - 1) end # if skip - set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name) + set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name) col += 1 end row += 1 @@ -581,101 +532,83 @@ def read_cells(sheet = default_sheet) end end end + doc.xpath("//*[local-name()='automatic-styles']").each do |style| + read_styles(style) + end + + fail RangeError unless sheet_found + + @cells_read[sheet] = true + @comments_read[sheet] = true end - doc.xpath("//*[local-name()='automatic-styles']").each do |style| - read_styles(style) - end - if !sheet_found - raise RangeError - end - @cells_read[sheet] = true - @comments_read[sheet] = true - end - # Only calls read_cells because Roo::Base calls read_comments - # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects - def read_comments(sheet=nil) - read_cells(sheet) - end + # Only calls read_cells because Roo::Base calls read_comments + # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects + def read_comments(sheet = nil) + read_cells(sheet) + end - def read_labels - @label ||= Hash[doc.xpath("//table:named-range").map do |ne| - #- - # $Sheet1.$C$5 - #+ - name = attr(ne,'name').to_s - sheetname,coords = attr(ne,'cell-range-address').to_s.split('.$') - col, row = coords.split('$') - sheetname = sheetname[1..-1] if sheetname[0,1] == '$' - [name, [sheetname,row,col]] - end] - end + def read_labels + @label ||= Hash[doc.xpath('//table:named-range').map do |ne| + #- + # $Sheet1.$C$5 + #+ + name = attribute(ne, 'name').to_s + sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$') + col, row = coords.split('$') + sheetname = sheetname[1..-1] if sheetname[0, 1] == '$' + [name, [sheetname, row, col]] + end] + end - def read_styles(style_elements) - @font_style_definitions['Default'] = Roo::Font.new - style_elements.each do |style| - next unless style.name == 'style' - style_name = attr(style,'name') - style.each do |properties| - font = Roo::OpenOffice::Font.new - font.bold = attr(properties,'font-weight') - font.italic = attr(properties,'font-style') - font.underline = attr(properties,'text-underline-style') - @font_style_definitions[style_name] = font + def read_styles(style_elements) + @font_style_definitions['Default'] = Roo::Font.new + style_elements.each do |style| + next unless style.name == 'style' + style_name = attribute(style, 'name') + style.each do |properties| + font = Roo::OpenOffice::Font.new + font.bold = attribute(properties, 'font-weight') + font.italic = attribute(properties, 'font-style') + font.underline = attribute(properties, 'text-underline-style') + @font_style_definitions[style_name] = font + end end end - end - def read_table_styles(styles) - styles.children.each do |style| - next unless style.name == 'style' - style_name = attr(style,'name') - style.children.each do |properties| - display = attr(properties,'display') - next unless display - @table_display[style_name] = (display == 'true') + def read_table_styles(styles) + styles.children.each do |style| + next unless style.name == 'style' + style_name = attribute(style, 'name') + style.children.each do |properties| + display = attribute(properties, 'display') + next unless display + @table_display[style_name] = (display == 'true') + end end end - end - - A_ROO_TYPE = { - "float" => :float, - "string" => :string, - "date" => :date, - "percentage" => :percentage, - "time" => :time, - } - - def self.oo_type_2_roo_type(ootype) - return A_ROO_TYPE[ootype] - end - # helper method to convert compressed spaces and other elements within - # an text into a string - def children_to_string(children) - result = '' - children.each {|child| - if child.text? - result = result + child.content - else - if child.name == 's' - compressed_spaces = child.attributes['c'].to_s.to_i - # no explicit number means a count of 1: - if compressed_spaces == 0 - compressed_spaces = 1 - end - result = result + " "*compressed_spaces + # helper method to convert compressed spaces and other elements within + # an text into a string + # FIXME: add a test for compressed_spaces == 0. It's not currently tested. + def children_to_string(children) + children.map do |child| + if child.text? + child.content else - result = result + child.content + if child.name == 's' + compressed_spaces = child.attributes['c'].to_s.to_i + # no explicit number means a count of 1: + compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces + else + child.content + end end - end - } - result - end + end.join + end - def attr(node, attr_name) - if node.attributes[attr_name] - node.attributes[attr_name].value + def attribute(node, attr_name) + node.attributes[attr_name].value if node.attributes[attr_name] end end end diff --git a/lib/roo/version.rb b/lib/roo/version.rb index 3fdfd7c7..8a38ad23 100644 --- a/lib/roo/version.rb +++ b/lib/roo/version.rb @@ -1,3 +1,3 @@ module Roo - VERSION = "2.0.1.alpha" + VERSION = "2.1.0" end