From f0c683f4abcb6ce34269a12026b25aa17ecb5ff9 Mon Sep 17 00:00:00 2001 From: DmitriyFirsov Date: Mon, 18 Mar 2024 22:29:02 +0100 Subject: [PATCH] Add linter (#119) * added rubocop * applied lint * added lint check in ci --- .github/workflows/test.yml | 15 ++++ .gitignore | 2 + .rubocop.yml | 27 ++++++++ Rakefile | 4 +- creek.gemspec | 28 ++++---- lib/creek/book.rb | 58 ++++++++-------- lib/creek/drawing.rb | 36 +++++----- lib/creek/shared_strings.rb | 31 ++++----- lib/creek/sheet.rb | 118 ++++++++++++++++---------------- lib/creek/styles.rb | 17 ++--- lib/creek/styles/constants.rb | 22 +++--- lib/creek/styles/converter.rb | 48 ++++++------- lib/creek/styles/style_types.rb | 28 ++++---- lib/creek/utils.rb | 2 +- lib/creek/version.rb | 2 +- spec/drawing_spec.rb | 4 +- spec/shared_string_spec.rb | 2 - spec/sheet_spec.rb | 8 ++- spec/styles/converter_spec.rb | 10 ++- spec/styles/style_types_spec.rb | 6 +- spec/test_spec.rb | 72 ++++++++++--------- 21 files changed, 288 insertions(+), 252 deletions(-) create mode 100644 .rubocop.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fffbe9d..e310bcd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,12 +17,27 @@ permissions: contents: read jobs: + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: Run lint + run: bundle exec rubocop + test: runs-on: ubuntu-latest strategy: matrix: ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2'] + needs: + - lint steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index 650a897..4d78914 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ tmp # Mac finder artifacts .DS_Store + +.idea diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..2fce36d --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,27 @@ +Gemspec/RequiredRubyVersion: + Enabled: false + +Layout/LineLength: + Enabled: false +Metrics: + Enabled: false +Naming/ConstantName: + Enabled: false + +Style/FrozenStringLiteralComment: + Enabled: false +Style/Documentation: + Enabled: false +Style/AndOr: + Enabled: false +Style/StringConcatenation: + Enabled: false +Style/ClassAndModuleChildren: + Enabled: false +Style/OptionalBooleanParameter: + Enabled: false +Style/TernaryParentheses: + EnforcedStyle: require_parentheses_when_complex + +Naming/PredicateName: + Enabled: false diff --git a/Rakefile b/Rakefile index b8fe45b..6f6f4e6 100644 --- a/Rakefile +++ b/Rakefile @@ -1,7 +1,7 @@ -require "bundler/gem_tasks" +require 'bundler/gem_tasks' require 'rspec/core/rake_task' RSpec::Core::RakeTask.new('spec') # If you want to make this the default task -task :default => :spec \ No newline at end of file +task default: :spec diff --git a/creek.gemspec b/creek.gemspec index ef6bce5..3fb2ca9 100644 --- a/creek.gemspec +++ b/creek.gemspec @@ -1,29 +1,29 @@ -# coding: utf-8 -lib = File.expand_path('../lib', __FILE__) +lib = File.expand_path('lib', __dir__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'creek/version' Gem::Specification.new do |spec| - spec.name = "creek" + spec.name = 'creek' spec.version = Creek::VERSION - spec.authors = ["pythonicrubyist"] - spec.email = ["pythonicrubyist@gmail.com"] - spec.description = %q{A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.} - spec.summary = %q{A Ruby gem for parsing large Excel(xlsx and xlsm) files.} - spec.homepage = "https://github.com/pythonicrubyist/creek" - spec.license = "MIT" + spec.authors = ['pythonicrubyist'] + spec.email = ['pythonicrubyist@gmail.com'] + spec.description = 'A Ruby gem that streams and parses large Excel(xlsx and xlsm) files fast and efficiently.' + spec.summary = 'A Ruby gem for parsing large Excel(xlsx and xlsm) files.' + spec.homepage = 'https://github.com/pythonicrubyist/creek' + spec.license = 'MIT' - spec.files = `git ls-files`.split($/) + spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR) spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) - spec.require_paths = ["lib"] + spec.require_paths = ['lib'] spec.required_ruby_version = '>= 2.0.0' - spec.add_development_dependency "bundler" - spec.add_development_dependency "rake" - spec.add_development_dependency 'rspec', '~> 3.6.0' + spec.add_development_dependency 'bundler' spec.add_development_dependency 'pry-byebug' + spec.add_development_dependency 'rake' + spec.add_development_dependency 'rspec', '~> 3.6.0' + spec.add_development_dependency 'rubocop' spec.add_dependency 'nokogiri', '>= 1.10.0' spec.add_dependency 'rubyzip', '>= 1.0.0' diff --git a/lib/creek/book.rb b/lib/creek/book.rb index 8fff8e6..157884e 100644 --- a/lib/creek/book.rb +++ b/lib/creek/book.rb @@ -14,11 +14,11 @@ class Creek::Book DATE_1900 = Date.new(1899, 12, 30).freeze DATE_1904 = Date.new(1904, 1, 1).freeze - def initialize path, options = {} + def initialize(path, options = {}) check_file_extension = options.fetch(:check_file_extension, true) if check_file_extension extension = File.extname(options[:original_filename] || path).downcase - raise 'Not a valid file format.' unless (['.xlsx', '.xlsm'].include? extension) + raise 'Not a valid file format.' unless ['.xlsx', '.xlsm'].include? extension end path = download_file(path) if options[:remote] @files = Zip::File.open(path) @@ -28,28 +28,26 @@ def initialize path, options = {} def sheets @sheets ||= begin - doc = @files.file.open "xl/workbook.xml" + doc = @files.file.open 'xl/workbook.xml' xml = Nokogiri::XML::Document.parse doc namespaces = xml.namespaces - cssPrefix = '' + css_prefix = '' namespaces.each do |namespace| - if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns' then - cssPrefix = namespace[0].split(':')[1]+'|' - end + css_prefix = namespace[0].split(':')[1] + '|' if namespace[1] == 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' && namespace[0] != 'xmlns' end - rels_doc = @files.file.open "xl/_rels/workbook.xml.rels" - rels = Nokogiri::XML::Document.parse(rels_doc).css("Relationship") - xml.css(cssPrefix+'sheet').map do |sheet| - sheetfile = rels.find { |el| sheet.attr("r:id") == el.attr("Id") }.attr("Target") + rels_doc = @files.file.open 'xl/_rels/workbook.xml.rels' + rels = Nokogiri::XML::Document.parse(rels_doc).css('Relationship') + xml.css(css_prefix + 'sheet').map do |sheet| + sheetfile = rels.find { |el| sheet.attr('r:id') == el.attr('Id') }.attr('Target') sheet = Sheet.new( self, - sheet.attr("name"), - sheet.attr("sheetid"), - sheet.attr("state"), - sheet.attr("visible"), - sheet.attr("r:id"), + sheet.attr('name'), + sheet.attr('sheetid'), + sheet.attr('state'), + sheet.attr('visible'), + sheet.attr('r:id'), sheetfile ) sheet.with_headers = with_headers @@ -68,23 +66,23 @@ def close def base_date @base_date ||= - begin - # Default to 1900 (minus one day due to excel quirk) but use 1904 if - # it's set in the Workbook's workbookPr - # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx - result = DATE_1900 # default + begin + # Default to 1900 (minus one day due to excel quirk) but use 1904 if + # it's set in the Workbook's workbookPr + # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx + result = DATE_1900 # default - doc = @files.file.open "xl/workbook.xml" - xml = Nokogiri::XML::Document.parse doc - xml.css('workbookPr[date1904]').each do |workbookPr| - if workbookPr['date1904'] =~ /true|1/i - result = DATE_1904 - break + doc = @files.file.open 'xl/workbook.xml' + xml = Nokogiri::XML::Document.parse doc + xml.css('workbookPr[date1904]').each do |workbook_pr| + if workbook_pr['date1904'] =~ /true|1/i + result = DATE_1904 + break + end end - end - result - end + result + end end private diff --git a/lib/creek/drawing.rb b/lib/creek/drawing.rb index 7847536..4df24ce 100644 --- a/lib/creek/drawing.rb +++ b/lib/creek/drawing.rb @@ -15,10 +15,10 @@ def initialize(book, drawing_filepath) @drawings_rels = [] @images_pathnames = Hash.new { |hash, key| hash[key] = [] } - if file_exist?(@drawing_filepath) - load_drawings_and_rels - load_images_pathnames_by_cells if has_images? - end + return unless file_exist?(@drawing_filepath) + + load_drawings_and_rels + load_images_pathnames_by_cells if has_images? end ## @@ -36,13 +36,11 @@ def images_at(cell_name) return if pathnames_at_coordinate.empty? pathnames_at_coordinate.map do |image_pathname| - if image_pathname.exist? - image_pathname - else + unless image_pathname.exist? excel_image_path = "xl/media#{image_pathname.to_path.split(tmpdir).last}" IO.copy_stream(@book.files.file.open(excel_image_path), image_pathname.to_path) - image_pathname - end + end + image_pathname end end @@ -52,8 +50,8 @@ def images_at(cell_name) # Transforms cell name to [row, col], e.g. A1 => [0, 0], B3 => [1, 2] # Rows and cols start with 0. def calc_coordinate(cell_name) - col = COLUMNS.index(cell_name.slice /[A-Z]+/) - row = (cell_name.slice /\d+/).to_i - 1 # rows in drawings start with 0 + col = COLUMNS.index(cell_name.slice(/[A-Z]+/)) + row = cell_name.slice(/\d+/).to_i - 1 # rows in drawings start with 0 [row, col] end @@ -68,7 +66,7 @@ def tmpdir # Drawing xml contains relationships ID's and coordinates (row, col). # Drawing relationships xml contains images' locations. def load_drawings_and_rels - @drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor' ) + @drawings = parse_xml(@drawing_filepath).css('xdr|twoCellAnchor', 'xdr|oneCellAnchor') drawing_rels_filepath = expand_to_rels_path(@drawing_filepath) @drawings_rels = parse_xml(drawing_rels_filepath).css('Relationships') end @@ -78,11 +76,11 @@ def load_drawings_and_rels # As multiple images can be located in a single cell, hash values are array of Pathname objects. # One image can be spread across multiple cells (defined with from-row/to-row/from-col/to-col attributes) - same Pathname object is associated to each row-col combination for the range. def load_images_pathnames_by_cells - image_selector = 'xdr:pic/xdr:blipFill/a:blip'.freeze - row_from_selector = 'xdr:from/xdr:row'.freeze - row_to_selector = 'xdr:to/xdr:row'.freeze - col_from_selector = 'xdr:from/xdr:col'.freeze - col_to_selector = 'xdr:to/xdr:col'.freeze + image_selector = 'xdr:pic/xdr:blipFill/a:blip' + row_from_selector = 'xdr:from/xdr:row' + row_to_selector = 'xdr:to/xdr:row' + col_from_selector = 'xdr:from/xdr:col' + col_to_selector = 'xdr:to/xdr:col' @drawings.xpath('//xdr:twoCellAnchor', '//xdr:oneCellAnchor').each do |drawing| # embed = drawing.xpath(image_selector).first.attributes['embed'] @@ -91,13 +89,13 @@ def load_images_pathnames_by_cells next if embed.nil? rid = embed.value - path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(/[^\/]*$/)}") + path = Pathname.new("#{tmpdir}/#{extract_drawing_path(rid).slice(%r{[^/]*$})}") row_from = drawing.xpath(row_from_selector).text.to_i col_from = drawing.xpath(col_from_selector).text.to_i if drawing.name == 'oneCellAnchor' - @images_pathnames[[row_from , col_from ]].push(path) + @images_pathnames[[row_from, col_from]].push(path) else row_to = drawing.xpath(row_to_selector).text.to_i col_to = drawing.xpath(col_to_selector).text.to_i diff --git a/lib/creek/shared_strings.rb b/lib/creek/shared_strings.rb index 663aaf1..2b32e6b 100644 --- a/lib/creek/shared_strings.rb +++ b/lib/creek/shared_strings.rb @@ -4,25 +4,23 @@ require 'nokogiri' module Creek - class Creek::SharedStrings - SPREADSHEETML_URI = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' attr_reader :book, :dictionary - def initialize book + def initialize(book) @book = book parse_shared_shared_strings end def parse_shared_shared_strings - path = "xl/sharedStrings.xml" - if @book.files.file.exist?(path) - doc = @book.files.file.open path - xml = Nokogiri::XML::Document.parse doc - parse_shared_string_from_document(xml) - end + path = 'xl/sharedStrings.xml' + return unless @book.files.file.exist?(path) + + doc = @book.files.file.open path + xml = Nokogiri::XML::Document.parse doc + parse_shared_string_from_document(xml) end def parse_shared_string_from_document(xml) @@ -30,8 +28,8 @@ def parse_shared_string_from_document(xml) end def self.parse_shared_string_from_document(xml) - dictionary = Hash.new - namespace = xml.namespaces.detect{|_key, uri| uri == SPREADSHEETML_URI } + dictionary = {} + namespace = xml.namespaces.detect { |_key, uri| uri == SPREADSHEETML_URI } prefix = if namespace && namespace[0].start_with?('xmlns:') namespace[0].delete_prefix('xmlns:') + '|' else @@ -42,15 +40,14 @@ def self.parse_shared_string_from_document(xml) xml.css(node_selector).each_with_index do |si, idx| text_nodes = si.css(text_selector) - if text_nodes.count == 1 # plain text node - dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content) - else # rich text nodes with text fragments - dictionary[idx] = text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('') - end + dictionary[idx] = if text_nodes.count == 1 # plain text node + Creek::Styles::Converter.unescape_string(text_nodes.first.content) + else # rich text nodes with text fragments + text_nodes.map { |n| Creek::Styles::Converter.unescape_string(n.content) }.join('') + end end dictionary end - end end diff --git a/lib/creek/sheet.rb b/lib/creek/sheet.rb index 1df94b8..69d8497 100644 --- a/lib/creek/sheet.rb +++ b/lib/creek/sheet.rb @@ -85,65 +85,67 @@ def simple_rows_with_meta_data ## # Returns a hash per row that includes the cell ids and values. # Empty cells will be also included in the hash with a nil value. - def rows_generator include_meta_data=false, use_simple_rows_format=false - path = if @sheetfile.start_with? "/xl/" or @sheetfile.start_with? "xl/" then @sheetfile else "xl/#{@sheetfile}" end - if @book.files.file.exist?(path) - # SAX parsing, Each element in the stream comes through as two events: - # one to open the element and one to close it. - opener = Nokogiri::XML::Reader::TYPE_ELEMENT - closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT - Enumerator.new do |y| - @headers = nil - row, cells, cell = nil, {}, nil - cell_type = nil - cell_style_idx = nil - @book.files.file.open(path) do |xml| - prefix = '' - name_row = "row" - name_c = "c" - name_v = "v" - name_t = "t" - Nokogiri::XML::Reader.from_io(xml).each do |node| - if prefix.empty? && node.namespaces.any? - namespace = node.namespaces.detect{|_key, uri| uri == SPREADSHEETML_URI } - prefix = if namespace && namespace[0].start_with?('xmlns:') - namespace[0].delete_prefix('xmlns:') + ':' - else - '' - end - name_row = "#{prefix}row" - name_c = "#{prefix}c" - name_v = "#{prefix}v" - name_t = "#{prefix}t" - end - if node.name == name_row && node.node_type == opener - row = node.attributes - row['cells'] = {} - cells = {} - y << (include_meta_data ? row : cells) if node.self_closing? - elsif node.name == name_row && node.node_type == closer - processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format) - @headers = processed_cells if with_headers && row['r'] == HEADERS_ROW_NUMBER - - if @images_present - processed_cells.each do |cell_name, cell_value| - next unless cell_value.nil? - - processed_cells[cell_name] = images_at(cell_name) - end + def rows_generator(include_meta_data = false, use_simple_rows_format = false) + path = (@sheetfile.start_with? '/xl/' or @sheetfile.start_with? 'xl/') ? @sheetfile : "xl/#{@sheetfile}" + return unless @book.files.file.exist?(path) + + # SAX parsing, Each element in the stream comes through as two events: + # one to open the element and one to close it. + opener = Nokogiri::XML::Reader::TYPE_ELEMENT + closer = Nokogiri::XML::Reader::TYPE_END_ELEMENT + Enumerator.new do |y| + @headers = nil + row = nil + cells = {} + cell = nil + cell_type = nil + cell_style_idx = nil + @book.files.file.open(path) do |xml| + prefix = '' + name_row = 'row' + name_c = 'c' + name_v = 'v' + name_t = 't' + Nokogiri::XML::Reader.from_io(xml).each do |node| + if prefix.empty? && node.namespaces.any? + namespace = node.namespaces.detect { |_key, uri| uri == SPREADSHEETML_URI } + prefix = if namespace && namespace[0].start_with?('xmlns:') + namespace[0].delete_prefix('xmlns:') + ':' + else + '' + end + name_row = "#{prefix}row" + name_c = "#{prefix}c" + name_v = "#{prefix}v" + name_t = "#{prefix}t" + end + if node.name == name_row && node.node_type == opener + row = node.attributes + row['cells'] = {} + cells = {} + y << (include_meta_data ? row : cells) if node.self_closing? + elsif node.name == name_row && node.node_type == closer + processed_cells = fill_in_empty_cells(cells, row['r'], cell, use_simple_rows_format) + @headers = processed_cells if with_headers && row['r'] == HEADERS_ROW_NUMBER + + if @images_present + processed_cells.each do |cell_name, cell_value| + next unless cell_value.nil? + + processed_cells[cell_name] = images_at(cell_name) end + end - row['cells'] = processed_cells - y << (include_meta_data ? row : processed_cells) - elsif node.name == name_c && node.node_type == opener - cell_type = node.attributes['t'] - cell_style_idx = node.attributes['s'] - cell = node.attributes['r'] - elsif (node.name == name_v || node.name == name_t) && node.node_type == opener - unless cell.nil? - node.read - cells[cell] = convert(node.value, cell_type, cell_style_idx) - end + row['cells'] = processed_cells + y << (include_meta_data ? row : processed_cells) + elsif node.name == name_c && node.node_type == opener + cell_type = node.attributes['t'] + cell_style_idx = node.attributes['s'] + cell = node.attributes['r'] + elsif (node.name == name_v || node.name == name_t) && node.node_type == opener + unless cell.nil? + node.read + cells[cell] = convert(node.value, cell_type, cell_style_idx) end end end @@ -199,7 +201,7 @@ def extract_drawing_filepath def cell_id(column, use_simple_rows_format, row_number) return "#{column}#{row_number}" unless use_simple_rows_format - with_headers && headers ? headers[column] : column + (with_headers && headers) ? headers[column] : column end end end diff --git a/lib/creek/styles.rb b/lib/creek/styles.rb index 016f158..d598c17 100644 --- a/lib/creek/styles.rb +++ b/lib/creek/styles.rb @@ -3,27 +3,24 @@ module Creek class Styles attr_accessor :book + def initialize(book) @book = book end def path - "xl/styles.xml" + 'xl/styles.xml' end def styles_xml - @styles_xml ||= begin - if @book.files.file.exist?(path) - doc = @book.files.file.open path - Nokogiri::XML::Document.parse doc - end - end + @styles_xml ||= if @book.files.file.exist?(path) + doc = @book.files.file.open path + Nokogiri::XML::Document.parse doc + end end def style_types - @style_types ||= begin - Creek::Styles::StyleTypes.new(styles_xml).call - end + @style_types ||= Creek::Styles::StyleTypes.new(styles_xml).call end end end diff --git a/lib/creek/styles/constants.rb b/lib/creek/styles/constants.rb index 4cde485..163d9e0 100644 --- a/lib/creek/styles/constants.rb +++ b/lib/creek/styles/constants.rb @@ -3,16 +3,16 @@ class Styles module Constants # Map of non-custom numFmtId to casting symbol NumFmtMap = { - 0 => :string, # General - 1 => :fixnum, # 0 - 2 => :float, # 0.00 - 3 => :fixnum, # #,##0 - 4 => :float, # #,##0.00 - 5 => :unsupported, # $#,##0_);($#,##0) - 6 => :unsupported, # $#,##0_);[Red]($#,##0) - 7 => :unsupported, # $#,##0.00_);($#,##0.00) - 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00) - 9 => :percentage, # 0% + 0 => :string, # General + 1 => :fixnum, # 0 + 2 => :float, # 0.00 + 3 => :fixnum, # #,##0 + 4 => :float, # #,##0.00 + 5 => :unsupported, # $#,##0_);($#,##0) + 6 => :unsupported, # $#,##0_);[Red]($#,##0) + 7 => :unsupported, # $#,##0.00_);($#,##0.00) + 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00) + 9 => :percentage, # 0% 10 => :percentage, # 0.00% 11 => :bignum, # 0.00E+00 12 => :unsupported, # # ?/? @@ -35,7 +35,7 @@ module Constants 47 => :time, # mmss.0 48 => :bignum, # ##0.0E+0 49 => :unsupported # @ - } + }.freeze end end end diff --git a/lib/creek/styles/converter.rb b/lib/creek/styles/converter.rb index 62bd6d6..08d7047 100644 --- a/lib/creek/styles/converter.rb +++ b/lib/creek/styles/converter.rb @@ -8,7 +8,7 @@ class Converter include Creek::Styles::Constants # Excel non-printable character escape sequence - HEX_ESCAPE_REGEXP = /_x[0-9A-Fa-f]{4}_/ + HEX_ESCAPE_REGEXP = /_x[0-9A-Fa-f]{4}_/.freeze ## # The heart of typecasting. The ruby type is determined either explicitly @@ -29,14 +29,12 @@ class Converter # - shared_strings: needed for 's' (shared string) type # - base_date: from what date to begin, see method #base_date - DATE_TYPES = [:date, :time, :date_time].to_set + DATE_TYPES = %i[date time date_time].to_set def self.call(value, type, style, options = {}) return nil if value.nil? || value.empty? # Sometimes the type is dictated by the style alone - if type.nil? || (type == 'n' && DATE_TYPES.include?(style)) - type = style - end + type = style if type.nil? || (type == 'n' && DATE_TYPES.include?(style)) case type @@ -80,21 +78,19 @@ def self.call(value, type, style, options = {}) convert_unknown(value) end end - + def self.convert_unknown(value) - begin - if value.nil? or value.empty? - return value - elsif value.to_i.to_s == value.to_s - return value.to_i - elsif value.to_f.to_s == value.to_s - return value.to_f - else - return value - end - rescue - return value + if value.nil? or value.empty? + value + elsif value.to_i.to_s == value.to_s + value.to_i + elsif value.to_f.to_s == value.to_s + value.to_f + else + value end + rescue StandardError + value end def self.convert_date(value, options) @@ -124,17 +120,15 @@ def self.unescape_string(value) value.gsub(HEX_ESCAPE_REGEXP) { |match| match[2, 4].to_i(16).chr(Encoding::UTF_8) } end - private - - def self.base_date(options) - options.fetch(:base_date, Date.new(1899, 12, 30)) - end + def self.base_date(options) + options.fetch(:base_date, Date.new(1899, 12, 30)) + end - def self.round_datetime(datetime_string) - /(?\d+)-(?\d+)-(?
\d+) (?\d+):(?\d+):(?\d+.\d+)/ =~ datetime_string + def self.round_datetime(datetime_string) + /(?\d+)-(?\d+)-(?
\d+) (?\d+):(?\d+):(?\d+.\d+)/ =~ datetime_string - ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) - end + ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0) + end end end end diff --git a/lib/creek/styles/style_types.rb b/lib/creek/styles/style_types.rb index df3b4d1..0f34dc2 100644 --- a/lib/creek/styles/style_types.rb +++ b/lib/creek/styles/style_types.rb @@ -7,6 +7,7 @@ class Styles class StyleTypes include Creek::Styles::Constants attr_accessor :styles_xml_doc + def initialize(styles_xml_doc) @styles_xml_doc = styles_xml_doc end @@ -26,17 +27,18 @@ def initialize(styles_xml_doc) # custom). Hence this style types array, rather than a map of numFmtId to # type. def call - @style_types ||= begin - styles_xml_doc.css('styleSheet cellXfs xf').map do |xstyle| - a = num_fmt_id(xstyle) - style_type_by_num_fmt_id( a ) - end + # rubocop:disable Naming/MemoizedInstanceVariableName + @style_types ||= styles_xml_doc.css('styleSheet cellXfs xf').map do |xstyle| + a = num_fmt_id(xstyle) + style_type_by_num_fmt_id(a) end + # rubocop:enable Naming/MemoizedInstanceVariableName end - #returns the numFmtId value if it's available + # returns the numFmtId value if it's available def num_fmt_id(xstyle) return nil unless xstyle.attributes['numFmtId'] + xstyle.attributes['numFmtId'].value end @@ -50,6 +52,7 @@ def num_fmt_id(xstyle) # like a bad idea, but we try to be flexible and just go with it. def style_type_by_num_fmt_id(id) return nil unless id + id = id.to_i NumFmtMap[id] || custom_style_types[id] end @@ -57,13 +60,10 @@ def style_type_by_num_fmt_id(id) # Map of (numFmtId >= 164) (custom styles) to our best guess at the type # ex. {164 => :date_time} def custom_style_types - @custom_style_types ||= begin - styles_xml_doc.css('styleSheet numFmts numFmt').inject({}) do |acc, xstyle| - index = xstyle.attributes['numFmtId'].value.to_i - value = xstyle.attributes['formatCode'].value - acc[index] = determine_custom_style_type(value) - acc - end + @custom_style_types ||= styles_xml_doc.css('styleSheet numFmts numFmt').each_with_object({}) do |xstyle, acc| + index = xstyle.attributes['numFmtId'].value.to_i + value = xstyle.attributes['formatCode'].value + acc[index] = determine_custom_style_type(value) end end @@ -80,7 +80,7 @@ def determine_custom_style_type(string) # Looks for one of ymdhis outside of meta-stuff like [Red] return :date_time if string =~ /(^|\])[^\[]*[ymdhis]/i - return :unsupported + :unsupported end end end diff --git a/lib/creek/utils.rb b/lib/creek/utils.rb index af4ac90..5b87163 100644 --- a/lib/creek/utils.rb +++ b/lib/creek/utils.rb @@ -3,7 +3,7 @@ module Creek module Utils def expand_to_rels_path(filepath) - filepath.sub(/(\/[^\/]+$)/, '/_rels\1.rels') + filepath.sub(%r{(/[^/]+$)}, '/_rels\1.rels') end def file_exist?(path) diff --git a/lib/creek/version.rb b/lib/creek/version.rb index 7cb8f1e..7c70ce4 100644 --- a/lib/creek/version.rb +++ b/lib/creek/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Creek - VERSION = "2.6.3" + VERSION = '2.6.3' end diff --git a/spec/drawing_spec.rb b/spec/drawing_spec.rb index 705941f..2832a5d 100644 --- a/spec/drawing_spec.rb +++ b/spec/drawing_spec.rb @@ -3,7 +3,9 @@ describe 'drawing' do let(:book) { Creek::Book.new('spec/fixtures/sample-with-images.xlsx') } let(:book_no_images) { Creek::Book.new('spec/fixtures/sample.xlsx') } - let(:book_with_one_cell_anchored_images) { Creek::Book.new('spec/fixtures/sample-with-one-cell-anchored-images.xlsx') } + let(:book_with_one_cell_anchored_images) do + Creek::Book.new('spec/fixtures/sample-with-one-cell-anchored-images.xlsx') + end let(:drawingfile) { 'xl/drawings/drawing1.xml' } let(:drawing) { Creek::Drawing.new(book, drawingfile) } let(:drawing_without_images) { Creek::Drawing.new(book_no_images, drawingfile) } diff --git a/spec/shared_string_spec.rb b/spec/shared_string_spec.rb index 11b0130..6fb4b88 100644 --- a/spec/shared_string_spec.rb +++ b/spec/shared_string_spec.rb @@ -1,7 +1,6 @@ require './spec/spec_helper' describe 'shared strings' do - it 'parses rich text strings correctly' do shared_strings_xml_file = File.open('spec/fixtures/sst.xml') doc = Nokogiri::XML(shared_strings_xml_file) @@ -32,6 +31,5 @@ expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters") expect(dictionary[6]).to eq('吉田兼好') end - end end diff --git a/spec/sheet_spec.rb b/spec/sheet_spec.rb index eb7b521..6ecee45 100644 --- a/spec/sheet_spec.rb +++ b/spec/sheet_spec.rb @@ -66,8 +66,12 @@ def load_cell(rows, cell_name) end context 'when one cell anchored images in cell' do - let(:book_with_one_cell_anchored_images) { Creek::Book.new('spec/fixtures/sample-with-one-cell-anchored-images.xlsx') } - let(:sheet_with_one_cell_anchored_images) { Creek::Sheet.new(book_with_one_cell_anchored_images, 'Sheet 1', 1, '', '', '1', sheetfile) } + let(:book_with_one_cell_anchored_images) do + Creek::Book.new('spec/fixtures/sample-with-one-cell-anchored-images.xlsx') + end + let(:sheet_with_one_cell_anchored_images) do + Creek::Sheet.new(book_with_one_cell_anchored_images, 'Sheet 1', 1, '', '', '1', sheetfile) + end let(:rows) { sheet_with_one_cell_anchored_images.with_images.rows.map { |r| r } } it 'returns image for anchored cell' do diff --git a/spec/styles/converter_spec.rb b/spec/styles/converter_spec.rb index a86e75c..ea44e8e 100644 --- a/spec/styles/converter_spec.rb +++ b/spec/styles/converter_spec.rb @@ -1,22 +1,20 @@ require './spec/spec_helper' describe Creek::Styles::Converter do - describe :call do - def convert(value, type, style) Creek::Styles::Converter.call(value, type, style) end describe :date do - it "works" do - expect(convert('41275', 'n', :date)).to eq(Date.new(2013,01,01)) + it 'works' do + expect(convert('41275', 'n', :date)).to eq(Date.new(2013, 0o1, 0o1)) end end describe :date_time do - it "works" do - expect(convert('41275', 'n', :date_time)).to eq(Time.new(2013,01,01)) + it 'works' do + expect(convert('41275', 'n', :date_time)).to eq(Time.new(2013, 0o1, 0o1)) end end end diff --git a/spec/styles/style_types_spec.rb b/spec/styles/style_types_spec.rb index b5c72bc..c737908 100644 --- a/spec/styles/style_types_spec.rb +++ b/spec/styles/style_types_spec.rb @@ -1,15 +1,15 @@ require './spec/spec_helper' describe Creek::Styles::StyleTypes do - describe :call do - it "return array of styletypes with mapping to ruby types" do + it 'return array of styletypes with mapping to ruby types' do xml_file = File.open('spec/fixtures/styles/first.xml') doc = Nokogiri::XML(xml_file) res = Creek::Styles::StyleTypes.new(doc).call expect(res.size).to eq(8) expect(res[3]).to eq(:date_time) - expect(res).to eq([:unsupported, :unsupported, :unsupported, :date_time, :unsupported, :unsupported, :unsupported, :unsupported]) + expect(res).to eq(%i[unsupported unsupported unsupported date_time unsupported unsupported + unsupported unsupported]) end end end diff --git a/spec/test_spec.rb b/spec/test_spec.rb index dabb932..6ff1fff 100644 --- a/spec/test_spec.rb +++ b/spec/test_spec.rb @@ -19,9 +19,9 @@ it 'Check file extension of original_filename if passed.' do path = 'spec/fixtures/temp_string_io_file_path_with_no_extension' - expect { Creek::Book.new path, :original_filename => 'invalid.xls' } + expect { Creek::Book.new path, original_filename: 'invalid.xls' } .to raise_error 'Not a valid file format.' - expect { Creek::Book.new path, :original_filename => 'valid.xlsx' } + expect { Creek::Book.new path, original_filename: 'valid.xlsx' } .not_to raise_error end end @@ -31,9 +31,10 @@ @creek = Creek::Book.new 'spec/fixtures/sample_dates.xlsx' @expected_datetime_rows = [ - {'A3' => 'Date', 'B3' => Date.parse('2018-01-01')}, - {'A4' => 'Datetime 00:00:00', 'B4' => Time.parse('2018-01-01 00:00:00')}, - {'A5' => 'Datetime', 'B5' => Time.parse('2018-01-01 23:59:59')}] + { 'A3' => 'Date', 'B3' => Date.parse('2018-01-01') }, + { 'A4' => 'Datetime 00:00:00', 'B4' => Time.parse('2018-01-01 00:00:00') }, + { 'A5' => 'Datetime', 'B5' => Time.parse('2018-01-01 23:59:59') } + ] end after(:all) do @@ -41,7 +42,7 @@ end it 'parses dates successfully' do - rows = Array.new + rows = [] row_count = 0 @creek.sheets[0].rows.each do |row| rows << row @@ -49,7 +50,7 @@ end (2..5).each do |number| - expect(rows[number]).to eq(@expected_datetime_rows[number-2]) + expect(rows[number]).to eq(@expected_datetime_rows[number - 2]) end end end @@ -57,7 +58,7 @@ describe 'Creek parsing a file with large numbrts.' do before(:all) do @creek = Creek::Book.new 'spec/fixtures/large_numbers.xlsx' - @expected_simple_rows = [{"A"=>"7.83294732E8", "B"=>"783294732", "C"=>783294732.0}] + @expected_simple_rows = [{ 'A' => '7.83294732E8', 'B' => '783294732', 'C' => 783_294_732.0 }] end after(:all) do @@ -65,7 +66,7 @@ end it 'Parse simple rows successfully.' do - rows = Array.new + rows = [] row_count = 0 @creek.sheets[0].simple_rows.each do |row| rows << row @@ -78,24 +79,28 @@ describe 'Creek parsing a sample XLSX file' do before(:all) do @creek = Creek::Book.new 'spec/fixtures/sample.xlsx' - @expected_rows = [{'A1'=>'Content 1', 'B1'=>nil, 'C1'=>'Content 2', 'D1'=>nil, 'E1'=>'Content 3'}, - {'A2'=>nil, 'B2'=>'Content 4', 'C2'=>nil, 'D2'=>'Content 5', 'E2'=>nil, 'F2'=>'Content 6'}, - {}, - {'A4'=>'Content 7', 'B4'=>'Content 8', 'C4'=>'Content 9', 'D4'=>'Content 10', 'E4'=>'Content 11', 'F4'=>'Content 12'}, - {'A5'=>nil, 'B5'=>nil, 'C5'=>nil, 'D5'=>nil, 'E5'=>nil, 'F5'=>nil, 'G5'=>nil, 'H5'=>nil, 'I5'=>nil, 'J5'=>nil, 'K5'=>nil, 'L5'=>nil, 'M5'=>nil, 'N5'=>nil, 'O5'=>nil, 'P5'=>nil, 'Q5'=>nil, 'R5'=>nil, 'S5'=>nil, 'T5'=>nil, 'U5'=>nil, 'V5'=>nil, 'W5'=>nil, 'X5'=>nil, 'Y5'=>nil, 'Z5'=>'Z Content', 'AA5'=>nil, 'AB5'=>nil, 'AC5'=>nil, 'AD5'=>nil, 'AE5'=>nil, 'AF5'=>nil, 'AG5'=>nil, 'AH5'=>nil, 'AI5'=>nil, 'AJ5'=>nil, 'AK5'=>nil, 'AL5'=>nil, 'AM5'=>nil, 'AN5'=>nil, 'AO5'=>nil, 'AP5'=>nil, 'AQ5'=>nil, 'AR5'=>nil, 'AS5'=>nil, 'AT5'=>nil, 'AU5'=>nil, 'AV5'=>nil, 'AW5'=>nil, 'AX5'=>nil, 'AY5'=>nil, 'AZ5'=>'Content 13'}, - {'A6'=>'1', 'B6'=>'2', 'C6'=>'3'}, {'A7'=>'Content 15', 'B7'=>'Content 16', 'C7'=>'Content 18', 'D7'=>'Content 19'}, - {'A8'=>nil, 'B8'=>'Content 20', 'C8'=>nil, 'D8'=>nil, 'E8'=>nil, 'F8'=>'Content 21'}, - {'A10' => 0.15, 'B10' => 0.15}] - - @expected_simple_rows = [{"A"=>"Content 1", "B"=>nil, "C"=>"Content 2", "D"=>nil, "E"=>"Content 3"}, - {"A"=>nil, "B"=>"Content 4", "C"=>nil, "D"=>"Content 5", "E"=>nil, "F"=>"Content 6"}, - {}, - {"A"=>"Content 7", "B"=>"Content 8", "C"=>"Content 9", "D"=>"Content 10", "E"=>"Content 11", "F"=>"Content 12"}, - {"A"=>nil, "B"=>nil, "C"=>nil, "D"=>nil, "E"=>nil, "F"=>nil, "G"=>nil, "H"=>nil, "I"=>nil, "J"=>nil, "K"=>nil, "L"=>nil, "M"=>nil, "N"=>nil, "O"=>nil, "P"=>nil, "Q"=>nil, "R"=>nil, "S"=>nil, "T"=>nil, "U"=>nil, "V"=>nil, "W"=>nil, "X"=>nil, "Y"=>nil, "Z"=>"Z Content", "AA"=>nil, "AB"=>nil, "AC"=>nil, "AD"=>nil, "AE"=>nil, "AF"=>nil, "AG"=>nil, "AH"=>nil, "AI"=>nil, "AJ"=>nil, "AK"=>nil, "AL"=>nil, "AM"=>nil, "AN"=>nil, "AO"=>nil, "AP"=>nil, "AQ"=>nil, "AR"=>nil, "AS"=>nil, "AT"=>nil, "AU"=>nil, "AV"=>nil, "AW"=>nil, "AX"=>nil, "AY"=>nil, "AZ"=>"Content 13"}, - {"A"=>"1", "B"=>"2", "C"=>"3"}, - {"A"=>"Content 15", "B"=>"Content 16", "C"=>"Content 18", "D"=>"Content 19"}, - {"A"=>nil, "B"=>"Content 20", "C"=>nil, "D"=>nil, "E"=>nil, "F"=>"Content 21"}, - {"A"=>0.15, "B"=>0.15}] + @expected_rows = [{ 'A1' => 'Content 1', 'B1' => nil, 'C1' => 'Content 2', 'D1' => nil, 'E1' => 'Content 3' }, + { 'A2' => nil, 'B2' => 'Content 4', 'C2' => nil, 'D2' => 'Content 5', 'E2' => nil, 'F2' => 'Content 6' }, + {}, + { 'A4' => 'Content 7', 'B4' => 'Content 8', 'C4' => 'Content 9', 'D4' => 'Content 10', 'E4' => 'Content 11', 'F4' => 'Content 12' }, + { 'A5' => nil, 'B5' => nil, 'C5' => nil, 'D5' => nil, 'E5' => nil, 'F5' => nil, 'G5' => nil, 'H5' => nil, 'I5' => nil, 'J5' => nil, 'K5' => nil, 'L5' => nil, 'M5' => nil, 'N5' => nil, 'O5' => nil, 'P5' => nil, 'Q5' => nil, 'R5' => nil, 'S5' => nil, 'T5' => nil, 'U5' => nil, 'V5' => nil, 'W5' => nil, 'X5' => nil, 'Y5' => nil, 'Z5' => 'Z Content', 'AA5' => nil, 'AB5' => nil, 'AC5' => nil, 'AD5' => nil, 'AE5' => nil, 'AF5' => nil, 'AG5' => nil, 'AH5' => nil, 'AI5' => nil, 'AJ5' => nil, 'AK5' => nil, 'AL5' => nil, 'AM5' => nil, 'AN5' => nil, 'AO5' => nil, 'AP5' => nil, 'AQ5' => nil, 'AR5' => nil, 'AS5' => nil, 'AT5' => nil, 'AU5' => nil, 'AV5' => nil, 'AW5' => nil, 'AX5' => nil, 'AY5' => nil, 'AZ5' => 'Content 13' }, + { 'A6' => '1', 'B6' => '2', 'C6' => '3' }, { 'A7' => 'Content 15', 'B7' => 'Content 16', 'C7' => 'Content 18', 'D7' => 'Content 19' }, + { 'A8' => nil, 'B8' => 'Content 20', 'C8' => nil, 'D8' => nil, 'E8' => nil, 'F8' => 'Content 21' }, + { 'A10' => 0.15, 'B10' => 0.15 }] + + @expected_simple_rows = [{ 'A' => 'Content 1', 'B' => nil, 'C' => 'Content 2', 'D' => nil, 'E' => 'Content 3' }, + { 'A' => nil, 'B' => 'Content 4', 'C' => nil, 'D' => 'Content 5', 'E' => nil, + 'F' => 'Content 6' }, + {}, + { 'A' => 'Content 7', 'B' => 'Content 8', 'C' => 'Content 9', 'D' => 'Content 10', 'E' => 'Content 11', + 'F' => 'Content 12' }, + { 'A' => nil, 'B' => nil, 'C' => nil, 'D' => nil, 'E' => nil, 'F' => nil, 'G' => nil, 'H' => nil, 'I' => nil, + 'J' => nil, 'K' => nil, 'L' => nil, 'M' => nil, 'N' => nil, 'O' => nil, 'P' => nil, 'Q' => nil, 'R' => nil, 'S' => nil, 'T' => nil, 'U' => nil, 'V' => nil, 'W' => nil, 'X' => nil, 'Y' => nil, 'Z' => 'Z Content', 'AA' => nil, 'AB' => nil, 'AC' => nil, 'AD' => nil, 'AE' => nil, 'AF' => nil, 'AG' => nil, 'AH' => nil, 'AI' => nil, 'AJ' => nil, 'AK' => nil, 'AL' => nil, 'AM' => nil, 'AN' => nil, 'AO' => nil, 'AP' => nil, 'AQ' => nil, 'AR' => nil, 'AS' => nil, 'AT' => nil, 'AU' => nil, 'AV' => nil, 'AW' => nil, 'AX' => nil, 'AY' => nil, 'AZ' => 'Content 13' }, + { 'A' => '1', 'B' => '2', 'C' => '3' }, + { 'A' => 'Content 15', 'B' => 'Content 16', 'C' => 'Content 18', 'D' => 'Content 19' }, + { 'A' => nil, 'B' => 'Content 20', 'C' => nil, 'D' => nil, 'E' => nil, + 'F' => 'Content 21' }, + { 'A' => 0.15, 'B' => 0.15 }] end after(:all) do @@ -129,40 +134,39 @@ end it 'Parse simple rows successfully.' do - rows = Array.new + rows = [] row_count = 0 @creek.sheets[0].simple_rows.each do |row| rows << row row_count += 1 end - (0..8).each do |number| + 9.times do |number| expect(rows[number]).to eq(@expected_simple_rows[number]) end expect(row_count).to eq(9) end - it 'Parse rows with empty cells successfully.' do - rows = Array.new + rows = [] row_count = 0 @creek.sheets[0].rows.each do |row| rows << row row_count += 1 end - (0..8).each do |number| + 9.times do |number| expect(rows[number]).to eq(@expected_rows[number]) end expect(row_count).to eq(9) end it 'Parse rows with empty cells and meta data successfully.' do - rows = Array.new + rows = [] row_count = 0 @creek.sheets[0].rows_with_meta_data.each do |row| rows << row row_count += 1 end - expect(rows.map{|r| r['cells']}).to eq(@expected_rows) + expect(rows.map { |r| r['cells'] }).to eq(@expected_rows) end end