Skip to content

Commit

Permalink
Excelx Memory and Performance Optimization (#434)
Browse files Browse the repository at this point in the history
* Excelx Memory and Performance Optimization

* test case fixup

* Use the `# frozen_string_literal: true` magic comment over String#freeze

Use Time#to_datetime over DateTime.civil

* Cosmetic fixup

* Added Test Case for Datetime timezone offset change overlap
  • Loading branch information
chopraanmol1 authored and tgturner committed Sep 15, 2018
1 parent 466e5b9 commit 4f9b166
Show file tree
Hide file tree
Showing 25 changed files with 1,124 additions and 143 deletions.
4 changes: 3 additions & 1 deletion lib/roo.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'roo/version'
require 'roo/constants'
require 'roo/errors'
Expand All @@ -10,7 +12,7 @@ module Roo
autoload :Excelx, 'roo/excelx'
autoload :CSV, 'roo/csv'

TEMP_PREFIX = 'roo_'.freeze
TEMP_PREFIX = 'roo_'

CLASS_FOR_EXTENSION = {
ods: Roo::OpenOffice,
Expand Down
4 changes: 2 additions & 2 deletions lib/roo/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class Roo::Base
include Roo::Formatters::XML
include Roo::Formatters::YAML

MAX_ROW_COL = 999_999.freeze
MIN_ROW_COL = 0.freeze
MAX_ROW_COL = 999_999
MIN_ROW_COL = 0

attr_reader :headers

Expand Down
8 changes: 5 additions & 3 deletions lib/roo/constants.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

module Roo
ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
end
17 changes: 11 additions & 6 deletions lib/roo/excelx/cell.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,24 @@ def type
end

# Public: Builds a cell object for the given type.
#
# type   - Value handed to cell_class to look up the class to instantiate.
# values - Remaining arguments, forwarded unchanged to that class's .new.
#
# Returns the new cell instance, or the falsy lookup result (nil/false)
# when cell_class does not return a class for the type.
def self.create_cell(type, *values)
type_class = cell_class(type)
# Short-circuit so a failed lookup returns the falsy value instead of
# calling .new on it.
type_class && type_class.new(*values)
end

def self.cell_class(type)
case type
when :string
Cell::String.new(*values)
Cell::String
when :boolean
Cell::Boolean.new(*values)
Cell::Boolean
when :number
Cell::Number.new(*values)
Cell::Number
when :date
Cell::Date.new(*values)
Cell::Date
when :datetime
Cell::DateTime.new(*values)
Cell::DateTime
when :time
Cell::Time.new(*values)
Cell::Time
end
end

Expand Down
4 changes: 3 additions & 1 deletion lib/roo/excelx/cell/boolean.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

module Roo
class Excelx
class Cell
Expand All @@ -11,7 +13,7 @@ def initialize(value, formula, style, link, coordinate)
end

def formatted_value
value ? 'TRUE'.freeze : 'FALSE'.freeze
value ? 'TRUE' : 'FALSE'
end

private
Expand Down
7 changes: 3 additions & 4 deletions lib/roo/excelx/cell/date.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ def initialize(value, formula, excelx_type, style, link, base_date, coordinate)

private

def create_date(base_date, value)
date = base_date + value.to_i
yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
def create_datetime(_,_); end

::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
def create_date(base_date, value)
base_date + value.to_i
end
end
end
Expand Down
25 changes: 10 additions & 15 deletions lib/roo/excelx/cell/datetime.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# frozen_string_literal: true

require 'date'

module Roo
class Excelx
class Cell
class DateTime < Cell::Base
SECONDS_IN_DAY = 60 * 60 * 24

attr_reader :value, :formula, :format, :cell_value, :link, :coordinate

def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate)
super(value, formula, excelx_type, style, link, coordinate)
@type = :datetime
@format = excelx_type.last
@value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
@value = link? ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value)
end

# Public: Returns formatted value for a datetime. Formats can be an
Expand Down Expand Up @@ -78,7 +82,7 @@ def parse_date_or_time_format(part)

TIME_FORMATS = {
'hh' => '%H', # Hour (24): 01
'h' => '%-k'.freeze, # Hour (24): 1
'h' => '%-k', # Hour (24): 1
# 'hh'.freeze => '%I'.freeze, # Hour (12): 08
# 'h'.freeze => '%-l'.freeze, # Hour (12): 8
'mm' => '%M', # Minute: 01
Expand All @@ -92,18 +96,9 @@ def parse_date_or_time_format(part)
'0' => '%1N' # Fractional Seconds: tenths.
}

def create_datetime(base_date, value)
date = base_date + value.to_f.round(6)
datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
t = round_datetime(datetime_string)

::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
end

def round_datetime(datetime_string)
/(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string

::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
# Internal: Builds a ::DateTime from a base timestamp plus a day offset.
#
# base_timestamp - Numeric number of seconds the offset is added to
#                  (presumably the workbook base date as epoch seconds;
#                  confirm against the caller).
# value          - Day offset; converted with to_f and multiplied by
#                  SECONDS_IN_DAY to get seconds.
#
# Returns a ::DateTime obtained from a UTC ::Time.
def create_datetime(base_timestamp, value)
# The day offset is rounded to 6 decimal places before scaling, and the
# resulting second count is rounded to a whole number.
timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0)
::Time.at(timestamp).utc.to_datetime
end
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/roo/excelx/cell/number.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

module Roo
class Excelx
class Cell
Expand Down
4 changes: 4 additions & 0 deletions lib/roo/excelx/coordinate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ def initialize(row, column)
@row = row
@column = column
end

# Public: Returns the coordinate as a two-element [row, column] Array.
#
# The array is built on the first call, frozen, and cached in @array;
# later calls return that same frozen object, so callers cannot mutate it.
def to_a
@array ||= [row, column].freeze
end
end
end
end
12 changes: 12 additions & 0 deletions lib/roo/excelx/extractor.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# frozen_string_literal: true

module Roo
class Excelx
class Extractor

COMMON_STRINGS = {
t: "t",
r: "r",
s: "s",
ref: "ref",
html_tag_open: "<html>",
html_tag_closed: "</html>"
}

def initialize(path, options = {})
@path = path
@options = options
Expand Down
69 changes: 38 additions & 31 deletions lib/roo/excelx/format.rb
Original file line number Diff line number Diff line change
@@ -1,49 +1,57 @@
# frozen_string_literal: true

module Roo
class Excelx
module Format
extend self
EXCEPTIONAL_FORMATS = {
'h:mm am/pm' => :date,
'h:mm:ss am/pm' => :date
}

STANDARD_FORMATS = {
0 => 'General'.freeze,
1 => '0'.freeze,
2 => '0.00'.freeze,
3 => '#,##0'.freeze,
4 => '#,##0.00'.freeze,
9 => '0%'.freeze,
10 => '0.00%'.freeze,
11 => '0.00E+00'.freeze,
12 => '# ?/?'.freeze,
13 => '# ??/??'.freeze,
14 => 'mm-dd-yy'.freeze,
15 => 'd-mmm-yy'.freeze,
16 => 'd-mmm'.freeze,
17 => 'mmm-yy'.freeze,
18 => 'h:mm AM/PM'.freeze,
19 => 'h:mm:ss AM/PM'.freeze,
20 => 'h:mm'.freeze,
21 => 'h:mm:ss'.freeze,
22 => 'm/d/yy h:mm'.freeze,
37 => '#,##0 ;(#,##0)'.freeze,
38 => '#,##0 ;[Red](#,##0)'.freeze,
39 => '#,##0.00;(#,##0.00)'.freeze,
40 => '#,##0.00;[Red](#,##0.00)'.freeze,
45 => 'mm:ss'.freeze,
46 => '[h]:mm:ss'.freeze,
47 => 'mmss.0'.freeze,
48 => '##0.0E+0'.freeze,
49 => '@'.freeze
0 => 'General',
1 => '0',
2 => '0.00',
3 => '#,##0',
4 => '#,##0.00',
9 => '0%',
10 => '0.00%',
11 => '0.00E+00',
12 => '# ?/?',
13 => '# ??/??',
14 => 'mm-dd-yy',
15 => 'd-mmm-yy',
16 => 'd-mmm',
17 => 'mmm-yy',
18 => 'h:mm AM/PM',
19 => 'h:mm:ss AM/PM',
20 => 'h:mm',
21 => 'h:mm:ss',
22 => 'm/d/yy h:mm',
37 => '#,##0 ;(#,##0)',
38 => '#,##0 ;[Red](#,##0)',
39 => '#,##0.00;(#,##0.00)',
40 => '#,##0.00;[Red](#,##0.00)',
45 => 'mm:ss',
46 => '[h]:mm:ss',
47 => 'mmss.0',
48 => '##0.0E+0',
49 => '@'
}

# Public: Returns the type for a format, caching the answer per format.
#
# format - Used as the cache key exactly as given and passed to _to_type
#          on a cache miss.
#
# Returns whatever _to_type returns for the format. Because the cache is
# filled with ||=, a nil or false result is not retained and is recomputed
# on the next call.
def to_type(format)
@to_type ||= {}
@to_type[format] ||= _to_type(format)
end

def _to_type(format)
format = format.to_s.downcase
if (type = EXCEPTIONAL_FORMATS[format])
type
elsif format.include?('#')
:float
elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
if format.include?('h') || format.include?('s')
:datetime
else
Expand All @@ -58,7 +66,6 @@ def to_type(format)
end
end

module_function :to_type
end
end
end
end
4 changes: 4 additions & 0 deletions lib/roo/excelx/shared.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def workbook
def base_date
workbook.base_date
end

# Public: Returns the workbook's base timestamp by delegating to
# workbook.base_timestamp.
def base_timestamp
workbook.base_timestamp
end
end
end
end
18 changes: 6 additions & 12 deletions lib/roo/excelx/shared_strings.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
# frozen_string_literal: true

require 'roo/excelx/extractor'

module Roo
class Excelx
class SharedStrings < Excelx::Extractor

COMMON_STRINGS = {
t: "t",
r: "r",
html_tag_open: "<html>",
html_tag_closed: "</html>"
}

def [](index)
to_a[index]
end
Expand Down Expand Up @@ -46,7 +40,7 @@ def extract_shared_strings
document = fix_invalid_shared_strings(doc)
# read the shared strings xml document
document.xpath('/sst/si').map do |si|
shared_string = ''
shared_string = String.new
si.children.each do |elem|
case elem.name
when 'r'
Expand All @@ -66,7 +60,7 @@ def extract_html
fix_invalid_shared_strings(doc)
# read the shared strings xml document
doc.xpath('/sst/si').map do |si|
html_string = '<html>'
html_string = '<html>'.dup
si.children.each do |elem|
case elem.name
when 'r'
Expand Down Expand Up @@ -96,7 +90,7 @@ def extract_html
#
# Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
def extract_html_r(r_elem)
str = ''
str = String.new
xml_elems = {
sub: false,
sup: false,
Expand Down Expand Up @@ -141,7 +135,7 @@ def extract_html_r(r_elem)

# This will return an html string
def create_html(text, formatting)
tmp_str = ''
tmp_str = String.new
formatting.each do |elem, val|
tmp_str << "<#{elem}>" if val
end
Expand Down
10 changes: 6 additions & 4 deletions lib/roo/excelx/sheet.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ def each_row(options = {}, &block)

def row(row_number)
first_column.upto(last_column).map do |col|
cells[[row_number, col]]
end.map { |cell| cell && cell.value }
cell = cells[[row_number, col]]
cell && cell.value
end
end

def column(col_number)
first_row.upto(last_row).map do |row|
cells[[row, col_number]]
end.map { |cell| cell && cell.value }
cell = cells[[row, col_number]]
cell && cell.value
end
end

# returns the number of the first non-empty row
Expand Down
Loading

0 comments on commit 4f9b166

Please sign in to comment.