Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support numeric times in time parser formatter #1254

Merged
merged 6 commits into from
Oct 5, 2016
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 3 additions & 27 deletions lib/fluent/plugin/formatter_out_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,36 +32,12 @@ class OutFileFormatter < Formatter
else "\t"
end
end
config_param :time_type, :enum, list: [:float, :unixtime, :string], default: :string
config_set_default :time_type, :string
config_set_default :time_format, nil # time_format nil => iso8601

def configure(conf)
# TODO: make a utility method in TimeFormatter to handle these conversion
# copies of this code: plugin_helper/compat_parameters, compat/formatter_utils and here
if conf.has_key?('time_as_epoch') && Fluent::Config.bool_value(conf['time_as_epoch'])
conf['time_type'] = 'unixtime'
end
if conf.has_key?('localtime') || conf.has_key?('utc')
if conf.has_key?('localtime') && conf.has_key?('utc')
raise Fluent::ConfigError, "both of utc and localtime are specified, use only one of them"
elsif conf.has_key?('localtime')
conf['localtime'] = Fluent::Config.bool_value(conf['localtime'])
elsif conf.has_key?('utc')
conf['localtime'] = !(Fluent::Config.bool_value(conf['utc']))
# Specifying "localtime false" means using UTC in TimeFormatter
# And specifying "utc" is different from specifying "timezone +0000"(it's not always UTC).
# There are difference between "Z" and "+0000" in timezone formatting.
# TODO: add kwargs to TimeFormatter to specify "using localtime", "using UTC" or "using specified timezone" in more explicit way
end
end

super

@timef = case @time_type
when :float then ->(time){ time.to_r.to_f }
when :unixtime then ->(time){ time.to_i }
else
time_formatter_create
end
@timef = time_formatter_create
end

def format(tag, time, record)
Expand Down
22 changes: 21 additions & 1 deletion lib/fluent/plugin_helper/compat_parameters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ module CompatParameters
BUFFER_TIME_SLICED_PARAMS = {
"time_slice_format" => nil,
"time_slice_wait" => "timekey_wait",
"timezone" => "timekey_zone",
}

PARSER_PARAMS = {
Expand All @@ -56,6 +57,7 @@ module CompatParameters
"format_firstline" => "format_firstline", # MultilineParser
"message_key" => "message_key", # NoneParser
"with_priority" => "with_priority", # SyslogParser
# No parser in v0.12 could handle timezone
}

INJECT_PARAMS = {
Expand All @@ -77,7 +79,10 @@ module CompatParameters
"json_parser" => "json_parser", # JSONFormatter
"label_delimiter" => "label_delimiter", # LabeledTSVFormatter
"output_time" => "output_time", # OutFileFormatter
"output_tag" => "output_tag", # OutFileFormatter
"output_tag" => "output_tag", # OutFileFormatter
"localtime" => "localtime", # OutFileFormatter
"utc" => "utc", # OutFileFormatter
"timezone" => "timezone", # OutFileFormatter
"message_key" => "message_key", # SingleValueFormatter
"add_newline" => "add_newline", # SingleValueFormatter
"output_type" => "output_type", # StdoutFormatter
Expand Down Expand Up @@ -129,6 +134,15 @@ def compat_parameters_buffer(conf, default_chunk_key: '')
else
raise Fluent::ConfigError, "time_slice_format only with %Y or %m is too long"
end
if conf.has_key?('localtime') || conf.has_key?('utc')
if conf.has_key?('localtime') && conf.has_key?('utc')
raise Fluent::ConfigError, "both of utc and localtime are specified, use only one of them"
elsif conf.has_key?('localtime')
attr['timekey_use_utc'] = !(Fluent::Config.bool_value(conf['localtime']))
elsif conf.has_key?('utc')
attr['timekey_use_utc'] = Fluent::Config.bool_value(conf['utc'])
end
end
else
if chunk_key == 'time'
attr['timekey'] = 86400 # TimeSliceOutput.time_slice_format default value is '%Y%m%d'
Expand Down Expand Up @@ -199,6 +213,12 @@ def compat_parameters_formatter(conf)
# TODO: warn obsolete parameters if these are deprecated
attr = compat_parameters_copy_to_subsection_attributes(conf, FORMATTER_PARAMS)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Off topic: Ruby has a Module#attr method, so it is better to avoid using the same name for a local variable.


# TODO: make a utility method in TimeFormatter to handle these conversion
Copy link
Member

@repeatedly repeatedly Oct 4, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment still needed? Is it hard to implement in TimeFormatter?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

time_as_epoch shouldn't be implemented in TimeFormatter, because it now has time_type for unixtime, float (and string). time_as_epoch exists only for compatibility with existing plugins.

# copies of this code: plugin_helper/compat_parameters, compat/formatter_utils and here
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"and here" can be removed.

if conf.has_key?('time_as_epoch') && Fluent::Config.bool_value(conf['time_as_epoch'])
attr['time_type'] = 'unixtime'
end

e = Fluent::Config::Element.new('format', '', attr, [])
conf.elements << e

Expand Down
2 changes: 2 additions & 0 deletions lib/fluent/plugin_helper/extract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ module ExtractParams
config_section :extract, required: false, multi: false, param_name: :extract_config do
config_param :tag_key, :string, default: nil
config_param :time_key, :string, default: nil

# To avoid defining :time_type twice
config_param :time_type, :enum, list: [:float, :unixtime, :string], default: :float

Fluent::TimeMixin::TIME_PARAMETERS.each do |name, type, opts|
Expand Down
2 changes: 2 additions & 0 deletions lib/fluent/plugin_helper/inject.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ module InjectParams
config_param :hostname, :string, default: nil
config_param :tag_key, :string, default: nil
config_param :time_key, :string, default: nil

# To avoid defining :time_type twice
config_param :time_type, :enum, list: [:float, :unixtime, :string], default: :float

Fluent::TimeMixin::TIME_PARAMETERS.each do |name, type, opts|
Expand Down
133 changes: 130 additions & 3 deletions lib/fluent/time.rb
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,45 @@ module TimeMixin
[:utc, :bool, {default: false}], # to turn :localtime false
[:timezone, :string, {default: nil}],
]
# TIME_PARAMETERS preceded by a :time_type enum entry
# (one of :string, :unixtime or :float; default :string).
TIME_FULL_PARAMETERS = [
# :time_type is kept out of TIME_PARAMETERS to avoid defining it twice (in plugin_helper/inject)
[:time_type, :enum, {default: :string, list: [:string, :unixtime, :float]}],
] + TIME_PARAMETERS

module TimeParameters
include Fluent::Configurable
TIME_PARAMETERS.each do |name, type, opts|
TIME_FULL_PARAMETERS.each do |name, type, opts|
config_param name, type, opts
end

# Normalizes the 'localtime' / 'utc' entries of conf into a single boolean
# 'localtime' entry, then continues with the inherited configuration and
# validates @timezone when one has been set.
#
# conf - configuration element (responds to has_key?, [] and []=)
#
# Raises Fluent::ConfigError when both 'localtime' and 'utc' are present.
def configure(conf)
  localtime_given = conf.has_key?('localtime')
  utc_given = conf.has_key?('utc')

  if localtime_given && utc_given
    raise Fluent::ConfigError, "both of utc and localtime are specified, use only one of them"
  elsif localtime_given
    conf['localtime'] = Fluent::Config.bool_value(conf['localtime'])
  elsif utc_given
    # Specifying "localtime false" means using UTC in TimeFormatter.
    # Specifying "utc" is different from specifying "timezone +0000" (it's not always UTC):
    # there is a difference between "Z" and "+0000" in timezone formatting.
    # TODO: add kwargs to TimeFormatter to specify "using localtime", "using UTC" or "using specified timezone" in a more explicit way
    conf['localtime'] = !Fluent::Config.bool_value(conf['utc'])
  end

  super

  Fluent::Timezone.validate!(@timezone) if @timezone
end
end

module Parser
def self.included(mod)
mod.include TimeParameters
end

def time_parser_create(format: @time_format, timezone: @timezone, force_localtime: false)
def time_parser_create(type: @time_type, format: @time_format, timezone: @timezone, force_localtime: false)
return NumericTimeParser.new(type) if type != :string
return TimeParser.new(format, true, nil) if force_localtime

localtime = @localtime && (timezone.nil? && !@utc)
Expand All @@ -138,7 +164,8 @@ def self.included(mod)
mod.include TimeParameters
end

def time_formatter_create(format: @time_format, timezone: @timezone, force_localtime: false)
def time_formatter_create(type: @time_type, format: @time_format, timezone: @timezone, force_localtime: false)
return NumericTimeFormatter.new(type) if type != :string
return TimeFormatter.new(format, true, nil) if force_localtime

localtime = @localtime && (timezone.nil? && !@utc)
Expand Down Expand Up @@ -207,6 +234,77 @@ def parse(value)
alias :call :parse
end

class NumericTimeParser < TimeParser # to include TimeParseError
# Sets up the two-entry parse cache and binds #parse / #call to the parsing
# routine that matches type.
#
# type      - :unixtime selects parse_unixtime; anything else selects parse_float
# localtime - accepted but not used by this method
# timezone  - accepted but not used by this method
def initialize(type, localtime = nil, timezone = nil)
  @cache1_key = nil
  @cache1_time = nil
  @cache2_key = nil
  @cache2_time = nil

  parser_name = type == :unixtime ? :parse_unixtime : :parse_float # else: :float
  define_singleton_method(:parse, method(parser_name))
  define_singleton_method(:call, method(parser_name))
end

# Parses a string holding integer epoch seconds into a Fluent::EventTime.
#
# The two most recently parsed values are remembered, so repeating one of them
# returns the previously built object without parsing again.
#
# value - String; anything else raises TimeParseError
#
# Returns the Fluent::EventTime built from value.to_i.
# Raises TimeParseError for non-String input or when building the time fails.
def parse_unixtime(value)
  raise TimeParseError, "value must be a string: #{value}" unless value.is_a?(String)

  return @cache1_time if @cache1_key == value
  return @cache2_time if @cache2_key == value

  begin
    parsed = Fluent::EventTime.new(value.to_i)
  rescue => e
    raise TimeParseError, "invalid time format: value = #{value}, error_class = #{e.class.name}, error = #{e.message}"
  end

  # Shift the newer entry into slot 1 and store the fresh result in slot 2.
  @cache1_key, @cache1_time = @cache2_key, @cache2_time
  @cache2_key, @cache2_time = value, parsed
  parsed
end

# rough benchmark result to compare handmade parser vs Fluent::EventTime.from_time(Time.at(value.to_r))
# full: with 9-digits of nsec after dot
# msec: with 3-digits of msec after dot
# 10_000_000 times loop on MacBookAir
## parse_by_myself(full): 12.162475 sec
## parse_by_myself(msec): 15.050435 sec
## parse_by_to_r (full): 28.722362 sec
## parse_by_to_r (msec): 28.232856 sec
#
# Parses a "<seconds>.<fraction>" string into a Fluent::EventTime.
#
# The fraction is interpreted as nanoseconds: it is cut to at most 9 digits
# and right-padded with zeros when shorter. A value without a fractional part
# is accepted and yields 0 nanoseconds. Anything after a second dot is ignored.
# The two most recently parsed values are cached.
#
# value - String; anything else raises TimeParseError
#
# Returns a Fluent::EventTime.
# Raises TimeParseError for non-String input or when building the time fails.
def parse_float(value)
  unless value.is_a?(String)
    raise TimeParseError, "value must be a string: #{value}"
  end

  if @cache1_key == value
    return @cache1_time
  elsif @cache2_key == value
    return @cache2_time
  end

  begin
    sec_s, nsec_s, _ = value.split('.', 3) # throw away second-dot and later
    # Exactly 9 digits of nanoseconds: [0, 9] keeps 9 characters (a 0..9 range
    # would keep 10 and overflow the nanosecond field); ljust fills short fractions.
    nsec_s = (nsec_s || '')[0, 9].ljust(9, '0')
    time = Fluent::EventTime.new(sec_s.to_i, nsec_s.to_i)
  rescue => e
    raise TimeParseError, "invalid time format: value = #{value}, error_class = #{e.class.name}, error = #{e.message}"
  end
  @cache1_key = @cache2_key
  @cache1_time = @cache2_time
  @cache2_key = value
  @cache2_time = time
  time
end
end

class TimeFormatter
def initialize(format = nil, localtime = true, timezone = nil)
@tc1 = 0
Expand Down Expand Up @@ -276,4 +374,33 @@ def format_nocache(time)
@format_nocache.call(time)
end
end

class NumericTimeFormatter < TimeFormatter
# Binds #format / #call to the formatting routine that matches type.
#
# type      - :unixtime selects format_unixtime; anything else selects format_float
# localtime - accepted but not used by this method
# timezone  - accepted but not used by this method
#
# The cache instance variables are reset here; format_unixtime and
# format_float as shown do not read them.
def initialize(type, localtime = nil, timezone = nil)
  @cache1_key = nil
  @cache1_time = nil
  @cache2_key = nil
  @cache2_time = nil

  formatter_name = type == :unixtime ? :format_unixtime : :format_float # else: :float
  define_singleton_method(:format, method(formatter_name))
  define_singleton_method(:call, method(formatter_name))
end

# Renders time as the decimal string of its integer value (time.to_i).
def format_unixtime(time)
  epoch_seconds = time.to_i
  epoch_seconds.to_s
end

# Renders time as "<epoch seconds>.<nanoseconds>", with the nanoseconds always
# written as 9 digits (left-padded with zeros).
#
# time - Fluent::EventTime or Time; any other value is converted with to_f
#
# Returns a String.
def format_float(time)
  if time.is_a?(Fluent::EventTime) || time.is_a?(Time)
    # to_i is used for the seconds because Time#sec is the second of the
    # minute (0..60), not the epoch seconds.
    # rjust keeps the digits and prefixes the zeros, so nsec 5 becomes "000000005".
    "#{time.to_i}.#{time.nsec.to_s.rjust(9, '0')}"
  else # integer (or float?)
    time.to_f.to_s
  end
end
end
end
16 changes: 16 additions & 0 deletions test/test_time_formatter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,20 @@ class DummyForTimeFormatter
assert_equal "09/02/2016 11-42-31 012345678", str
end
end

# Configures the dummy with time_type 'unixtime' and checks that the formatter
# returned by #time_formatter_create renders an event time as the decimal
# string of its seconds (expected value "#{time.sec}", no fractional part).
test '#time_formatter_create returns NumericTimeFormatter to format time as unixtime when time_type unixtime specified' do
i = DummyForTimeFormatter.new
i.configure(config_element('format', '', {'time_type' => 'unixtime'}))
fmt = i.time_formatter_create
time = event_time("2016-10-03 20:08:30.123456789 +0100", format: '%Y-%m-%d %H:%M:%S.%N %z')
assert_equal "#{time.sec}", fmt.format(time)
end

# Configures the dummy with time_type 'float' and checks that the formatter
# returned by #time_formatter_create renders an event time as
# "<sec>.<nsec>".
# NOTE(review): the sample fraction (.123456789) presumably yields a 9-digit
# nsec, so zero-padding of shorter nsec values is not exercised here.
test '#time_formatter_create returns NumericTimeFormatter to format time as float when time_type float specified' do
i = DummyForTimeFormatter.new
i.configure(config_element('format', '', {'time_type' => 'float'}))
fmt = i.time_formatter_create
time = event_time("2016-10-03 20:08:30.123456789 +0100", format: '%Y-%m-%d %H:%M:%S.%N %z')
assert_equal "#{time.sec}.#{time.nsec}", fmt.format(time)
end
end
16 changes: 16 additions & 0 deletions test/test_time_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,21 @@ class DummyForTimeParser
end
assert_equal_event_time(event_time("2016-09-05 17:59:38.987654321 +09:00", format: '%Y-%m-%d %H:%M:%S.%N %z'), time)
end

# Configures the dummy with time_type 'unixtime' and checks that the parser
# returned by #time_parser_create turns the seconds string "#{time.sec}" into
# an event time equal to Fluent::EventTime.new(time.to_i).
test '#time_parser_create returns NumericTimeParser to parse time as unixtime when time_type unixtime specified' do
i = DummyForTimeParser.new
i.configure(config_element('parse', '', {'time_type' => 'unixtime'}))
parser = i.time_parser_create
time = event_time("2016-10-03 20:08:30.123456789 +0100", format: '%Y-%m-%d %H:%M:%S.%N %z')
assert_equal_event_time(Fluent::EventTime.new(time.to_i), parser.parse("#{time.sec}"))
end

# Configures the dummy with time_type 'float' and checks that the parser
# returned by #time_parser_create turns "<sec>.<nsec>" back into the original
# event time.
# NOTE(review): only a fraction of presumably 9 digits is parsed here; shorter
# or longer fractions and values without a dot are not covered.
test '#time_parser_create returns NumericTimeParser to parse time as float when time_type float specified' do
i = DummyForTimeParser.new
i.configure(config_element('parse', '', {'time_type' => 'float'}))
parser = i.time_parser_create
time = event_time("2016-10-03 20:08:30.123456789 +0100", format: '%Y-%m-%d %H:%M:%S.%N %z')
assert_equal_event_time(time, parser.parse("#{time.sec}.#{time.nsec}"))
end
end
end