Skip to content

Commit

Permalink
Decrease string allocations in apply_inflections
Browse files Browse the repository at this point in the history
In `apply_inflections` a string is down cased and some whitespace stripped in the front (which allocate strings). This would normally be fine, however `uncountables` is a fairly small array (10 elements out of the box) and this method gets called a TON. Instead we can keep an array of valid regexes for each uncountable so we don't have to allocate new strings.

This change buys us 325,106 bytes of memory and 3,251 fewer objects per request.
  • Loading branch information
schneems committed Jul 30, 2015
1 parent f80aa59 commit 1bf50ba
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
37 changes: 34 additions & 3 deletions activesupport/lib/active_support/inflector/inflections.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,45 @@ module Inflector
class Inflections
@__instance__ = ThreadSafe::Cache.new

class Uncountables < Array
def initialize
@regex_array = []
super
end

def delete(entry)
super entry
@regex_array.delete(to_regex(entry))
end

def <<(*word)
add(word)
end

def add(words)
self.concat(words.flatten.map(&:downcase))
@regex_array += self.map {|word| to_regex(word) }
self
end

def uncountable?(str)
@regex_array.detect {|regex| regex.match(str) }

This comment has been minimized.

Copy link
@AaronLasseigne

AaronLasseigne Aug 6, 2015

( I hope you don't mind me commenting here.)

You can avoid creating the MatchData object by using [] or =~ on str and passing it regex.

@regex_array.detect { |regex| str[regex] } or @regex_array.detect { |regex| str =~ regex }

Also any? seems more appropriate than detect. Is there a reason to use detect?

This comment has been minimized.

Copy link
@schneems

schneems Aug 6, 2015

Author Owner

( I hope you don't mind me commenting here.)

Not at all, thanks for chiming in! You're right about the match:

require 'benchmark/ips'

string = "foo".freeze

Benchmark.ips do |x|
  x.report("match") {
    /boo/.match(string)
  }
  x.report("=~") {
    /boo/ =~ string
  }
  x.report("[]") {
    string[/boo/]
  }
end

results in

Calculating -------------------------------------
               match    66.837k i/100ms
                  =~    74.183k i/100ms
                  []    71.748k i/100ms
-------------------------------------------------
               match      1.783M (±12.8%) i/s -      8.822M
                  =~      2.077M (±13.0%) i/s -     10.237M
                  []      1.910M (±13.1%) i/s -      9.399M

Looks like switching to any? would also give a speed bump:

require 'benchmark/ips'

string = "foo".freeze
array = [/zoo/, /boo/, /moo/]

Benchmark.ips do |x|
  x.report("detect") {
    array.detect {|regex| string =~ regex }
  }
  x.report("any?") {
    array.any? {|regex| string =~ regex }
  }
end
Calculating -------------------------------------
              detect    31.241k i/100ms
                any?    37.831k i/100ms
-------------------------------------------------
              detect    489.087k (±10.2%) i/s -      2.437M
                any?    616.260k (± 9.5%) i/s -      3.064M

Could you put that in the change as well? Didn't realize there was a speed difference. Here's the source:

This is used for any?

static VALUE
rb_ary_any_p(VALUE ary)
{
    long i, len = RARRAY_LEN(ary);

    if (!len) return Qfalse;
    if (!rb_block_given_p()) {
    const VALUE *ptr = RARRAY_CONST_PTR(ary);
    for (i = 0; i < len; ++i) if (RTEST(ptr[i])) return Qtrue;
    }
    else {
    for (i = 0; i < RARRAY_LEN(ary); ++i) {
        if (RTEST(rb_yield(RARRAY_AREF(ary, i)))) return Qtrue;
    }
    }
    return Qfalse;
}

This is used for detect:

static VALUE
enum_find(int argc, VALUE *argv, VALUE obj)
{
    struct MEMO *memo;
    VALUE if_none;

    rb_scan_args(argc, argv, "01", &if_none);
    RETURN_ENUMERATOR(obj, argc, argv);
    memo = MEMO_NEW(Qundef, 0, 0);
    rb_block_call(obj, id_each, 0, 0, find_i, (VALUE)memo);
    if (memo->u3.cnt) {
    return memo->v1;
    }
    if (!NIL_P(if_none)) {
    return rb_funcallv(if_none, id_call, 0, 0);
    }
    return Qnil;
}

I guess the extra speed comes from not having to allocate an enumerator and the extra layer of indirection? Could you give me a PR and @-mention me to change this to a str =~ regex and change the method to any? since you found the change?

This comment has been minimized.

Copy link
@AaronLasseigne

AaronLasseigne Aug 6, 2015

Sure thing.

This comment has been minimized.

Copy link
@schneems

schneems Aug 6, 2015

Author Owner

I added the String#=~ to fast-ruby btw fastruby/fast-ruby#59. It would be good to add the any? versus detect benchmark too if you get a bit more time after your rails patch.

Thanks again for the help 👍 ❤️

end

private
def to_regex(string)
/\b#{::Regexp.escape(string)}\Z/i
end
end

def self.instance(locale = :en)
@__instance__[locale] ||= new
end

attr_reader :plurals, :singulars, :uncountables, :humans, :acronyms, :acronym_regex

def initialize
@plurals, @singulars, @uncountables, @humans, @acronyms, @acronym_regex = [], [], [], [], {}, /(?=a)b/
@plurals, @singulars, @uncountables, @humans, @acronyms, @acronym_regex = [], [], Uncountables.new, [], {}, /(?=a)b/
end

# Private, for the test suite.
Expand Down Expand Up @@ -160,7 +191,7 @@ def irregular(singular, plural)
# uncountable 'money', 'information'
# uncountable %w( money information rice )
def uncountable(*words)
@uncountables += words.flatten.map(&:downcase)
@uncountables.add(words)
end

# Specifies a humanized form of a string by a regular expression rule or
Expand All @@ -185,7 +216,7 @@ def human(rule, replacement)
def clear(scope = :all)
case scope
when :all
@plurals, @singulars, @uncountables, @humans = [], [], [], []
@plurals, @singulars, @uncountables, @humans = [], [], Uncountables.new, []
else
instance_variable_set "@#{scope}", []
end
Expand Down
4 changes: 2 additions & 2 deletions activesupport/lib/active_support/inflector/methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def ordinalize(number)
# const_regexp("Foo::Bar::Baz") # => "Foo(::Bar(::Baz)?)?"
# const_regexp("::") # => "::"
def const_regexp(camel_cased_word) #:nodoc:
parts = camel_cased_word.split("::")
parts = camel_cased_word.split("::".freeze)

return Regexp.escape(camel_cased_word) if parts.blank?

Expand All @@ -372,7 +372,7 @@ def const_regexp(camel_cased_word) #:nodoc:
def apply_inflections(word, rules)
result = word.to_s.dup

if word.empty? || inflections.uncountables.include?(result.downcase[/\b\w+\Z/])
if word.empty? || inflections.uncountables.uncountable?(result)
result
else
rules.each { |(rule, replacement)| break if result.sub!(rule, replacement) }
Expand Down

0 comments on commit 1bf50ba

Please sign in to comment.