Skip to content

Commit

Permalink
Merge branch 'release/0.2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
paulgoetze committed Jan 19, 2016
2 parents ae04ca6 + 287ee0e commit 88b3f58
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 5 deletions.
47 changes: 47 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Or install it yourself as:

## Usage

* [Instances](#instances)
* [Filters](#filters)
* [Attribute selection](#attribute-selection)
* [Classifiers](#classifiers)
* [Clusterers](#clusterers)
* [Serializing objects](#serializing-objects)

Start using Weka's Machine Learning and Data Mining algorithms by requiring the gem:

```ruby
Expand Down Expand Up @@ -233,6 +240,9 @@ discretize = Discretize.new

# apply a filter chain on instances
filtered_data = instances.apply_filter(normalize).apply_filter(discretize)

# or even shorter
filtered_data = instances.apply_filters(normalize, discretize)
```

#### Setting Filter options
Expand Down Expand Up @@ -655,6 +665,43 @@ sunny,80,90,TRUE,cluster1
...
```

### Serializing Objects

You can serialize objects with the `Weka::Core::SerializationHelper` class:

```ruby
# writing an Object to a file:
Weka::Core::SerializationHelper.write('path/to/file.model', classifier)

# load an Object from a serialized file:
object = Weka::Core::SerializationHelper.read('path/to/file.model')
```

Instead of `.write` and `.read` you can also call the aliases `.serialize` and `.deserialize`.

Serialization is helpful when training a model, e.g. a classifier, takes
several minutes. Instead of repeating the whole training every time you instantiate a classifier, you
can speed things up tremendously by serializing the classifier once it has been trained and loading it from the file later.

Classifiers, Clusterers, Instances and Filters also have a `#serialize` method
which you can use to directly serialize an instance of one of these classes, e.g. a Classifier:

```ruby
instances = Weka::Core::Instances.from_arff('weather.arff')
instances.class_attribute = :play

classifier = Weka::Classifiers::Trees::RandomForest.build do
train_with_instances instances
end

# store trained model as binary file
classifier.serialize('randomforest.model')

# load Classifier from binary file
loaded_classifier = Weka::Core::SerializationHelper.deserialize('randomforest.model')
# => #<Java::WekaClassifiersTrees::RandomForest:0x197db331>
```

## Development

After checking out the repo, run `bin/setup` to install dependencies.
Expand Down
12 changes: 10 additions & 2 deletions lib/weka/class_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module ClassMethods

def build_class(class_name, weka_module: nil, include_concerns: true)
java_import java_class_path(class_name, weka_module)
define_class(class_name, include_concerns: include_concerns)
define_class(class_name, weka_module, include_concerns: include_concerns)
end

def build_classes(*class_names, weka_module: nil, include_concerns: true)
Expand Down Expand Up @@ -52,15 +52,23 @@ def toplevel_module?
self.name.scan('::').count == 1
end

def define_class(class_name, include_concerns: true)
def define_class(class_name, weka_module, include_concerns: true)
module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
class #{class_name}
#{'include Concerns' if include_concerns}
#{include_serializable_for(class_name, weka_module)}
#{include_utils}
end
CLASS_DEFINITION
end

# Builds the source line that mixes Weka::Concerns::Serializable into a
# generated class. Returns nil when SerializationHelper reports that the
# Java class behind class_name/weka_module is not serializable.
def include_serializable_for(class_name, weka_module)
  java_path = java_class_path(class_name, weka_module)
  return unless Weka::Core::SerializationHelper.serializable?(java_path)

  'include Weka::Concerns::Serializable'
end

def include_utils
return unless utils_defined?
"include #{utils}"
Expand Down
1 change: 1 addition & 0 deletions lib/weka/concerns.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require 'weka/concerns/describable'
require 'weka/concerns/optionizable'
require 'weka/concerns/persistent'
require 'weka/concerns/serializable'

module Weka
module Concerns
Expand Down
17 changes: 17 additions & 0 deletions lib/weka/concerns/serializable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require 'active_support/concern'
require 'weka/core/serialization_helper'

module Weka
module Concerns
# Mixin that gives including classes a #serialize instance method.
module Serializable
extend ActiveSupport::Concern

# The method is defined inside the `included` hook, i.e. it is evaluated
# in the context of the class that includes this concern.
included do
# Writes the receiver to +filename+ by delegating to
# Weka::Core::SerializationHelper.write(filename, self).
def serialize(filename)
Weka::Core::SerializationHelper.write(filename, self)
end
end

end
end
end
1 change: 1 addition & 0 deletions lib/weka/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
require 'weka/core/attribute'
require 'weka/core/dense_instance'
require 'weka/core/instances'
require 'weka/core/serialization_helper'
8 changes: 8 additions & 0 deletions lib/weka/core/instances.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
require 'weka/core/loader'
require 'weka/core/saver'
require 'weka/core/dense_instance'
require 'weka/concerns/serializable'

module Weka
module Core
java_import "weka.core.Instances"
java_import "weka.core.FastVector"

class Instances
include Weka::Concerns::Serializable

DEFAULT_RELATION_NAME = 'Instances'

Expand Down Expand Up @@ -163,6 +165,12 @@ def apply_filter(filter)
filter.filter(self)
end

# Runs the given filters one after another: the first filter receives the
# receiver, every following filter receives the previous filter's result.
# Returns the result of the last filter (or the receiver if none are given).
def apply_filters(*filters)
  filters.reduce(self) { |data, current_filter| current_filter.filter(data) }
end

private

def add_attribute(attribute)
Expand Down
13 changes: 13 additions & 0 deletions lib/weka/core/serialization_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Weka
  module Core
    java_import 'weka.core.SerializationHelper'

    # Reopens the imported SerializationHelper class to add Ruby-style
    # class-level aliases: .serialize for .write and .deserialize for .read.
    class SerializationHelper
      class << self
        alias_method :serialize, :write
        alias_method :deserialize, :read
      end
    end
  end
end
2 changes: 1 addition & 1 deletion lib/weka/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Weka
VERSION = "0.1.0"
VERSION = "0.2.0"
end
24 changes: 24 additions & 0 deletions spec/concerns/serializable_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
require 'spec_helper'

describe Weka::Concerns::Serializable do
  # Anonymous class that mixes in the concern under test.
  subject do
    Class.new { include Weka::Concerns::Serializable }
  end

  let(:filename) { 'file.model' }

  # A single instance shared by expectation and call, so the spec can assert
  # that exactly this object is handed to the SerializationHelper.
  let(:instance) { subject.new }

  it 'should respond to #serialize' do
    expect(instance).to respond_to :serialize
  end

  describe '#serialize' do
    it 'should call Weka::Core::SerializationHelper.write' do
      # Match on the receiver itself. Passing the class here would only match
      # loosely, because RSpec compares arguments with === and Class#===
      # accepts any instance of the class.
      expect(Weka::Core::SerializationHelper)
        .to receive(:write)
        .with(filename, instance)

      instance.serialize(filename)
    end
  end
end
15 changes: 14 additions & 1 deletion spec/core/instances_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@

it { is_expected.to respond_to :add_instance }
it { is_expected.to respond_to :apply_filter }
it { is_expected.to respond_to :apply_filters }

it { is_expected.to respond_to :class_attribute= }
it { is_expected.to respond_to :class_attribute }
it { is_expected.to respond_to :reset_class_attribute }

it { is_expected.to respond_to :serialize }

describe 'aliases:' do
let (:instances) { described_class.new }

Expand Down Expand Up @@ -441,10 +444,20 @@
let(:filter) { double('filter') }
before { allow(filter).to receive(:filter).and_return(subject) }

it 'should call the given filters #filter method' do
it 'should call the given filter‘s #filter method' do
expect(filter).to receive(:filter).once.with(subject)
subject.apply_filter(filter)
end
end

describe '#apply_filters' do
  # Two distinct filters with distinct results, so the spec can tell real
  # chaining apart from applying every filter to the original instances.
  let(:first_filter)  { double('first filter') }
  let(:second_filter) { double('second filter') }
  let(:first_result)  { double('first result') }
  let(:second_result) { double('second result') }

  it 'should call the given filters‘ #filter methods' do
    # The first filter gets the instances themselves ...
    expect(first_filter)
      .to receive(:filter).once.with(subject).and_return(first_result)

    # ... and the second filter gets the output of the first one.
    expect(second_filter)
      .to receive(:filter).once.with(first_result).and_return(second_result)

    expect(subject.apply_filters(first_filter, second_filter)).to eq second_result
  end
end

end
18 changes: 18 additions & 0 deletions spec/core/serialization_helper_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'spec_helper'

# Spec for the Ruby wrapper class around weka.core.SerializationHelper.
describe Weka::Core::SerializationHelper do

it { is_expected.to be_kind_of Java::WekaCore::SerializationHelper }

describe 'aliases:' do
# Maps each original class method name to the alias expected for it.
{
write: :serialize,
read: :deserialize
}.each do |method, alias_method|
it "should define the alias .#{alias_method} for .#{method}" do
# NOTE(review): Module#public_class_method changes method visibility; it
# presumably does not return a Method object. If so, this comparison only
# proves that both names exist (a missing name would raise), not that one
# is an alias of the other — confirm and consider comparing Method objects.
expect(Weka::Core::SerializationHelper.public_class_method(method))
.to eq Weka::Core::SerializationHelper.public_class_method(alias_method)
end
end
end
end
2 changes: 1 addition & 1 deletion weka.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
spec.email = ['paul.christoph.goetze@gmail.com']

spec.summary = %q{Machine Learning & Data Mining with JRuby.}
spec.description = %q{A wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)}
spec.description = %q{A JRuby wrapper for the Weka library (http://www.cs.waikato.ac.nz/ml/weka/)}
spec.homepage = 'https://github.com/paulgoetze/weka-jruby'
spec.license = 'MIT'

Expand Down

0 comments on commit 88b3f58

Please sign in to comment.