Commit
Merge branch 'master' into windows5
ankane committed Nov 13, 2024
2 parents 1776e4f + 0de57e5 commit e97a8ab
Showing 15 changed files with 154 additions and 51 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
@@ -1,4 +1,15 @@
## 0.3.3 (unreleased)
## 0.4.0 (unreleased)

- Added support for hashes and Rover data frames to `predict` method
- Changed `Dataset` to use column names for feature names with Rover and Daru
- Changed `predict` method to match feature names with Daru
- Dropped support for Ruby < 3.1

## 0.3.4 (2024-07-28)

- Updated LightGBM to 4.5.0

## 0.3.3 (2024-06-15)

- Updated LightGBM to 4.4.0

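A hedged usage sketch of the predict-related changelog entries above, assuming a trained booster whose feature names are ["x0", "x1", "x2", "x3"] (the fixture used by the tests later in this diff); inputs and values are illustrative only:

  # Hash input (not part of the diff): keys may be strings or symbols and may
  # appear in any order; predict reorders them to match booster.feature_name.
  booster.predict({"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7})

  # Daru (or Rover) data frame input: columns are now selected and ordered by
  # name rather than by position.
  require "daru"
  x_test = Daru::DataFrame.new([
    {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
    {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
  ])
  booster.predict(x_test)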
2 changes: 1 addition & 1 deletion Gemfile
@@ -5,7 +5,7 @@ gemspec
gem "rake"
gem "minitest", ">= 5"
gem "daru"
gem "matrix" # for daru
gem "matrix"
gem "numo-narray", platform: [:ruby, :x64_mingw]
gem "rover-df", platform: [:ruby, :x64_mingw]
gem "csv"
2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,7 +1,7 @@
The MIT License (MIT)

Copyright (c) Microsoft Corporation
Copyright (c) 2019-2023 Andrew Kane
Copyright (c) 2019-2024 Andrew Kane

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby

[![Build Status](https://github.com/ankane/lightgbm-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/lightgbm-ruby/actions)
[![Build Status](https://github.com/ankane/lightgbm-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/lightgbm-ruby/actions)

## Installation

12 changes: 6 additions & 6 deletions Rakefile
@@ -23,7 +23,7 @@ def download_file(file, sha256)
require "open-uri"

# also update licenses in vendor/
version = "4.4.0"
version = "4.5.0"

url =
if file == "lib_lightgbm.arm64.dylib"
@@ -45,24 +45,24 @@ end
# https://github.com/microsoft/LightGBM/releases
namespace :vendor do
task :linux do
download_file("lib_lightgbm.so", "fdbb5b5786d4a99f661d453a62cc07c6607b780a1e4e774443df67aded6bb8b3")
download_file("lib_lightgbm.so", "4b2b68c4d0fa99bace6cc540224b457ff899ccee0fdc8875e4625a38b00fc5e5")
end

task :mac do
download_file("lib_lightgbm.dylib", "c5824d085fd342c58f92291f40f02554f13ca1504fa26f1b2aef3151e8a70fdc")
download_file("lib_lightgbm.arm64.dylib", "58b7d2c1e04c8af20c9558582e07957e3e227ef6bb31a10644b92cc93610a1fc")
download_file("lib_lightgbm.dylib", "b02d48071ba4ae1e13e336a902dc5f82a5732de4448d47a20d8e9d94d5d3db2a")
download_file("lib_lightgbm.arm64.dylib", "840e16754db0d3e4852bdfdecc1ee08bc367b138e0bf18fabb4ce3d9b39c936a")
end

task :windows do
download_file("lib_lightgbm.dll", "922c627c23e065f85d8e5e975be4ec78c65a424bdf12253f3168110cc2391185")
download_file("lib_lightgbm.dll", "1d281ec96684806d83468469fb6052880308f39bf03a34d85ee9aa38195d260c")
end

task all: [:linux, :mac, :windows]

task :platform do
if Gem.win_platform?
Rake::Task["vendor:windows"].invoke
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
elsif RbConfig::CONFIG["host_os"].match?(/darwin/i)
Rake::Task["vendor:mac"].invoke
else
Rake::Task["vendor:linux"].invoke
2 changes: 1 addition & 1 deletion lib/lightgbm.rb
@@ -145,7 +145,7 @@ def cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true
boosters.each(&:update)

scores = {}
boosters.map(&:eval_valid).map(&:reverse).flatten(1).each do |r|
boosters.map(&:eval_valid).flat_map(&:reverse).each do |r|
(scores[r[1]] ||= []) << r[2]
end

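As context for the refactor above, a small standalone check (plain Ruby, not part of the commit) that flat_map(&:reverse) returns the same result as map(&:reverse).flatten(1) while skipping the intermediate array:

  nested = [[1, 2], [3, 4, 5]]
  nested.map(&:reverse).flatten(1) # => [2, 1, 5, 4, 3]
  nested.flat_map(&:reverse)       # => [2, 1, 5, 4, 3]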
27 changes: 19 additions & 8 deletions lib/lightgbm/booster.rb
@@ -41,7 +41,7 @@ def dump_model(num_iteration: nil, start_iteration: 0)
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
feature_importance_type = 0 # TODO add option
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
actual_len = read_int64(out_len)
actual_len = out_len.read_int64
if actual_len > buffer_len
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
@@ -51,7 +51,7 @@ def dump_model(num_iteration: nil, start_iteration: 0)
alias_method :to_json, :dump_model

def eval_valid
@name_valid_sets.each_with_index.map { |n, i| inner_eval(n, i + 1) }.flatten(1)
@name_valid_sets.each_with_index.flat_map { |n, i| inner_eval(n, i + 1) }
end

def eval_train
@@ -99,6 +99,7 @@ def feature_name
def model_from_string(model_str)
out_num_iterations = ::FFI::MemoryPointer.new(:int)
check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
@cached_feature_name = nil
self
end

@@ -109,7 +110,7 @@ def model_to_string(num_iteration: nil, start_iteration: 0)
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
feature_importance_type = 0 # TODO add option
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
actual_len = read_int64(out_len)
actual_len = out_len.read_int64
if actual_len > buffer_len
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
@@ -140,7 +141,14 @@ def num_trees
def predict(input, start_iteration: nil, num_iteration: nil, **params)
input =
if daru?(input)
input.map_rows(&:to_a)
input[*cached_feature_name].map_rows(&:to_a)
elsif input.is_a?(Hash) # sort feature.values to match the order of model.feature_name
sorted_feature_values(input)
elsif input.is_a?(Array) && input.first.is_a?(Hash) # on multiple elems, if 1st is hash, assume they all are
input.map(&method(:sorted_feature_values))
elsif rover?(input)
# TODO improve performance
input[cached_feature_name].to_numo.to_a
else
input.to_a
end
@@ -160,7 +168,7 @@ def predict(input, start_iteration: nil, num_iteration: nil, **params)
out_len = ::FFI::MemoryPointer.new(:int64)
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, start_iteration, num_iteration, params_str(params), out_len, out_result)
out = out_result.read_array_of_double(read_int64(out_len))
out = out_result.read_array_of_double(out_len.read_int64)
out = out.each_slice(num_class).to_a if num_class > 1

singular ? out.first : out
@@ -236,9 +244,12 @@ def num_class
out.read_int
end

# read_int64 not available on JRuby
def read_int64(ptr)
ptr.read_array_of_int64(1).first
def sorted_feature_values(input_hash)
input_hash.transform_keys(&:to_s).fetch_values(*cached_feature_name)
end

def cached_feature_name
@cached_feature_name ||= feature_name
end

include Utils
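A hedged illustration of what the new sorted_feature_values helper above does (plain Ruby, not part of the commit): hash keys are stringified with transform_keys(&:to_s) and then fetched in the model's feature-name order, so a missing feature surfaces as a KeyError from predict.

  feature_name = ["x0", "x1", "x2", "x3"] # assumed model feature names
  input = {x2: 7.2, "x0" => 3.7, x3: 9.0, x1: 1.2}
  input.transform_keys(&:to_s).fetch_values(*feature_name)
  # => [3.7, 1.2, 7.2, 9.0]
  input.transform_keys(&:to_s).fetch_values("x0", "x4")
  # => raises KeyError, since "x4" is not present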
49 changes: 30 additions & 19 deletions lib/lightgbm/dataset.rb
@@ -2,7 +2,7 @@ module LightGBM
class Dataset
attr_reader :data, :params

def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
@data = data
@label = label
@weight = weight
@@ -11,7 +11,7 @@ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference
@reference = reference
@used_indices = used_indices
@categorical_feature = categorical_feature
@feature_names = feature_names
@feature_name = feature_name || feature_names || "auto"

construct
end
@@ -24,7 +24,7 @@ def weight
field("weight")
end

def feature_names
def feature_name
# must preallocate space
num_feature_names = ::FFI::MemoryPointer.new(:int)
out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
@@ -48,6 +48,7 @@ def feature_names
# from most recent call (instead of num_features)
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
end
alias_method :feature_names, :feature_name

def label=(label)
@label = label
@@ -64,12 +65,15 @@ def group=(group)
set_field("group", group, type: :int32)
end

def feature_names=(feature_names)
def feature_name=(feature_names)
@feature_names = feature_names
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
# keep reference to string pointers
str_ptrs = feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) }
c_feature_names.write_array_of_pointer(str_ptrs)
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
end
alias_method :feature_names=, :feature_name=

# TODO only update reference if not in chain
def reference=(reference)
@@ -106,12 +110,7 @@ def subset(used_indices, params: nil)
end

def handle_pointer
@handle.read_pointer
end

def self.finalize(addr)
# must use proc instead of stabby lambda
proc { FFI.LGBM_DatasetFree(::FFI::Pointer.new(:pointer, addr)) }
@handle
end

private
@@ -127,25 +126,33 @@ def construct
end
set_verbosity(params)

@handle = ::FFI::MemoryPointer.new(:pointer)
handle = ::FFI::MemoryPointer.new(:pointer)
parameters = params_str(params)
reference = @reference.handle_pointer if @reference
if used_indices
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
used_row_indices.write_array_of_int32(used_indices)
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, handle)
elsif data.is_a?(String)
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, handle)
else
if matrix?(data)
nrow = data.row_count
ncol = data.column_count
flat_data = data.to_a.flatten
elsif daru?(data)
if @feature_name == "auto"
@feature_name = data.vectors.to_a
end
nrow, ncol = data.shape
flat_data = data.map_rows(&:to_a).flatten
elsif numo?(data) || rover?(data)
data = data.to_numo if rover?(data)
elsif numo?(data)
nrow, ncol = data.shape
elsif rover?(data)
if @feature_name == "auto"
@feature_name = data.keys
end
data = data.to_numo
nrow, ncol = data.shape
else
nrow = data.count
@@ -161,14 +168,18 @@ def construct
c_data.write_array_of_double(flat_data)
end

check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, handle)
end
if used_indices
@handle = handle.read_pointer
else
@handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_DatasetFree))
end
ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i)) unless used_indices

self.label = @label if @label
self.weight = @weight if @weight
self.group = @group if @group
self.feature_names = @feature_names if @feature_names
self.feature_name = @feature_name if @feature_name && @feature_name != "auto"
end

def dump_text(filename)
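A hedged sketch of how the reworked constructor above resolves feature names (column names, values, and labels here are invented for illustration): Rover and Daru frames contribute their column names when feature_name is left at "auto", while an explicit feature_name: keyword (or the legacy feature_names:) takes precedence.

  require "lightgbm"
  require "rover"

  # Column names come from the Rover data frame (data.keys in the diff above).
  df = Rover::DataFrame.new({"x0" => [3.7, 7.5, 1.1, 5.6], "x1" => [1.2, 0.5, 9.8, 8.4]})
  dataset = LightGBM::Dataset.new(df, label: [1, 0, 1, 0])
  dataset.feature_name # => ["x0", "x1"]

  # An explicit keyword overrides any inferred names.
  plain = LightGBM::Dataset.new([[3.7, 1.2], [7.5, 0.5], [1.1, 9.8], [5.6, 8.4]], label: [1, 0, 1, 0], feature_name: ["f0", "f1"])
  plain.feature_name # => ["f0", "f1"]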
2 changes: 1 addition & 1 deletion lib/lightgbm/utils.rb
@@ -12,7 +12,7 @@ def params_str(params)
end

def check_param(v)
raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match(v)
raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
v
end

2 changes: 1 addition & 1 deletion lib/lightgbm/version.rb
@@ -1,3 +1,3 @@
module LightGBM
VERSION = "0.3.2"
VERSION = "0.3.4"
end
2 changes: 1 addition & 1 deletion lightgbm.gemspec
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
spec.files = Dir["*.{md,txt}", "{lib,vendor}/**/*"]
spec.require_path = "lib"

spec.required_ruby_version = ">= 3"
spec.required_ruby_version = ">= 3.1"

spec.add_dependency "ffi"
end
58 changes: 58 additions & 0 deletions test/booster_test.rb
@@ -15,6 +15,14 @@ def test_model_str
assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2)
end

def test_model_from_string
x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
booster = LightGBM.train(binary_params, binary_train)
booster.model_from_string(File.read("test/support/model.txt"))
y_pred = booster.predict(x_test)
assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2)
end

def test_feature_importance
assert_equal [280, 285, 335, 148], booster.feature_importance
end
@@ -30,6 +38,56 @@ def test_feature_importance_bad_importance_type
assert_includes error.message, "Unknown importance type"
end

def test_predict_hash
pred = booster.predict({x0: 3.7, x1: 1.2, x2: 7.2, x3: 9.0})
assert_in_delta 0.9823112229173586, pred

pred = booster.predict({"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7})
assert_in_delta 0.9823112229173586, pred

pred =
booster.predict([
{"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
{"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
])
assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)

assert_raises(KeyError) do
booster.predict({"x0" => 3.7})
end
end

def test_predict_daru
x_test =
Daru::DataFrame.new([
{"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
{"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
])
pred = booster.predict(x_test)
assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)

assert_raises(IndexError) do
booster.predict(Daru::DataFrame.new([{"x0" => 3.7}]))
end
end

def test_predict_rover
skip if jruby?

require "rover"
x_test =
Rover::DataFrame.new([
{"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
{"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
])
pred = booster.predict(x_test)
assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)

assert_raises(KeyError) do
booster.predict(Rover::DataFrame.new([{"x0" => 3.7}]))
end
end

def test_model_to_string
assert booster.model_to_string
end