diff --git a/Cargo.toml b/Cargo.toml index de79cf2..2a68045 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,14 +8,15 @@ homepage = "https://github.com/davechallis/rust-xgboost" description = "Machine learning using XGBoost" documentation = "https://docs.rs/xgboost" readme = "README.md" +edition = "2021" [dependencies] xgboost-sys = { path = "xgboost-sys" } libc = "0.2" -derive_builder = "0.12" +derive_builder = "0.20" log = "0.4" -tempfile = "3.9" -indexmap = "2.1" +tempfile = "3.15" +indexmap = "2.7" [features] cuda = ["xgboost-sys/cuda"] diff --git a/src/booster.rs b/src/booster.rs index 4e91689..a965b6d 100644 --- a/src/booster.rs +++ b/src/booster.rs @@ -1,5 +1,5 @@ -use dmatrix::DMatrix; -use error::XGBError; +use crate::dmatrix::DMatrix; +use crate::error::XGBError; use libc; use std::collections::{BTreeMap, HashMap}; use std::io::{self, BufRead, BufReader, Write}; @@ -13,7 +13,7 @@ use tempfile; use xgboost_sys; use super::XGBResult; -use parameters::{BoosterParameters, TrainingParameters}; +use crate::parameters::{BoosterParameters, TrainingParameters}; pub type CustomObjective = fn(&[f32], &DMatrix) -> (Vec, Vec); @@ -148,29 +148,8 @@ impl Booster { dmats }; - let mut bst = Booster::new_with_cached_dmats(&params.booster_params, &cached_dmats)?; - // load distributed code checkpoint from rabit - let mut version = bst.load_rabit_checkpoint()?; - debug!("Loaded Rabit checkpoint: version={}", version); - assert!(unsafe { xgboost_sys::RabitGetWorldSize() != 1 || version == 0 }); - let start_iteration = version / 2; - for i in start_iteration..params.boost_rounds as i32 { - // distributed code: need to resume to this point - // skip first update if a recovery step - if version % 2 == 0 { - if let Some(objective_fn) = params.custom_objective_fn { - debug!("Boosting in round: {}", i); - bst.update_custom(params.dtrain, objective_fn)?; - } else { - debug!("Updating in round: {}", i); - bst.update(params.dtrain, i)?; - } - let _ = bst.save_rabit_checkpoint()?; - version 
+= 1; - } - - assert!(unsafe { xgboost_sys::RabitGetWorldSize() == 1 || version == xgboost_sys::RabitVersionNumber() }); - + let bst = Booster::new_with_cached_dmats(&params.booster_params, &cached_dmats)?; + for i in 0..params.boost_rounds as i32 { if let Some(eval_sets) = params.evaluation_sets { let mut dmat_eval_results = bst.eval_set(eval_sets, i)?; @@ -203,10 +182,6 @@ impl Booster { } println!(); } - - // do checkpoint after evaluation, in case evaluation also updates booster. - let _ = bst.save_rabit_checkpoint(); - version += 1; } Ok(bst) @@ -365,13 +340,16 @@ impl Booster { let mut out_len = 0; let mut out = ptr::null_mut(); xgb_call!(xgboost_sys::XGBoosterGetAttrNames(self.handle, &mut out_len, &mut out))?; - - let out_ptr_slice = unsafe { slice::from_raw_parts(out, out_len as usize) }; - let out_vec = out_ptr_slice - .iter() - .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) - .collect(); - Ok(out_vec) + if out_len > 0 { + let out_ptr_slice = unsafe { slice::from_raw_parts(out, out_len as usize) }; + let out_vec = out_ptr_slice + .iter() + .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) + .collect(); + Ok(out_vec) + } else { + Ok(Vec::new()) + } } /// Predict results for given data. 
@@ -517,7 +495,7 @@ impl Booster { Err(err) => return Err(XGBError::new(err.to_string())), }; - let file_path = tmp_dir.path().join("fmap.txt"); + let file_path = tmp_dir.path().join("fmap.json"); let mut file: File = match File::create(&file_path) { Ok(f) => f, Err(err) => return Err(XGBError::new(err.to_string())), @@ -551,24 +529,18 @@ impl Booster { &mut out_dump_array ))?; - let out_ptr_slice = unsafe { slice::from_raw_parts(out_dump_array, out_len as usize) }; - let out_vec: Vec = out_ptr_slice - .iter() - .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) - .collect(); + if out_len > 0 { + let out_ptr_slice = unsafe { slice::from_raw_parts(out_dump_array, out_len as usize) }; + let out_vec: Vec = out_ptr_slice + .iter() + .map(|str_ptr| unsafe { ffi::CStr::from_ptr(*str_ptr).to_str().unwrap().to_owned() }) + .collect(); - assert_eq!(out_len as usize, out_vec.len()); - Ok(out_vec.join("\n")) - } - - pub(crate) fn load_rabit_checkpoint(&self) -> XGBResult { - let mut version = 0; - xgb_call!(xgboost_sys::XGBoosterLoadRabitCheckpoint(self.handle, &mut version))?; - Ok(version) - } - - pub(crate) fn save_rabit_checkpoint(&self) -> XGBResult<()> { - xgb_call!(xgboost_sys::XGBoosterSaveRabitCheckpoint(self.handle)) + assert_eq!(out_len as usize, out_vec.len()); + Ok(out_vec.join("\n")) + } else { + Ok(String::new()) + } } pub fn set_param(&mut self, name: &str, value: &str) -> XGBResult<()> { @@ -721,7 +693,7 @@ impl fmt::Display for FeatureType { #[cfg(test)] mod tests { use super::*; - use parameters::{self, learning, tree}; + use crate::parameters::{self, learning, tree}; fn read_train_matrix() -> XGBResult { DMatrix::load(r#"{"uri": "xgboost-sys/xgboost/demo/data/agaricus.txt.train?format=libsvm"}"#) @@ -739,7 +711,6 @@ mod tests { assert!(res.is_ok()); } - #[test] fn get_set_attr() { let mut booster = load_test_booster(); diff --git a/src/dmatrix.rs b/src/dmatrix.rs index 98cf30a..4c0b959 100644 --- a/src/dmatrix.rs +++ 
b/src/dmatrix.rs @@ -314,7 +314,11 @@ impl DMatrix { &mut out_dptr ))?; - Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) + if out_len > 0 { + Ok(unsafe { slice::from_raw_parts(out_dptr as *mut c_float, out_len as usize) }) + } else { + Err(XGBError::new("error")) + } } fn set_float_info(&mut self, field: &str, array: &[f32]) -> XGBResult<()> { diff --git a/xgboost-sys/.cargo/config b/xgboost-sys/.cargo/config.toml similarity index 100% rename from xgboost-sys/.cargo/config rename to xgboost-sys/.cargo/config.toml diff --git a/xgboost-sys/Cargo.toml b/xgboost-sys/Cargo.toml index b4603ff..b9749af 100644 --- a/xgboost-sys/Cargo.toml +++ b/xgboost-sys/Cargo.toml @@ -8,12 +8,13 @@ license = "MIT" repository = "https://github.com/davechallis/rust-xgboost" description = "Native bindings to the xgboost library" readme = "README.md" +edition = "2021" [dependencies] libc = "0.2" [build-dependencies] -bindgen = "0.69" +bindgen = "0.71" cmake = "0.1" [features] diff --git a/xgboost-sys/build.rs b/xgboost-sys/build.rs index c6ba1a6..7fc9a9a 100644 --- a/xgboost-sys/build.rs +++ b/xgboost-sys/build.rs @@ -25,8 +25,12 @@ fn main() { dst.define("BUILD_STATIC_LIB", "ON").define("CMAKE_CXX_STANDARD", "17"); // CMake + let mut dst = Config::new(&xgb_root); + let mut dst = dst.define("BUILD_STATIC_LIB", "ON"); + #[cfg(feature = "cuda")] - dst.define("USE_CUDA", "ON") + let mut dst = dst + .define("USE_CUDA", "ON") .define("BUILD_WITH_CUDA", "ON") .define("BUILD_WITH_CUDA_CUB", "ON"); @@ -34,7 +38,8 @@ fn main() { { let path = PathBuf::from("/opt/homebrew/"); // check for m1 vs intel config if let Ok(_dir) = std::fs::read_dir(&path) { - dst.define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") + dst = dst + .define("CMAKE_C_COMPILER", "/opt/homebrew/opt/llvm/bin/clang") .define("CMAKE_CXX_COMPILER", "/opt/homebrew/opt/llvm/bin/clang++") .define("OPENMP_LIBRARIES", "/opt/homebrew/opt/llvm/lib") .define("OPENMP_INCLUDES", 
"/opt/homebrew/opt/llvm/include"); @@ -54,9 +59,11 @@ fn main() { #[cfg(feature = "cuda")] let bindings = bindings.clang_arg("-I/usr/local/cuda/include"); - let bindings = bindings.generate().expect("Unable to generate bindings."); + let bindings = bindings + .generate() + .expect("Unable to generate bindings."); - let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + let out_path = PathBuf::from(out_dir); bindings .write_to_file(out_path.join("bindings.rs")) .expect("Couldn't write bindings."); diff --git a/xgboost-sys/xgboost b/xgboost-sys/xgboost index 82d846b..5e64276 160000 --- a/xgboost-sys/xgboost +++ b/xgboost-sys/xgboost @@ -1 +1 @@ -Subproject commit 82d846bbeb83c652a0b1dff0e3519e67569c4a3d +Subproject commit 5e64276a9b95df57e6dd8f9e63347636f4e5d331