Skip to content

Commit

Permalink
dmlc/xgboost#14a33f6 2543
Browse files Browse the repository at this point in the history
Updating to dmlc/xgboost#2543 (24/07/2017)
  • Loading branch information
Laurae2 committed Aug 1, 2017
1 parent 9dc805c commit abaa8ca
Show file tree
Hide file tree
Showing 49 changed files with 2,069 additions and 1,119 deletions.
154 changes: 125 additions & 29 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,54 @@
#!/usr/bin/groovy
// -*- mode: groovy -*-
// Jenkins pipeline
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/

// command to start a docker container
docker_run = 'tests/ci_build/ci_build.sh'
// Command to run command inside a docker container
dockerRun = 'tests/ci_build/ci_build.sh'

// timeout in minutes
max_time = 60
// Build configurations. Only entries with "enabled": true are expanded into
// parallel branches by the 'Build & Test' stage. The "os", "withGpu",
// "withOmp" and "pythonVersion" values are read by getBuildName(); "os" and
// "withGpu" also select the node label and docker target in buildFactory().
def buildMatrix = [
[ "enabled": true, "os" : "linux", "withGpu": true, "withOmp": true, "pythonVersion": "2.7" ],
[ "enabled": true, "os" : "linux", "withGpu": false, "withOmp": true, "pythonVersion": "2.7" ],
[ "enabled": false, "os" : "osx", "withGpu": false, "withOmp": false, "pythonVersion": "2.7" ],
]

// Declarative pipeline: check out and stash the sources once, then run the
// branches generated from every enabled buildMatrix entry in parallel.
pipeline {
// No global agent: each stage specifies (or allocates) its own
agent none

// Setup common job properties
options {
// Render ANSI colour escape sequences in the console log
ansiColor('xterm')
// Prefix console output with timestamps
timestamps()
// Time limit for the whole run
timeout(time: 120, unit: 'MINUTES')
// Keep only the 10 most recent builds
buildDiscarder(logRotator(numToKeepStr: '10'))
}

// Build stages
stages {
stage('Get sources') {
agent any
steps {
checkoutSrcs()
// Save the checkout (minus .git/) under the name 'srcs' for later unstash
stash name: 'srcs', excludes: '.git/'
milestone label: 'Sources ready', ordinal: 1
}
}
stage('Build & Test') {
steps {
script {
// Every enabled matrix entry contributes the map returned by
// buildFactory(); collectEntries merges them into the single
// name -> closure map handed to parallel.
parallel (buildMatrix.findAll{it['enabled']}.collectEntries{ c ->
def buildName = getBuildName(c)
buildFactory(buildName, c)
})
}
}
}
}
}

// initialize source codes
def init_git() {
def checkoutSrcs() {
retry(5) {
try {
timeout(time: 2, unit: 'MINUTES') {
Expand All @@ -23,33 +62,90 @@ def init_git() {
}
}

stage('Build') {
node('GPU' && 'linux') {
ws('workspace/xgboost/build-gpu-cmake') {
init_git()
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu tests/ci_build/build_gpu_cmake.sh"
}
}
/**
 * Produces the parallel branches for one build configuration.
 *
 * Returns a two-entry map keyed "cmake_<buildName>" and "make_<buildName>".
 * Each value is a closure that, when invoked, runs the corresponding
 * buildPlatformCmake / buildPlatformMake build.
 *
 * @param buildName  name of the configuration; becomes the suffix of both keys
 * @param conf       build-matrix entry; "os" and "withGpu" are read here
 */
def buildFactory(buildName, conf) {
    def os = conf["os"]
    def wantsGpu = conf["withGpu"]
    // GPU builds need a node carrying both the OS label and the gpu label
    def nodeReq = wantsGpu ? "${os} && gpu" : "${os}"
    def dockerTarget = wantsGpu ? "gpu" : "cpu"

    def cmakeName = "cmake_${buildName}"
    def makeName = "make_${buildName}"

    def branches = [:]
    branches[cmakeName] = { buildPlatformCmake(cmakeName, conf, nodeReq, dockerTarget) }
    branches[makeName] = { buildPlatformMake(makeName, conf, nodeReq, dockerTarget) }
    return branches
}

/**
* Build platform and test it via cmake.
*/
def buildPlatformCmake(buildName, conf, nodeReq, dockerTarget) {
def opts = cmakeOptions(conf)
// Destination dir for artifacts
def distDir = "dist/${buildName}"
// Build node - this is returned result
node(nodeReq) {
unstash name: 'srcs'
echo """
|===== XGBoost CMake build =====
| dockerTarget: ${dockerTarget}
| cmakeOpts : ${opts}
|=========================
""".stripMargin('|')
// Invoke command inside docker
sh """
${dockerRun} ${dockerTarget} tests/ci_build/build_via_cmake.sh ${opts}
${dockerRun} ${dockerTarget} tests/ci_build/test_${dockerTarget}.sh
${dockerRun} ${dockerTarget} bash -c "cd python-package; python setup.py bdist_wheel"
rm -rf "${distDir}"; mkdir -p "${distDir}/py"
cp xgboost "${distDir}"
cp -r lib "${distDir}"
cp -r python-package/dist "${distDir}/py"
"""
archiveArtifacts artifacts: "${distDir}/**/*.*", allowEmptyArchive: true
}
node('GPU' && 'linux') {
ws('workspace/xgboost/build-gpu-make') {
init_git()
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu make PLUGIN_UPDATER_GPU=ON"
}
}
}

/**
 * Builds one platform configuration via make.
 *
 * Allocates a node matching nodeReq, restores the stashed 'srcs' sources,
 * echoes a banner with the docker target and make options, then runs
 * tests/ci_build/build_via_make.sh through the dockerRun wrapper script.
 *
 * @param buildName     name of this build; here only used to derive distDir
 * @param conf          build-matrix entry, forwarded to makeOptions()
 * @param nodeReq       label expression passed to node()
 * @param dockerTarget  first argument given to the dockerRun script
 */
def buildPlatformMake(buildName, conf, nodeReq, dockerTarget) {
def opts = makeOptions(conf)
// Destination dir for artifacts
// NOTE(review): distDir is computed but never used in this function -
// confirm whether the make build was meant to archive artifacts.
def distDir = "dist/${buildName}"
// Build node
node(nodeReq) {
unstash name: 'srcs'
echo """
|===== XGBoost Make build =====
| dockerTarget: ${dockerTarget}
| makeOpts : ${opts}
|=========================
""".stripMargin('|')
// Invoke command inside docker
sh """
${dockerRun} ${dockerTarget} tests/ci_build/build_via_make.sh ${opts}
"""
}
}

/**
 * Translates a build-matrix entry into the variable assignments handed to
 * the make-based build script.
 *
 * @param conf  build-matrix entry; "withGpu" and "withOmp" are read
 * @return the two assignments separated by a single space
 */
def makeOptions(conf) {
    def flags = []
    flags << (conf["withGpu"] ? 'PLUGIN_UPDATER_GPU=ON' : 'PLUGIN_UPDATER_GPU=OFF')
    flags << (conf["withOmp"] ? 'USE_OPENMP=1' : 'USE_OPENMP=0')
    return flags.join(" ")
}

stage('Unit Test') {
node('GPU' && 'linux') {
ws('workspace/xgboost/unit-test') {
init_git()
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu tests/ci_build/test_gpu.ssh"
}
}
}

/**
 * Translates a build-matrix entry into the -D definitions handed to the
 * cmake-based build script.
 *
 * A disabled feature contributes an empty string, so the result may carry a
 * leading or trailing space (or be a single space when both are disabled).
 *
 * @param conf  build-matrix entry; "withGpu" and "withOmp" are read
 * @return the GPU and OpenMP definitions joined by a single space
 */
def cmakeOptions(conf) {
    def gpuDefine = ''
    if (conf["withGpu"]) {
        gpuDefine = '-DPLUGIN_UPDATER_GPU:BOOL=ON'
    }
    def ompDefine = ''
    if (conf["withOmp"]) {
        ompDefine = '-DOPEN_MP:BOOL=ON'
    }
    return [gpuDefine, ompDefine].join(" ")
}

/**
 * Derives the name of a build configuration from its matrix entry, in the
 * form <os>_<gpu|cpu>[_omp]_py<pythonVersion>.
 *
 * @param conf  build-matrix entry; "os", "withGpu", "withOmp" and
 *              "pythonVersion" are read
 */
def getBuildName(conf) {
    def osLabel = conf['os']
    def accelLabel = "_cpu"
    if (conf['withGpu']) {
        accelLabel = "_gpu"
    }
    // The OpenMP part is simply omitted when OpenMP is disabled
    def threadLabel = ""
    if (conf['withOmp']) {
        threadLabel = "_omp"
    }
    def pythonLabel = "_py${conf['pythonVersion']}"
    return "${osLabel}${accelLabel}${threadLabel}${pythonLabel}"
}

7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ export JAVAINCFLAGS = -I${JAVA_HOME}/include -I./java
ifeq ($(TEST_COVER), 1)
CFLAGS += -g -O0 -fprofile-arcs -ftest-coverage
else
CFLAGS += -O3 -funroll-loops -msse2
CFLAGS += -O3 -funroll-loops
ifeq ($(USE_SSE), 1)
CFLAGS += -msse2
endif
endif

ifndef LINT_LANG
Expand Down Expand Up @@ -123,7 +126,7 @@ $(DMLC_CORE)/libdmlc.a: $(wildcard $(DMLC_CORE)/src/*.cc $(DMLC_CORE)/src/*/*.cc
+ cd $(DMLC_CORE); $(MAKE) libdmlc.a config=$(ROOTDIR)/$(config); cd $(ROOTDIR)

$(RABIT)/lib/$(LIB_RABIT): $(wildcard $(RABIT)/src/*.cc)
+ cd $(RABIT); $(MAKE) lib/$(LIB_RABIT); cd $(ROOTDIR)
+ cd $(RABIT); $(MAKE) lib/$(LIB_RABIT) USE_SSE=$(USE_SSE); cd $(ROOTDIR)

jvm: jvm-packages/lib/libxgboost4j.so

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ This is a version of xgboost **maintained by Laurae** for easy installation for
**Install in R easily in ONE command**:

```r
devtools::install_github("Laurae2/ez_xgb/R-package@2017-07-09-v3")
devtools::install_github("Laurae2/ez_xgb/R-package@2017-07-24-v3")
```

---
Expand Down
3 changes: 2 additions & 1 deletion cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ endfunction(set_default_configuration_release)

function(format_gencode_flags flags out)
foreach(ver ${flags})
set(${out} "${${out}}-gencode arch=compute_${ver},code=sm_${ver};" PARENT_SCOPE)
set(${out} "${${out}}-gencode arch=compute_${ver},code=sm_${ver};")
endforeach()
set(${out} "${${out}}" PARENT_SCOPE)
endfunction(format_gencode_flags flags)
2 changes: 1 addition & 1 deletion doc/jvm/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ You have find XGBoost JVM Package!

Installation
------------
Currently, XGBoost4J only support installation from source. Building XGBoost4J using Maven requires Maven 3 or newer and Java 7+.
Currently, XGBoost4J only supports installation from source. Building XGBoost4J using Maven requires Maven 3 or newer, Java 7+ and CMake 3.2+ for compiling the JNI bindings.

Before you install XGBoost4J, you need to define environment variable `JAVA_HOME` as your JDK directory to ensure that your compiler can find `jni.h` correctly, since XGBoost4J relies on JNI to implement the interaction between the JVM and native libraries.

Expand Down
17 changes: 17 additions & 0 deletions doc/parameter.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,23 @@ Parameters for Tree Booster
- This is only used if 'hist' is specified as `tree_method`.
- Maximum number of discrete bins to bucket continuous features.
- Increasing this number improves the optimality of splits at the cost of higher computation time.
* use_columnar_access, [default=1]
- This is only used if 'hist' is specified as `tree_method`.
- If greater than zero, store a transposed copy of input matrix for fast columnar access. May increase memory usage and initial setup time.
* sparse_threshold, [default=0.2]
- range: [0.0, 1.0]
- This is only used if 'hist' is specified as `tree_method`.
- Percentage threshold for treating a feature as sparse. For instance, 0.2 indicates that any feature with fewer than 20% nonzero rows will be considered sparse. May impact computation time slightly.
* enable_feature_grouping, [default=0]
- This is only used if 'hist' is specified as `tree_method`.
- If greater than zero, group complementary features together so as to improve work balance for parallel histogram aggregation. May increase memory usage and initial setup time.
* max_conflict_rate, [default=0]
- range: [0.0, 1.0]
- Only relevant when `enable_feature_grouping=1` is specified.
- Specifies criterion for "complementary" features. By default, only features with no common nonzero rows are considered complementary. Increase this number to encourage larger feature groups.
* max_search_group, [default=100]
- Only relevant when `enable_feature_grouping=1` is specified.
- Increasing this number will result in better feature grouping, at the cost of greater initial setup time.

Additional parameters for Dart Booster
--------------------------------------
Expand Down
16 changes: 16 additions & 0 deletions include/xgboost/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,22 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data,
bst_ulong ncol,
float missing,
DMatrixHandle *out);
/*!
* \brief create matrix content from dense matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number of columns
* \param missing which value to represent missing value
* \param out created dmatrix
* \param nthread number of threads (up to maximum cores available, if <=0 use all cores)
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle *out,
int nthread);
/*!
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala.spark

import scala.collection.mutable

import ml.dmlc.xgboost4j.java.{IRabitTracker, Rabit, XGBoostError, DMatrix => JDMatrix, RabitTracker => PyRabitTracker}
import ml.dmlc.xgboost4j.java.{IRabitTracker, Rabit, XGBoostError, RabitTracker => PyRabitTracker}
import ml.dmlc.xgboost4j.scala.rabit.RabitTracker
import ml.dmlc.xgboost4j.scala.{XGBoost => SXGBoost, _}
import org.apache.commons.logging.LogFactory
Expand Down Expand Up @@ -115,7 +115,7 @@ object XGBoost extends Serializable {
rabitEnv.put("DMLC_TASK_ID", TaskContext.getPartitionId().toString)
Rabit.init(rabitEnv)
val partitionItr = fromDenseToSparseLabeledPoints(trainingSamples, missing)
val trainingMatrix = new DMatrix(new JDMatrix(partitionItr, cacheFileName))
val trainingMatrix = new DMatrix(partitionItr, cacheFileName)
try {
if (params.contains("groupData") && params("groupData") != null) {
trainingMatrix.setGroup(params("groupData").asInstanceOf[Seq[Seq[Int]]](
Expand Down Expand Up @@ -221,7 +221,7 @@ object XGBoost extends Serializable {
private def overrideParamsAccordingToTaskCPUs(
params: Map[String, Any],
sc: SparkContext): Map[String, Any] = {
val coresPerTask = sc.getConf.get("spark.task.cpus", "1").toInt
val coresPerTask = sc.getConf.getInt("spark.task.cpus", 1)
var overridedParams = params
if (overridedParams.contains("nthread")) {
val nThread = overridedParams("nthread").toString.toInt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package ml.dmlc.xgboost4j.scala.spark

import scala.collection.JavaConverters._

import ml.dmlc.xgboost4j.java.{Rabit, DMatrix => JDMatrix}
import ml.dmlc.xgboost4j.java.Rabit
import ml.dmlc.xgboost4j.scala.spark.params.{BoosterParams, DefaultXGBoostParamsWriter}
import ml.dmlc.xgboost4j.scala.{Booster, DMatrix, EvalTrait}
import org.apache.hadoop.fs.{FSDataOutputStream, Path}
Expand Down Expand Up @@ -66,7 +66,7 @@ abstract class XGBoostModel(protected var _booster: Booster)
val rabitEnv = Map("DMLC_TASK_ID" -> TaskContext.getPartitionId().toString)
Rabit.init(rabitEnv.asJava)
if (testSamples.nonEmpty) {
val dMatrix = new DMatrix(new JDMatrix(testSamples, null))
val dMatrix = new DMatrix(testSamples)
try {
broadcastBooster.value.predictLeaf(dMatrix).iterator
} finally {
Expand Down Expand Up @@ -202,7 +202,7 @@ abstract class XGBoostModel(protected var _booster: Booster)
null
}
}
val dMatrix = new DMatrix(new JDMatrix(testSamples, cacheFileName))
val dMatrix = new DMatrix(testSamples, cacheFileName)
try {
broadcastBooster.value.predict(dMatrix).iterator
} finally {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ml.dmlc.xgboost4j.scala.spark

import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterEach, FunSuite}

/**
 * Test mixin that gives each test case its own local [[SparkSession]].
 *
 * A session is created (or reused, if one is already active) before every
 * test and stopped after it. After stopping, files in the current working
 * directory whose names start with the Spark application name are deleted
 * (presumably external-memory cache files written during the test).
 *
 * Suites can override [[sparkSessionBuilder]] to customise the session.
 */
trait PerTest extends BeforeAndAfterEach { self: FunSuite =>
  protected val numWorkers: Int = Runtime.getRuntime.availableProcessors()

  @transient private var currentSession: SparkSession = _

  /** The session of the running test; created on first access if necessary. */
  def ss: SparkSession = getOrCreateSession

  /** The [[SparkContext]] backing [[ss]]. */
  implicit def sc: SparkContext = ss.sparkContext

  /** Builder used to create the per-test session. */
  protected def sparkSessionBuilder: SparkSession.Builder = SparkSession.builder()
    .master("local[*]")
    .appName("XGBoostSuite")
    .config("spark.ui.enabled", false)
    .config("spark.driver.memory", "512m")

  override def beforeEach(): Unit = {
    // Call super first so that other stacked BeforeAndAfterEach traits run too.
    super.beforeEach()
    getOrCreateSession
  }

  override def afterEach(): Unit = {
    try {
      synchronized {
        Option(currentSession).foreach { session =>
          // Read the app name before stopping the session.
          val cachePrefix = session.sparkContext.appName
          try {
            session.stop()
            cleanExternalCache(cachePrefix)
          } finally {
            // Always forget the session, so a failed stop() cannot leak a
            // half-stopped session into the next test.
            currentSession = null
          }
        }
      }
    } finally {
      // Always give stacked traits the chance to clean up as well.
      super.afterEach()
    }
  }

  private def getOrCreateSession: SparkSession = synchronized {
    if (currentSession == null) {
      currentSession = sparkSessionBuilder.getOrCreate()
      currentSession.sparkContext.setLogLevel("ERROR")
    }
    currentSession
  }

  /** Deletes every entry of the working directory whose name starts with `prefix`. */
  private def cleanExternalCache(prefix: String): Unit = {
    // File.listFiles returns null (not an empty array) when the directory
    // cannot be listed, so guard against it.
    Option(new File(".").listFiles())
      .getOrElse(Array.empty[File])
      .filter(_.getName.startsWith(prefix))
      .foreach(_.delete())
  }
}
Loading

0 comments on commit abaa8ca

Please sign in to comment.