Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dockerup #23

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 12 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
ARG CUDA_VERSION
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
ARG XGB_HASH=6d293020fbfa2c67b532d550fe5d55689662caac
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
ARG CUDA_VERSION
SHELL ["/bin/bash", "-c"]
# Install conda (and use python 3.7)
RUN apt-get update && \
Expand All @@ -16,7 +18,7 @@ RUN apt-get update && \
tar \
unzip \
wget \
zlib1g-dev && \
zlib1g-dev && \
rm -rf /var/lib/apt/*

RUN curl -o /opt/miniconda.sh \
Expand All @@ -26,6 +28,7 @@ RUN curl -o /opt/miniconda.sh \
/opt/conda/bin/conda update -n base conda && \
rm /opt/miniconda.sh
ENV PATH /opt/conda/bin:$PATH

RUN conda install -c conda-forge \
bokeh \
h5py \
Expand All @@ -48,7 +51,7 @@ RUN conda install -c conda-forge \
tqdm && \
conda clean -ya && \
pip install kaggle tqdm && \
conda install -c rapidsai -c nvidia -c conda-forge -c defaults cudf=0.15.0 dask-cuda rmm librmm rapids-xgboost cuml=0.15
conda install -c rapidsai -c nvidia -c conda-forge -c defaults cudf=0.16 dask-cuda rmm librmm cuml=0.16

# cmake
ENV CMAKE_SHORT_VERSION 3.14
Expand Down Expand Up @@ -95,6 +98,7 @@ ENV OPENCL_INCLUDE_DIR /usr/local/cuda/include
RUN git config --global http.sslVerify false && \
git clone --recursive https://github.com/Microsoft/LightGBM /opt/LightGBM && \
cd /opt/LightGBM && \
git log > lgbm_log.txt && \
mkdir build && \
cd build && \
cmake .. \
Expand All @@ -107,33 +111,23 @@ RUN git config --global http.sslVerify false && \
python setup.py install --precompile

# catboost
# Build CatBoost from source only for CUDA < 11 (its bundled build predates
# CUDA 11.x). Compare the numeric major version: the original
# `["$CUDA_VERSION" < "11.0"]` was not valid shell (no spaces inside [ ],
# and an unquoted/unescaped `<` is an input redirect), and a *string*
# compare would order "9.2" after "11.0" anyway.
RUN if [ "${CUDA_VERSION%%.*}" -lt 11 ]; then git config --global http.sslVerify false && \
    git clone --recursive "https://github.com/catboost/catboost" /opt/catboost && \
    cd /opt/catboost && \
    cd catboost/python-package/catboost && \
    ../../../ya make \
        -r \
        -o ../../.. \
        -DUSE_ARCADIA_PYTHON=no \
        -DUSE_SYSTEM_PYTHON=3.7 \
        -DPYTHON_CONFIG=python3-config \
        -DCUDA_ROOT=$(dirname $(dirname $(which nvcc))); \
    fi
# ENV does not support shell conditionals (`ENV if [...]` is invalid
# Dockerfile syntax). Setting PYTHONPATH unconditionally is harmless when the
# source build was skipped: the directory simply does not exist.
ENV PYTHONPATH=$PYTHONPATH:/opt/catboost/catboost/python-package

RUN pip install catboost

# xgboost
# Build XGBoost from a pinned commit with CUDA, NCCL, and the RMM
# memory-pool plugin, then install the freshly built python package over any
# conda/pip-provided one.
# NOTE(review): XGB_HASH is declared as an ARG before the first FROM; ARGs
# declared before FROM go out of scope at FROM unless re-declared with a bare
# `ARG XGB_HASH` afterwards — confirm a (collapsed) line does so, otherwise
# `git checkout $XGB_HASH` expands empty and silently checks out nothing.
RUN git config --global http.sslVerify false && \
git clone --recursive https://github.com/dmlc/xgboost /opt/xgboost && \
cd /opt/xgboost && \
git checkout $XGB_HASH && \
git submodule update --init --recursive && \
mkdir build && \
cd build && \
RMM_ROOT=/opt/conda cmake .. \
-DUSE_CUDA=ON \
-DUSE_NCCL=ON \
-DPLUGIN_RMM=ON && \
make -j4 && \
git log > xgb_log.txt && \
cd ../python-package && \
pip uninstall -y xgboost && \
python setup.py install
# Record the exact commit that was built, for provenance/debugging at runtime.
ENV XGBOOST_HASH=${XGB_HASH}
5 changes: 3 additions & 2 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,9 @@ def prepare_bosch(dataset_folder, nrows):

os.system("kaggle competitions download -c bosch-production-line-performance -f " +
filename + " -p " + dataset_folder)
X = pd.read_csv(local_url, index_col=0, compression='zip', dtype=np.float32,
nrows=nrows)
X = pd.read_csv(local_url,compression='zip', dtype=np.float32)
X = X.set_index('Id')
X.index = X.index.astype('int64')
y = X.iloc[:, -1].to_numpy(dtype=np.float32)
X.drop(X.columns[-1], axis=1, inplace=True)
X = X.to_numpy(dtype=np.float32)
Expand Down
54 changes: 54 additions & 0 deletions plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os
import sys
import csv
import json
import numpy as np
import pandas as pd
import argparse
import matplotlib.pyplot as plt
import json2csv
import csv_merger

def parse_args():
    """Parse command-line arguments for the benchmark visualizer.

    Returns:
        argparse.Namespace with fields ``d1``, ``metric``, ``dataset``,
        ``title`` and ``output``.
    """
    parser = argparse.ArgumentParser(
        description="Visualize benchmarks against another version")
    # main() splits this value on ':', so groups are colon-separated
    # (the old help text wrongly said "comma sperated").
    parser.add_argument("-d1", required=True, type=str,
                        help="colon-separated csv file groups to merge")
    parser.add_argument("-metric", default="train_time", type=str,
                        help=("The metric we want to visualize"))
    parser.add_argument("-dataset", required=True, type=str,
                        help="dataset to plot")
    parser.add_argument("-title", default="graph", type=str,
                        help=("The title of the graph"))
    # The figure is written via matplotlib's savefig, so the output is an
    # image, not json (old help text said "json file").
    parser.add_argument("-output", default=sys.path[0] + "/results.png", type=str,
                        help="Output image file with visualization")
    args = parser.parse_args()
    return args

def plot_error_bars(df_lis, args):
    """Plot the mean of ``args.dataset`` per GPU count for each frame.

    Args:
        df_lis: list of DataFrames, each with a "dataset" column to group on.
        args: parsed CLI namespace (uses ``dataset``, ``metric``, ``title``,
            ``output``).

    Side effects:
        Saves the figure to ``args.output``.
    """
    _, ax = plt.subplots()
    labels = ["xgb", "cat"]
    # assumes each frame carries one mean column per GPU count, in this
    # order — TODO confirm against the csv producer
    ngpu = [1, 2, 4, 6, 8]
    for idx, df in enumerate(df_lis):
        # groupby gives one row per dataset; transpose so datasets become
        # columns, then select just the requested one. (The old code dropped
        # the other columns via DataFrame.iteritems(), which was removed in
        # pandas 2.0, and computed an unused gp.std().)
        means = df.groupby("dataset").mean().T
        plt.plot(ngpu, means[args.dataset].tolist(), label=labels[idx])
    ax.legend()
    plt.ylabel(args.metric)
    plt.xticks()
    plt.title(f"{args.title} plot")
    plt.savefig(args.output)

def main():
    """Entry point: merge each colon-separated csv group, then plot them."""
    args = parse_args()
    frames = [csv_merger.import_main(group) for group in args.d1.split(":")]
    plot_error_bars(frames, args)

if __name__ == '__main__':
    main()
86 changes: 86 additions & 0 deletions trainer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import os
import sys
import argparse
import json
import ast
import psutil
import algorithms
from metrics import get_metrics
from runme import benchmark
from runme import get_number_processors
from runme import print_sys_info
from datasets import prepare_dataset

def parse_args():
    """Parse command-line arguments for the multi-cycle benchmark driver.

    Returns:
        argparse.Namespace; ``output`` falls back to "<dataset>.json" when an
        empty string is supplied.
    """
    parser = argparse.ArgumentParser(
        description="Benchmark xgboost/lightgbm/catboost on real datasets")
    parser.add_argument("-dataset", default="all", type=str,
                        help="The dataset to be used for benchmarking. 'all' for all datasets.")
    parser.add_argument("-root", default="/opt/gbm-datasets",
                        type=str, help="The root datasets folder")
    parser.add_argument("-algorithm", default="all", type=str,
                        help=("Comma-separated list of algorithms to run; "
                              "'all' run all"))
    parser.add_argument("-gpus", default=-1, type=int,
                        help=("#GPUs to use for the benchmarks; "
                              "ignored when not supported. Default is to use all."))
    # Unlike -gpus this is a comma-separated list with one entry per training
    # cycle; main() overwrites args.gpus from it each cycle. (The old help
    # text was a stale copy of the -gpus help.)
    parser.add_argument("-ngpus", default='1', type=str,
                        help=("Comma-separated #GPUs per training cycle; "
                              "must have exactly -train_cycles entries."))
    parser.add_argument("-cpus", default=0, type=int,
                        help=("#CPUs to use for the benchmarks; "
                              "0 means psutil.cpu_count(logical=False)"))
    parser.add_argument("-output", default=sys.path[0] + "/results.json", type=str,
                        help="Output json file with runtime/accuracy stats")
    parser.add_argument("-ntrees", default=500, type=int,
                        help=("Number of trees. Default is as specified in "
                              "the respective dataset configuration"))
    parser.add_argument("-nrows", default=None, type=int,
                        help=(
                            "Subset of rows in the datasets to use. Useful for test running "
                            "benchmarks on small amounts of data. WARNING: Some datasets will "
                            "give incorrect accuracy results if nrows is specified as they have "
                            "predefined train/test splits."))
    parser.add_argument("-cycles", default=1, type=int,
                        help=("Number of training cycles for each iteration"))
    parser.add_argument("-train_cycles", default=1, type=int,
                        help=("Number of training cycles"))
    parser.add_argument("-warmup", action="store_true",
                        help=("Whether to run a small benchmark (fraud) as a warmup"))
    parser.add_argument("-verbose", action="store_true", help="Produce verbose output")
    parser.add_argument("-extra", default='{}', help="Extra arguments as a python dictionary")
    args = parser.parse_args()
    # The default is truthy, so this only fires when the user passes -output "".
    if not args.output:
        args.output = "%s.json" % args.dataset
    return args

def main():
    """Run the benchmark suite once per training cycle, varying the GPU count.

    Cycle ``i`` uses the i-th entry of -ngpus and writes its results to
    "<output-stem><i><output-ext>" (e.g. results0.json, results1.json).
    """
    args = parse_args()
    args.cpus = get_number_processors(args)
    # -extra is a python dict literal; literal_eval parses it without the
    # arbitrary-code-execution risk of eval().
    args.extra = ast.literal_eval(args.extra)
    print_sys_info(args)
    if args.warmup:
        # Small dataset run to warm up GPUs/caches before the timed cycles.
        benchmark(args, os.path.join(args.root, "fraud"), "fraud")
    if args.dataset == 'all':
        args.dataset = 'airline,bosch,fraud,higgs,year,epsilon,covtype'
    gpu_lis = args.ngpus.split(",")
    if len(gpu_lis) != args.train_cycles:
        print("please match ngpus with train_cycles")
        return
    # splitext instead of the old hard-coded [:-5] slice, which silently
    # mangled any output name not ending in a 5-char ".json" suffix.
    stem, ext = os.path.splitext(args.output)
    for idx in range(args.train_cycles):
        results = {}
        args.gpus = int(gpu_lis[idx])
        for dataset in args.dataset.split(","):
            folder = os.path.join(args.root, dataset)
            results[dataset] = benchmark(args, folder, dataset)
            print(json.dumps({dataset: results[dataset]}, indent=2, sort_keys=True))
        out_path = stem + str(idx) + ext
        with open(out_path, "w") as output_file:
            output_file.write(json.dumps(results, indent=2, sort_keys=True) + "\n")
        # Report the per-cycle file actually written (the old code printed
        # args.output, which is not the name any file was saved under).
        print("Results written to file '%s'" % out_path)

if __name__ == '__main__':
    main()