Skip to content

Commit

Permalink
Merge branch 'apk'
Browse files Browse the repository at this point in the history
  • Loading branch information
harrism committed Sep 18, 2015
2 parents 0fc7ec7 + 6dc250e commit 8a684b0
Show file tree
Hide file tree
Showing 56 changed files with 2,946 additions and 517 deletions.
42 changes: 42 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Windows binaries
*.exe
*.exp

# Compiled Object files
*.slo
*.lo
Expand All @@ -16,11 +20,13 @@
*.lib

# visual studio files
*.deps
*.suo
*.pdb
*.opensdf
*.sdf
*.vcxproj.user
*.vcxproj.filters
Debug/
ipch/
Release/
Expand All @@ -31,3 +37,39 @@ Release/
# sublime
*.sublime-project
*.sublime-workspace

# tags
.tags
.tags_sorted_by_file

# executables
examples/blackscholes/bs_device
examples/blackscholes/bs_host
examples/blackscholes/bs_host_nvcc
examples/blackscholes_hemiarray/bs_device
examples/blackscholes_hemiarray/bs_host
examples/blackscholes_hemiarray/bs_host_nvcc
examples/blackscholes_hostdevice/blackscholes
examples/blackscholes_nohemi/bs_device
examples/blackscholes_nohemi/bs_host
examples/blackscholes_nohemi/bs_host_nvcc
examples/nbody_vec4/nbody_vec4
examples/parallel_for/parallel_for_device
examples/parallel_for/parallel_for_host
examples/parallel_for/parallel_for_host_nvcc
examples/simple/hello_device
examples/simple/hello_global
examples/simple/hello_host
examples/simple/hello_host_nvcc
examples/simple/hello_lambda_device
examples/simple/hello_lambda_host
examples/simple/hello_lambda_host_nvcc
examples/simple/saxpy_host
examples/simple/saxpy_device

test/test_hemi_device
test/test_hemi_host

*.plist
*.dSYM

6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "hemi/range"]
path = hemi/range
url = https://github.com/harrism/cpp11-range.git
[submodule "test/googletest"]
path = test/googletest
url = https://github.com/google/googletest.git
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down
207 changes: 157 additions & 50 deletions README.md

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
- [ ] Consider an abstraction for `__shared__` memory
- [ ] Make nbody example work without CUDA
- [ ] Consider `__launch_bounds__` support...
- [ ] Multi-dimensional thread/block accessors
- [x] Add version of parallel_for with an ExecutionPolicy
- [x] Combine tests into small number of binaries
- [x] Add streams to ExecutionPolicy
- [x] Tests for cudaLaunch with and without nvcc
- [x] Tests for other APIs
- [x] Provide portable utility functions for cudaDeviceReset, etc.
- [x] Fix/rename index accessors
- [x] Move accessors to device_api.h
47 changes: 27 additions & 20 deletions examples/blackscholes/Makefile
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
CUDA_PATH := /usr/local/cuda
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")

CC_FLAGS := -I../ -I../../ -I$(CUDA_PATH)/include
# architecture
ARCH := $(shell getconf LONG_BIT)

# uncomment for debug
#DEBUG_FLAGS := -g -DDEBUG
#DEBUG_FLAGS_NVCC := -G
NVCC := nvcc

# comment for debug
CC_FLAGS += -O3
ifeq ($(HOST_OS),darwin)
CXX := clang++
else
CXX := g++
endif

CC_FLAGS += $(DEBUG_FLAGS)
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
STD := -std=c++11

NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
CXX_FLAGS := $(STD) -I../ -I../../

ARCH := $(shell getconf LONG_BIT)
HOST_ONLY_FLAGS := -DHEMI_CUDA_DISABLE

LIB_FLAGS_32 := -L$(CUDA_PATH)/lib
LIB_FLAGS_64 := -L$(CUDA_PATH)/lib64
# uncomment for debug
#DEBUG_FLAGS := -g -DDEBUG
#DEBUG_FLAGS_NVCC := -G

# comment for debug
CXX_FLAGS += -O3

LIB_FLAGS := $(LIB_FLAGS_$(ARCH)) -lcudart
CXX_FLAGS += $(DEBUG_FLAGS)
NVCC_FLAGS := $(CXX_FLAGS) $(DEBUG_FLAGS_NVCC) --expt-extended-lambda

all: bs_device bs_host_nvcc bs_host_g++
all: bs_device bs_host_nvcc bs_host

bs_device: blackscholes.cpp
nvcc blackscholes.cpp $(NVCC_FLAGS) $(LIB_FLAGS) -x cu -o bs_device
$(NVCC) blackscholes.cpp $(NVCC_FLAGS) -x cu -o bs_device

bs_host_nvcc: blackscholes.cpp
nvcc blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -x c++ -o bs_host_nvcc
$(NVCC) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -x c++ -o bs_host_nvcc

bs_host_g++: blackscholes.cpp
g++ blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -o bs_host_g++
bs_host: blackscholes.cpp
$(CXX) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -o bs_host

clean:
rm -rf bs_device bs_host_nvcc bs_host_g++
rm -rf bs_device bs_host_nvcc bs_host
27 changes: 11 additions & 16 deletions examples/blackscholes/blackscholes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

#include "timer.h"
#include "hemi/hemi.h"
#include "hemi/parallel_for.h"
#include "hemi/device_api.h"

const float RISKFREE = 0.02f;
const float VOLATILITY = 0.30f;
Expand Down Expand Up @@ -42,15 +44,11 @@ float CND(float d)
}

// Black-Scholes formula for both call and put
HEMI_KERNEL(BlackScholes)
(float *callResult, float *putResult, const float *stockPrice,
const float *optionStrike, const float *optionYears, float Riskfree,
float Volatility, int optN)
void BlackScholes(float *callResult, float *putResult, const float *stockPrice,
const float *optionStrike, const float *optionYears, float Riskfree,
float Volatility, int optN)
{
int offset = hemiGetElementOffset();
int stride = hemiGetElementStride();

for(int opt = offset; opt < optN; opt += stride)
hemi::parallel_for(0, optN, [=] HEMI_LAMBDA (int opt)
{
float S = stockPrice[opt];
float X = optionStrike[opt];
Expand All @@ -68,7 +66,7 @@ HEMI_KERNEL(BlackScholes)
float expRT = expf(- R * T);
callResult[opt] = S * CNDD1 - X * expRT * CNDD2;
putResult[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1);
}
});
}

float RandFloat(float low, float high)
Expand Down Expand Up @@ -120,9 +118,6 @@ int main(int argc, char **argv)

initOptions(OPT_N, stockPrice, optionStrike, optionYears);

int blockDim = 128; // blockDim, gridDim ignored by host code
int gridDim = std::min<int>(1024, (OPT_N + blockDim - 1) / blockDim);

printf("Running %s Version...\n", HEMI_LOC_STRING);

StartTimer();
Expand All @@ -137,11 +132,11 @@ int main(int argc, char **argv)
d_stockPrice = stockPrice;
d_optionStrike = optionStrike;
d_optionYears = optionYears;
#endif
#endif


HEMI_KERNEL_LAUNCH(BlackScholes, gridDim, blockDim, 0, 0,
d_callResult, d_putResult, d_stockPrice, d_optionStrike,
d_optionYears, RISKFREE, VOLATILITY, OPT_N);
BlackScholes(d_callResult, d_putResult, (const float*)d_stockPrice, (const float*)d_optionStrike,
(const float*)d_optionYears, RISKFREE, VOLATILITY, OPT_N);

#ifdef HEMI_CUDA_COMPILER
checkCuda( cudaMemcpy(callResult, d_callResult, OPT_SZ, cudaMemcpyDeviceToHost) );
Expand Down
36 changes: 0 additions & 36 deletions examples/blackscholes/blackscholes.sln

This file was deleted.

47 changes: 27 additions & 20 deletions examples/blackscholes_hemiarray/Makefile
Original file line number Diff line number Diff line change
@@ -1,36 +1,43 @@
CUDA_PATH := /usr/local/cuda
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")

CC_FLAGS := -I../ -I../../ -I$(CUDA_PATH)/include
# architecture
ARCH := $(shell getconf LONG_BIT)

# uncomment for debug
#DEBUG_FLAGS := -g -DDEBUG
#DEBUG_FLAGS_NVCC := -G
NVCC := nvcc

# comment for debug
CC_FLAGS += -O3
ifeq ($(HOST_OS),darwin)
CXX := clang++
else
CXX := g++
endif

CC_FLAGS += $(DEBUG_FLAGS)
NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
STD := -std=c++11

NVCC_FLAGS := $(CC_FLAGS) $(DEBUG_FLAGS_NVCC)
CXX_FLAGS := $(STD) -I../ -I../../

ARCH := $(shell getconf LONG_BIT)
HOST_ONLY_FLAGS := -DHEMI_CUDA_DISABLE

LIB_FLAGS_32 := -L$(CUDA_PATH)/lib
LIB_FLAGS_64 := -L$(CUDA_PATH)/lib64
# uncomment for debug
#DEBUG_FLAGS := -g -DDEBUG
#DEBUG_FLAGS_NVCC := -G

# comment for debug
CXX_FLAGS += -O3

LIB_FLAGS := $(LIB_FLAGS_$(ARCH)) -lcudart
CXX_FLAGS += $(DEBUG_FLAGS)
NVCC_FLAGS := $(CXX_FLAGS) $(DEBUG_FLAGS_NVCC) --expt-extended-lambda

all: bs_device bs_host_nvcc bs_host_g++
all: bs_device bs_host_nvcc bs_host

bs_device: blackscholes.cpp
nvcc blackscholes.cpp $(NVCC_FLAGS) $(LIB_FLAGS) -x cu -o bs_device
$(NVCC) blackscholes.cpp $(NVCC_FLAGS) -x cu -o bs_device

bs_host_nvcc: blackscholes.cpp
nvcc blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -x c++ -o bs_host_nvcc
$(NVCC) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -x c++ -o bs_host_nvcc

bs_host_g++: blackscholes.cpp
g++ blackscholes.cpp $(CC_FLAGS) $(LIB_FLAGS) -o bs_host_g++
bs_host: blackscholes.cpp
$(CXX) blackscholes.cpp $(CXX_FLAGS) $(HOST_ONLY_FLAGS) -o bs_host

clean:
rm -rf bs_device bs_host_nvcc bs_host_g++
rm -rf bs_device bs_host_nvcc bs_host
28 changes: 11 additions & 17 deletions examples/blackscholes_hemiarray/blackscholes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

#include "timer.h"
#include "hemi/hemi.h"
#include "hemi/device_api.h"
#include "hemi/parallel_for.h"
#include "hemi/array.h"

const float RISKFREE = 0.02f;
Expand Down Expand Up @@ -43,15 +45,11 @@ float CND(float d)
}

// Black-Scholes formula for both call and put
HEMI_KERNEL(BlackScholes)
(float *callResult, float *putResult, const float *stockPrice,
void BlackScholes(float *callResult, float *putResult, const float *stockPrice,
const float *optionStrike, const float *optionYears, float Riskfree,
float Volatility, int optN)
{
int offset = hemiGetElementOffset();
int stride = hemiGetElementStride();

for(int opt = offset; opt < optN; opt += stride)
hemi::parallel_for(0, optN, [=] HEMI_LAMBDA (int opt)
{
float S = stockPrice[opt];
float X = optionStrike[opt];
Expand All @@ -69,7 +67,7 @@ HEMI_KERNEL(BlackScholes)
float expRT = expf(- R * T);
callResult[opt] = S * CNDD1 - X * expRT * CNDD2;
putResult[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1);
}
});
}

float RandFloat(float low, float high)
Expand Down Expand Up @@ -105,20 +103,16 @@ int main(int argc, char **argv)
optionStrike.writeOnlyHostPtr(),
optionYears.writeOnlyHostPtr());

int blockDim = 128; // blockDim, gridDim ignored by host code
int gridDim = std::min<int>(1024, (OPT_N + blockDim - 1) / blockDim);

printf("Running %s Version...\n", HEMI_LOC_STRING);

StartTimer();

HEMI_KERNEL_LAUNCH(BlackScholes, gridDim, blockDim, 0, 0,
callResult.writeOnlyPtr(),
putResult.writeOnlyPtr(),
stockPrice.readOnlyPtr(),
optionStrike.readOnlyPtr(),
optionYears.readOnlyPtr(),
RISKFREE, VOLATILITY, OPT_N);
BlackScholes(callResult.writeOnlyPtr(),
putResult.writeOnlyPtr(),
stockPrice.readOnlyPtr(),
optionStrike.readOnlyPtr(),
optionYears.readOnlyPtr(),
RISKFREE, VOLATILITY, OPT_N);

// force copy back to host if needed and print a sanity check
printf("Option 0 call: %f\n", callResult.readOnlyPtr(hemi::host)[0]);
Expand Down
Loading

0 comments on commit 8a684b0

Please sign in to comment.