Skip to content

Commit

Permalink
Merge pull request #2 from RadeonOpenCompute/master
Browse files Browse the repository at this point in the history
merge from upstream
  • Loading branch information
scchan committed Mar 30, 2016
2 parents 004c682 + 7d4afca commit e91562d
Show file tree
Hide file tree
Showing 7 changed files with 490 additions and 3 deletions.
42 changes: 42 additions & 0 deletions include/grid_launch.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,48 @@ typedef struct grid_launch_parm
// use acc_view for PFE in WrapperGen
hc::accelerator_view *av;
hc::completion_future *cf;

// Compiler-generated default constructor.
grid_launch_parm() = default;

// customized serialization: don't need av and cf in kernel
//
// Appends the x/y/z components of gridDim, groupDim, groupId and threadId,
// followed by groupMemBytes, to `s`. The av and cf pointers are deliberately
// not appended.
//
// The append order here mirrors the parameter order of the
// "user_deserialize" constructor of this struct; keep the two in sync when a
// field is added, removed or reordered.
//
// NOTE(review): every component is appended with sizeof(int) -- this assumes
// the x/y/z members are int-sized; confirm against their declarations.
__attribute__((annotate("serialize")))
void __cxxamp_serialize(Kalmar::Serialize& s) const {
// launch grid extent
s.Append(sizeof(int), &gridDim.x);
s.Append(sizeof(int), &gridDim.y);
s.Append(sizeof(int), &gridDim.z);
// work-group extent
s.Append(sizeof(int), &groupDim.x);
s.Append(sizeof(int), &groupDim.y);
s.Append(sizeof(int), &groupDim.z);
// work-group index
s.Append(sizeof(int), &groupId.x);
s.Append(sizeof(int), &groupId.y);
s.Append(sizeof(int), &groupId.z);
// thread index
s.Append(sizeof(int), &threadId.x);
s.Append(sizeof(int), &threadId.y);
s.Append(sizeof(int), &threadId.z);
// group memory size in bytes (unsigned, appended last)
s.Append(sizeof(unsigned), &groupMemBytes);
}

// Deserializing constructor: rebuilds a grid_launch_parm from the thirteen
// scalar values that __cxxamp_serialize() appends, in the same order
// (gridDim, groupDim, groupId, threadId as x/y/z triples, then
// groupMemBytes).
//
// av and cf are not assigned here; they are also skipped by the serializer.
// NOTE(review): presumably this constructor is invoked by compiler-generated
// kernel-side code because of the "user_deserialize" annotation -- confirm
// against the compiler documentation before changing the signature.
__attribute__((annotate("user_deserialize")))
grid_launch_parm(int gridDim_x, int gridDim_y, int gridDim_z,
int groupDim_x, int groupDim_y, int groupDim_z,
int groupId_x, int groupId_y, int groupId_z,
int threadId_x, int threadId_y, int threadId_z,
unsigned groupMemBytes_) {
// launch grid extent
gridDim.x = gridDim_x;
gridDim.y = gridDim_y;
gridDim.z = gridDim_z;
// work-group extent
groupDim.x = groupDim_x;
groupDim.y = groupDim_y;
groupDim.z = groupDim_z;
// work-group index
groupId.x = groupId_x;
groupId.y = groupId_y;
groupId.z = groupId_z;
// thread index
threadId.x = threadId_x;
threadId.y = threadId_y;
threadId.z = threadId_z;
// group memory size in bytes
groupMemBytes = groupMemBytes_;
}

} grid_launch_parm;

// TODO: Will move to separate source file in the future
Expand Down
289 changes: 286 additions & 3 deletions lib/clamp-link.in
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,249 @@ _find_object() {

# All command-line arguments flattened into one space-separated string.
# The argument loops below iterate over it with an unquoted `for ARG in $ARGS`,
# so individual arguments containing whitespace are not preserved.
ARGS="$@"

# flag to switch to the new arg parsing algorithm
# (1 selects the new parser; any other value takes the old argument loop)
USE_NEW_LINK=1

# Space-separated accumulators filled by the new parser:
#   NEW_LINK_KERNEL_ARGS - extracted kernel bitcode files (*.kernel.bc)
#   NEW_LINK_HOST_ARGS   - host objects with the .kernel section removed
#   NEW_LINK_OTHER_ARGS  - everything passed through unchanged
#   NEW_LINK_CPU_ARG     - copies of *.cpu inputs
NEW_LINK_KERNEL_ARGS=""
NEW_LINK_HOST_ARGS=""
NEW_LINK_OTHER_ARGS=""
NEW_LINK_CPU_ARG=""

# Canonical paths of static libraries already examined (used to skip repeats).
STATIC_LIB_LIST=()
# Directories created to unpack static libraries; removed during cleanup.
TEMP_AR_DIRS=()

if [ $USE_NEW_LINK == 1 ]; then

# Turn on verbose output as soon as "--verbose" appears anywhere in the
# flattened argument string (plain substring match, before any other parsing).
case "$ARGS" in
    *--verbose*) VERBOSE=1 ;;
esac
# very verbose: uncomment to force the most detailed diagnostics
#VERBOSE=2



# Gather the canonical form of every -L<path> library search directory.
#
# Each -L argument is resolved with `readlink -f`, so later comparisons work
# on canonical paths. Expansions are quoted so that a resolved path containing
# whitespace stays a single array element, and the verbosity test does not
# break when VERBOSE is empty or unset.
LIB_SEARCH_PATHS=()
for ARG in $ARGS
do
    # matches -L<path>
    if [[ "$ARG" =~ ^-L[^[:space:]]+$ ]]; then
        REAL_PATH=$(readlink -f "${ARG:2}")
        if [ "$VERBOSE" == 2 ]; then
            echo "add library path: ${ARG:2}, canonical path: $REAL_PATH"
        fi
        # readlink prints nothing when it cannot resolve the path; do not
        # record an empty search directory in that case
        if [ -n "$REAL_PATH" ]; then
            LIB_SEARCH_PATHS+=( "$REAL_PATH" )
        fi
    fi
done




# Print a diagnostic line only at the "very verbose" level (VERBOSE == 2).
_link_debug() {
    if [ "$VERBOSE" == 2 ]; then
        echo "$1"
    fi
}

# Succeed when the `objdump -t` output for file $1 mentions ".kernel".
_has_kernel_section() {
    objdump -t "$1" | grep -q "\.kernel"
}

# Classify every command-line argument.
#
# For each argument the loop decides whether it is
#   * a simple switch (--disable-opencl / --disable-hsa),
#   * a *.cpu file (copied to $TEMP_DIR/kernel_cpu.o),
#   * an object file (*.o, or an x86-64 ELF relocatable without that suffix),
#   * a static library (-l<name> resolved through LIB_SEARCH_PATHS, or *.a),
#   * anything else (passed through untouched).
# Objects that carry a .kernel section are split into a kernel bitcode file
# and a host object; everything else is appended to NEW_LINK_OTHER_ARGS.
for ARG in $ARGS
do

    ####################################
    # parse the simple switches first...
    ####################################

    if [ "$ARG" == "--disable-opencl" ]; then
        LOWER_OPENCL=0
        continue
    fi

    if [ "$ARG" == "--disable-hsa" ]; then
        LOWER_HSA=0
        continue
    fi

    #####################################
    # detect object or static library
    #####################################

    OBJS_TO_PROCESS=()

    if [[ "$ARG" =~ [^[:space:]]+\.cpu$ ]]; then

        # NOTE(review): OBJS_TO_PROCESS stays empty here, so the original
        # *.cpu argument is also appended to NEW_LINK_OTHER_ARGS further
        # down -- confirm that this is intended.
        cp "$ARG" "$TEMP_DIR/kernel_cpu.o"
        NEW_LINK_CPU_ARG=$NEW_LINK_CPU_ARG" "$TEMP_DIR/kernel_cpu.o

    elif [[ "$ARG" =~ [^[:space:]]+\.o$ ]]; then
        # detected a .o file
        _link_debug "detect object file to process further: $ARG"

        OBJS_TO_PROCESS+=( "$ARG" )

    elif [[ "$ARG" =~ ^-l[^[:space:]]+$ ]] || [[ "$ARG" =~ [^[:space:]]+\.a$ ]]; then
        # The dot in "\.a$" is escaped on purpose: an unescaped "." would
        # match any argument whose last character is "a".

        # process a static library

        DETECTED_STATIC_LIBRARY=""

        # detect whether it's an -l<library name> option
        if [[ "$ARG" =~ ^-l[^[:space:]]+$ ]]; then

            # expand the option into a library name
            STATIC_LIB_NAME="lib${ARG:2}.a"

            _link_debug "looking for static library $STATIC_LIB_NAME"

            # look for the static library in the library search paths
            for LIB_PATH in "${LIB_SEARCH_PATHS[@]}"
            do
                FULL_LIB_PATH=$LIB_PATH"/"$STATIC_LIB_NAME
                FULL_LIB_PATH=$(readlink -f "$FULL_LIB_PATH")
                _link_debug "trying to detect $FULL_LIB_PATH"
                # quoted: an empty path must not make the test succeed
                if [ -f "$FULL_LIB_PATH" ]; then
                    _link_debug "$FULL_LIB_PATH detected"
                    DETECTED_STATIC_LIBRARY=$FULL_LIB_PATH
                    break
                fi
            done
        else
            # this is a .a static library file specified at the command line
            if [ -f "$ARG" ]; then
                FULL_LIB_PATH=$(readlink -f "$ARG")
                _link_debug "use .a specified at: $FULL_LIB_PATH"
                DETECTED_STATIC_LIBRARY=$FULL_LIB_PATH
            fi
        fi # if [[ "$ARG" =~ ^-l[^[:space:]]+$ ]]; then

        # check for duplicated static library options
        if [[ $DETECTED_STATIC_LIBRARY != "" ]]; then
            for LIB in "${STATIC_LIB_LIST[@]}"
            do
                # right-hand side quoted so it is compared literally rather
                # than interpreted as a glob pattern
                if [[ $LIB == "$DETECTED_STATIC_LIBRARY" ]]; then
                    # this library has already been looked at, skip it
                    DETECTED_STATIC_LIBRARY=""
                    break
                fi
            done
            if [[ $DETECTED_STATIC_LIBRARY != "" ]]; then
                STATIC_LIB_LIST+=( "$DETECTED_STATIC_LIBRARY" )
            fi
        fi

        KERNEL_UNDETECTED="1"
        if [[ $DETECTED_STATIC_LIBRARY != "" ]]; then

            # we found a static library
            _link_debug "processing static library $DETECTED_STATIC_LIBRARY"

            # detect whether the objects in the static library contain a .kernel section
            if _has_kernel_section "$DETECTED_STATIC_LIBRARY"; then
                KERNEL_UNDETECTED="0"

                # .kernel section detected, extract the objects from the archive
                _link_debug "kernel detected in $DETECTED_STATIC_LIBRARY"

                CURRENT_DIR=$PWD
                # extract the archive into a directory named after it
                FILE=$(basename "$DETECTED_STATIC_LIBRARY")
                AR_TEMP_DIR=$TEMP_DIR"/"$FILE

                _link_debug "creating temp dir: $AR_TEMP_DIR"

                mkdir -p "$AR_TEMP_DIR"
                TEMP_AR_DIRS+=( "$AR_TEMP_DIR" )

                # only extract when the directory change succeeded, so the
                # archive members never land in the caller's directory
                if cd "$AR_TEMP_DIR"; then
                    ar x "$DETECTED_STATIC_LIBRARY"
                    cd "$CURRENT_DIR"
                fi

                # store all the extracted objects to process further
                OBJS_TO_PROCESS=()
                for EXTRACTED_OBJ in "$AR_TEMP_DIR"/*.o
                do
                    # an unmatched glob stays literal; skip it
                    if [ -e "$EXTRACTED_OBJ" ]; then
                        OBJS_TO_PROCESS+=( "$EXTRACTED_OBJ" )
                    fi
                done

            fi # if _has_kernel_section
        fi # if [[ $DETECTED_STATIC_LIBRARY != "" ]]; then

    elif [ -f "$ARG" ]; then
        # an object file but doesn't have an .o extension??
        file_output=$(file "$ARG" | grep 'ELF 64-bit LSB relocatable, x86-64')
        readelf_output=$(readelf -h "$ARG" | grep 'Relocatable file')
        if [ -n "$file_output" ] && [ -n "$readelf_output" ]; then
            OBJS_TO_PROCESS+=( "$ARG" )
        fi
    fi

    # no objects to further process, pass the original args down to the host linker
    if [ ${#OBJS_TO_PROCESS[@]} == 0 ]; then
        _link_debug "passing down link args: $ARG"
        NEW_LINK_OTHER_ARGS=$NEW_LINK_OTHER_ARGS" "$ARG
        continue
    fi

    # process the objects we put aside
    for OBJ in "${OBJS_TO_PROCESS[@]}"
    do
        _link_debug "processing $OBJ"

        # detect whether the object contains a .kernel section
        KERNEL_UNDETECTED="1"
        if _has_kernel_section "$OBJ"; then
            KERNEL_UNDETECTED="0"

            # NOTE(review): output names are derived from the basename only,
            # so two inputs with the same file name would share these paths.
            FILE=$(basename "$OBJ") # remove path
            FILENAME=${FILE%.*}
            KERNEL_FILE=$TEMP_DIR/$FILENAME.kernel.bc
            HOST_FILE=$TEMP_DIR/$FILENAME.host.o

            # extract kernel section
            objcopy -O binary -j .kernel "$OBJ" "$KERNEL_FILE"

            # extract host section
            objcopy -R .kernel "$OBJ" "$HOST_FILE"

            # strip all symbols specified in symbol.txt from $HOST_FILE
            objcopy "@$CXXAMP_SERIALIZE_SYMBOL_FILE" "$HOST_FILE" "$HOST_FILE.new" 2> /dev/null
            if [ -f "$HOST_FILE.new" ]; then
                mv "$HOST_FILE.new" "$HOST_FILE"
            fi

            # find cxxamp_serialize symbols and save them into symbol.txt
            objdump -t "$HOST_FILE" -j .text 2> /dev/null | grep "g.*__cxxamp_serialize" | awk '{print "-L"$6}' >> "$CXXAMP_SERIALIZE_SYMBOL_FILE"

            NEW_LINK_KERNEL_ARGS=$NEW_LINK_KERNEL_ARGS" "$KERNEL_FILE
            NEW_LINK_HOST_ARGS=$NEW_LINK_HOST_ARGS" "$HOST_FILE
        else
            NEW_LINK_OTHER_ARGS=$NEW_LINK_OTHER_ARGS" "$OBJ
        fi

    done # for OBJ in "${OBJS_TO_PROCESS[@]}"
done

else # if [ $USE_NEW_LINK == 1 ]; then

############################
# Old argument processing
############################

for ARG in $ARGS
do
if [ -f $ARG ]; then
Expand All @@ -127,6 +370,7 @@ do
CPUFILE=${FILE%.cpu}
ISCRT=${ARG#/usr} # exception for objects under /usr
ISLIB=${ARG#/lib} # exception for objects under /lib

if [ $FILENAME != $FILE ] && [ $ISCRT == $ARG ] && [ $ISLIB == $ARG ]; then
KERNEL_FILE=$TEMP_DIR/$FILENAME.kernel.bc
HOST_FILE=$TEMP_DIR/$FILENAME.host.o
Expand Down Expand Up @@ -164,9 +408,43 @@ do
LINK_OTHER_ARGS=$LINK_OTHER_ARGS" "$ARG
fi
done
#echo "kernel args:"$LINK_KERNEL_ARGS
#echo "host args:"$LINK_HOST_ARGS
#echo "other args:"$LINK_OTHER_ARGS

# In verbose mode, show how the old parser classified the arguments.
# VERBOSE is quoted so the test stays well-formed when it is empty or unset.
if [ "$VERBOSE" == 1 ]; then
    echo "kernel args: "$LINK_KERNEL_ARGS
    echo ""
    echo "host args: "$LINK_HOST_ARGS
    echo ""
    echo "other args: "$LINK_OTHER_ARGS
    echo ""
fi

###################################
# End of old argument processing
###################################

fi



# When the new parser is enabled, its results replace the LINK_* variables
# consumed by the rest of the script. In verbose mode the three lists are
# printed first. Test operands are quoted so an empty or unset variable
# yields "false" instead of a malformed test expression.
if [ "$USE_NEW_LINK" == 1 ]; then

    if [ "$VERBOSE" == 1 ]; then
        echo "new kernel args: "$NEW_LINK_KERNEL_ARGS
        echo ""
        echo "new host args: "$NEW_LINK_HOST_ARGS
        echo ""
        echo "new other args: "$NEW_LINK_OTHER_ARGS
        echo "replacing old link args with new link args"
    fi

    LINK_KERNEL_ARGS=$NEW_LINK_KERNEL_ARGS
    LINK_HOST_ARGS=$NEW_LINK_HOST_ARGS
    LINK_OTHER_ARGS=$NEW_LINK_OTHER_ARGS

fi


# linker return value
ret=0
Expand Down Expand Up @@ -339,6 +617,11 @@ if [ -e $CXXAMP_SERIALIZE_SYMBOL_FILE ]; then
rm $CXXAMP_SERIALIZE_SYMBOL_FILE # __cxxamp_serialize symbols
fi

# Remove the directories that were created to unpack static libraries.
# The path is quoted and preceded by "--" so whitespace, glob characters or a
# leading dash in a directory name cannot change what `rm -rf` deletes; empty
# entries are skipped.
for TD in "${TEMP_AR_DIRS[@]}"
do
    if [ -n "$TD" ]; then
        rm -rf -- "$TD"
    fi
done

if [ -d $TEMP_DIR ]; then
rm -f $TEMP_DIR/*
rmdir $TEMP_DIR
Expand Down
19 changes: 19 additions & 0 deletions lib/hc-kernel-assemble.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,25 @@ LLVM_DIS=$BINDIR/llvm-dis
CLAMP_ASM=$BINDIR/clamp-assemble
LIBPATH=$BINDIR/../lib
CXXFLAGS="-std=c++amp -stdlib=libc++ -I$BINDIR/../../include"
# Extend CXXFLAGS according to the CMake build type substituted into this
# template. Matching is case-insensitive because nocasematch is switched on;
# it is not switched off again in this section.
shopt -s nocasematch
CMAKE_BUILD_TYPE="@CMAKE_BUILD_TYPE@"
case "$CMAKE_BUILD_TYPE" in
    release)        CXXFLAGS="$CXXFLAGS -O3" ;;
    relwithdebinfo) CXXFLAGS="$CXXFLAGS -O2 -g" ;;
    minsizerel)     CXXFLAGS="$CXXFLAGS -Os" ;;
    debug)          CXXFLAGS="$CXXFLAGS -g" ;;
    *)              : ;;  # any other build type: CXXFLAGS stays as it is
esac

if [ "$#" -ne 2 ]; then
echo "Usage: $0 kernel-bitcode object" >&2
Expand Down
Loading

0 comments on commit e91562d

Please sign in to comment.