[PTI-LIB] Fix Race Condition Detected by TSan (#268)
* Fix race condition in ze_collector detected by ThreadSanitizer.
  * Unify locking across the file (use std::lock_guard instead of manually
    locking and unlocking; see the sketch below).
* Fix crash in dpc_gemm_threaded on icpx 2024.2.1.
* Fix missing header for std::fabs in dpc_gemm* samples.
* Update the SYCL header in the dlworkloads sample.
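
The locking unification replaces manual mutex lock()/unlock() pairs with scoped std::lock_guard. A minimal sketch of the pattern, with purely hypothetical names (the actual ze_collector state and members differ):

    #include <cstdint>
    #include <map>
    #include <mutex>

    // Hypothetical stand-in for the collector's shared state; the member and
    // method names are illustrative, not the real ze_collector ones.
    class Collector {
     public:
      void AddKernelTime(uint64_t kernel_id, uint64_t duration_ns) {
        // Before: kernels_mutex_.lock(); ... kernels_mutex_.unlock();
        // An early return or exception between those calls leaves the mutex held.
        const std::lock_guard<std::mutex> lock(kernels_mutex_);
        kernel_time_ns_[kernel_id] += duration_ns;  // unlocked automatically at scope exit
      }

      uint64_t GetKernelTime(uint64_t kernel_id) const {
        // Readers take the same mutex, so there is no unsynchronized access for TSan to flag.
        const std::lock_guard<std::mutex> lock(kernels_mutex_);
        const auto it = kernel_time_ns_.find(kernel_id);
        return (it == kernel_time_ns_.end()) ? 0 : it->second;
      }

     private:
      mutable std::mutex kernels_mutex_;
      std::map<uint64_t, uint64_t> kernel_time_ns_;
    };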

Signed-off-by: Schilling, Matthew <matthew.schilling@intel.com>
Co-authored-by: jfedorov <julia.fedorova@intel.com>
mschilling0 and jfedorov authored Sep 9, 2024
1 parent c8059bc commit 13c62d2
Showing 18 changed files with 130 additions and 95 deletions.
3 changes: 2 additions & 1 deletion sdk/CMakePresets.json
@@ -115,7 +115,8 @@
       "displayName": "Linux Debug x64 Clang Config",
       "description": "Linux Debug x64 Config using Ninja generator and clang compiler",
       "cacheVariables": {
-        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/toolchains/clang_toolchain.cmake"
+        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/toolchains/clang_toolchain.cmake",
+        "PTI_ENABLE_LOGGING": "ON"
       }
     },
     {
15 changes: 15 additions & 0 deletions sdk/cmake/Modules/FindXpti.cmake
@@ -49,13 +49,26 @@ The following cache variables may also be set:
 
 # Based on tutorial found in CMake manual:
 # https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html
+
+#
+# Note about PATHS:
+#
+# These are specifically hardcoded fallbacks.
+#
+# * `/opt/intel/oneapi/compiler/latest` - common oneAPI installation location on
+#   Linux.
+# * `/opt/sycl` - Intel/LLVM open-source compiler container installation location
+#   on Linux.
+#
+
 find_path(
   Xpti_INCLUDE_DIR
   NAMES xpti/xpti_trace_framework.h
   HINTS ENV CMPLR_ROOT
         ENV CPATH
   PATHS /opt/intel/oneapi/compiler/latest
         /opt/intel/oneapi/compiler/latest/linux
+        /opt/sycl
   PATH_SUFFIXES include
                 linux/include
 )
@@ -67,6 +80,7 @@ find_library(
         ENV LIBRARY_PATH
   PATHS /opt/intel/oneapi/compiler/latest
         /opt/intel/oneapi/compiler/latest/linux
+        /opt/sycl
   PATH_SUFFIXES lib
                 linux/lib
 )
@@ -78,6 +92,7 @@ find_library(
         ENV LIBRARY_PATH
   PATHS /opt/intel/oneapi/compiler/latest
         /opt/intel/oneapi/compiler/latest/linux
+        /opt/sycl
   PATH_SUFFIXES lib
                 linux/lib
 )
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/device_memory.h
@@ -7,7 +7,7 @@
 #define DEVICE_MEMORY_H_
 
 #include <vector>
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 
 // In IPEX/ITEX, device memory are allocated and reused, and released at last.
 // here is a very simple mock for this behavior.
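
The header edits in this and the following dlworkloads files move from the pre-SYCL-2020 spelling (<CL/sycl.hpp> and the cl::sycl namespace) to the standard one (<sycl/sycl.hpp> and sycl::). A standalone sketch of the new style, not taken from the sample itself:

    #include <sycl/sycl.hpp>  // SYCL 2020 header; replaces the deprecated <CL/sycl.hpp>

    #include <iostream>

    int main() {
      sycl::queue q;  // everything lives in sycl::, not cl::sycl::
      float *data = sycl::malloc_shared<float>(4, q);
      q.parallel_for(4, [=](sycl::item<1> item) {  // sycl::item, as in the kernels below
        data[item.get_id(0)] = static_cast<float>(item.get_id(0)) * 2.0f;
      }).wait();
      std::cout << data[3] << '\n';  // prints 6
      sycl::free(data, q);
      return 0;
    }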
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/main.cpp
@@ -5,7 +5,7 @@
 // =============================================================
 #include <iostream>
 #include <vector>
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 
 #include "queue.h"
 #include "device_memory.h"
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/model_mixedprogramming.h
@@ -6,7 +6,7 @@
 #ifndef MODEL_MIXEDPROGRAMMING_H_
 #define MODEL_MIXEDPROGRAMMING_H_
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #include "tiny_tensor.h"
 
 TinyTensor run_model_mixedprogramming(TinyTensor inp, sycl::queue *q);
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_onednn.h
@@ -6,7 +6,7 @@
 #ifndef OPERATION_ONEDNN_H_
 #define OPERATION_ONEDNN_H_
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #include "tiny_tensor.h"
 
 void onednn_prepare_weights(int oc, int ic, int ks, sycl::queue *q);
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_onedpl.cpp
@@ -15,7 +15,7 @@ TinyTensor run_onedpl_operation_cos(const TinyTensor& inp, sycl::queue *q)
   float *dst = outp.data;
 
   q->submit([&](sycl::handler &h) {
-    h.parallel_for(outp.count(), [=](cl::sycl::item<1> item) {
+    h.parallel_for(outp.count(), [=](sycl::item<1> item) {
       int idx = item.get_id(0);
       dst[idx] = oneapi::dpl::cos(src[idx]);
     });
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_onedpl.h
@@ -6,7 +6,7 @@
 #ifndef OPERATION_ONEDPL_H_
 #define OPERATION_ONEDPL_H_
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #include "tiny_tensor.h"
 
 TinyTensor run_onedpl_operation_cos(const TinyTensor& inp, sycl::queue *q);
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_onemkl.h
@@ -6,7 +6,7 @@
 #ifndef OPERATION_ONEMKL_H_
 #define OPERATION_ONEMKL_H_
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #include "tiny_tensor.h"
 
 TinyTensor run_onemkl_operation_fft(const TinyTensor& inp, sycl::queue *q);
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_syclkernel.cpp
@@ -14,7 +14,7 @@ TinyTensor run_syclkernel_operation_scaledown(const TinyTensor& inp, sycl::queue
   float *dst = outp.data;
 
   q->submit([&](sycl::handler &h) {
-    h.parallel_for(outp.count(), [=](cl::sycl::item<1> item) {
+    h.parallel_for(outp.count(), [=](sycl::item<1> item) {
       int idx = item.get_id(0);
       dst[idx] = src[idx*4];
     });
2 changes: 1 addition & 1 deletion sdk/samples/dlworkloads/operation_syclkernel.h
@@ -6,7 +6,7 @@
 #ifndef OPERATION_SYCLKERNEL_H_
 #define OPERATION_SYCLKERNEL_H_
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 #include "tiny_tensor.h"
 
 TinyTensor run_syclkernel_operation_scaledown(const TinyTensor& inp, sycl::queue *q);
4 changes: 2 additions & 2 deletions sdk/samples/dpc_gemm/main.cc
@@ -6,8 +6,8 @@
 
 #include <string.h>
 
+#include <cmath>
 #include <cstdlib>
 #include <memory>
 #include <sycl/sycl.hpp>
 
 #include "pti/pti_view.h"
@@ -160,7 +160,7 @@ static float Check(const std::vector<float> &a, float value) {
 
   float eps = 0.0f;
   for (size_t i = 0; i < a.size(); ++i) {
-    eps += fabs((a[i] - value) / value);
+    eps += std::fabs((a[i] - value) / value);
   }
 
   return eps / a.size();
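
For context on the fabs change: unqualified fabs picks up C's double-only ::fabs (and is only declared at all if <math.h> happens to be in scope), while <cmath> plus std::fabs guarantees a declaration and selects the float overload. A trivial illustration:

    #include <cmath>
    #include <iostream>

    int main() {
      const float a = 1.0f, value = 3.0f;
      // std::fabs from <cmath> has a float overload; C's ::fabs takes and returns
      // double, forcing a float -> double -> float round trip in this expression.
      const float eps = std::fabs((a - value) / value);
      std::cout << eps << '\n';  // prints 0.666667
      return 0;
    }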
13 changes: 7 additions & 6 deletions sdk/samples/dpc_gemm_threaded/main.cc
@@ -11,6 +11,7 @@
 #include <stdarg.h>
 #include <string.h>
 
+#include <cmath>
 #include <cstdlib>
 #include <memory>
 #include <sycl/sycl.hpp>
@@ -31,7 +32,7 @@ static float Check(const std::vector<float>& a, float value) {
 
   float eps = 0.0f;
   for (size_t i = 0; i < a.size(); ++i) {
-    eps += fabs((a[i] - value) / value);
+    eps += std::fabs((a[i] - value) / value);
   }
 
   return eps / a.size();
@@ -277,19 +278,18 @@ int main(int argc, char* argv[]) {
   unsigned size = default_size;
 
   try {
-    unsigned temp;
     for (int i = 1; i < argc; i++) {
       if (strcmp(argv[i], "-s") == 0 || strcmp(argv[i], "--size") == 0) {
         i++;
-        temp = std::stoul(argv[i]);
+        auto temp = std::stoul(argv[i]);
         size = (temp < min_size) ? min_size : (temp > max_size) ? max_size : temp;
       } else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0) {
         i++;
-        temp = std::stoul(argv[i]);
+        auto temp = std::stoul(argv[i]);
         thread_count = (temp < 1) ? 1 : (temp > max_thread_count) ? max_thread_count : temp;
       } else if (strcmp(argv[i], "-r") == 0 || strcmp(argv[i], "--repeat") == 0) {
         i++;
-        temp = std::stoul(argv[i]);
+        auto temp = std::stoul(argv[i]);
         repeat_count = (temp < 1) ? 1 : temp;
       } else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
         // verbosity off makes minimal the sample self output -
@@ -312,7 +312,8 @@ int main(int argc, char* argv[]) {
     StartTracing();
     sycl::device dev;
     dev = sycl::device(sycl::gpu_selector_v);
-    sycl::property_list prop_list{sycl::property::queue::enable_profiling()};
+    sycl::property_list prop_list{sycl::property::queue::in_order(),
+                                  sycl::property::queue::enable_profiling()};
     sycl::queue queue(dev, sycl::async_handler{}, prop_list);
     if (argc > 1 && strcmp(argv[1], "cpu") == 0) {
       dev = sycl::device(sycl::cpu_selector_v);
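
The queue in this sample is now built with two properties: in_order(), which makes commands on the queue execute in submission order without explicit event dependencies, and enable_profiling(), which allows reading timestamps from the events the queue returns. A minimal sketch of that construction, using the default device rather than the sample's GPU selector:

    #include <sycl/sycl.hpp>

    #include <iostream>

    int main() {
      // in_order() serializes submissions on this queue; enable_profiling()
      // permits querying start/end timestamps on returned events.
      const sycl::property_list props{sycl::property::queue::in_order(),
                                      sycl::property::queue::enable_profiling()};
      sycl::queue q(sycl::default_selector_v, sycl::async_handler{}, props);
      std::cout << q.get_device().get_info<sycl::info::device::name>() << '\n';
      return 0;
    }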
13 changes: 7 additions & 6 deletions sdk/samples/vector_sq_add/vector_sq_add.cc
@@ -7,6 +7,7 @@
 
 #include <level_zero/ze_api.h>
 
+#include <cmath>
 #include <iostream>
 #include <map>
 #include <set>
@@ -132,12 +133,12 @@ void RunProfiledVecSqAdd(sycl::queue &sycl_queue) {
   uint64_t corr_id = 0;
 
   for (size_t i = 0; i < kVectorSize; i++) {
-    a[i] = sin(i);
-    b[i] = cos(i);
-    c[2 * i] = sin(i) * sin(i);
-    c[2 * i + 1] = sin(i);
-    d[2 * i] = cos(i) * cos(i);
-    d[2 * i + 1] = cos(i);
+    a[i] = std::sin(i);
+    b[i] = std::cos(i);
+    c[2 * i] = std::sin(i) * std::sin(i);
+    c[2 * i + 1] = std::sin(i);
+    d[2 * i] = std::cos(i) * std::cos(i);
+    d[2 * i + 1] = std::cos(i);
   }
 
   VecSq(sycl_queue, a, b);