Skip to content

Commit

Permalink
Fixed warning message with dslash_quda and fixed clover direct access…
Browse files Browse the repository at this point in the history
… option. Added streaming double2 load, which is utilized for direct clover reading in double precision (disabled by default).
  • Loading branch information
Mike Clark committed Sep 24, 2012
1 parent 58e004b commit d6f5322
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 6 deletions.
14 changes: 14 additions & 0 deletions include/inline_ptx.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ namespace quda {

#if (POINTER_SIZE==8) // 64-bit pointers

__device__ inline void load_streaming_double2(double2 &a, const double2* addr)
{
double x, y;
asm("ld.cs.global.v2.f64 {%0, %1}, [%2+0];" : "=d"(x), "=d"(y) : "l"(addr));
a.x = x; a.y = y;
}

__device__ inline void load_streaming_float4(float4 &a, const float4* addr)
{
float x, y, z, w;
Expand Down Expand Up @@ -51,6 +58,13 @@ namespace quda {

#else // 32-bit pointers

__device__ inline void load_streaming_double2(double2 &a, const double2* addr)
{
double x, y;
asm("ld.cs.global.v2.f64 {%0, %1}, [%2+0];" : "=d"(x), "=d"(y) : "r"(addr));
a.x = x; a.y = y;
}

__device__ inline void load_streaming_float4(float4 &a, const float4* addr)
{
float x, y, z, w;
Expand Down
13 changes: 9 additions & 4 deletions lib/dslash_quda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
#include <clover_field.h>

// these control the Wilson-type actions
#ifdef GPU_WILSON_DIRAC
//#define DIRECT_ACCESS_LINK
//#define DIRECT_ACCESS_WILSON_SPINOR
//#define DIRECT_ACCESS_WILSON_ACCUM
//#define DIRECT_ACCESS_WILSON_INTER
//#define DIRECT_ACCESS_WILSON_PACK_SPINOR
//#define DIRECT_ACCESS_CLOVER
#endif // GPU_WILSON_DIRAC

//these are access control for staggered action
#ifdef GPU_STAGGERED_DIRAC
#if (__COMPUTE_CAPABILITY__ >= 200)
//#define DIRECT_ACCESS_FAT_LINK
//#define DIRECT_ACCESS_LONG_LINK
Expand All @@ -30,6 +33,7 @@
//#define DIRECT_ACCESS_INTER
//#define DIRECT_ACCESS_PACK
#endif
#endif // GPU_STAGGERED_DIRAC

#include <quda_internal.h>
#include <dslash_quda.h>
Expand Down Expand Up @@ -134,6 +138,11 @@ namespace quda {
#include <dslash_textures.h>
#include <dslash_constants.h>

#if defined(DIRECT_ACCESS_LINK) || defined(DIRECT_ACCESS_WILSON_SPINOR) || \
defined(DIRECT_ACCESS_WILSON_ACCUM) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
defined(DIRECT_ACCESS_WILSON_INTER) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
defined(DIRECT_ACCESS_CLOVER)

static inline __device__ float short2float(short a) {
return (float)a/MAX_SHORT;
}
Expand All @@ -146,10 +155,6 @@ namespace quda {
return make_short2((short)(a.x*c*MAX_SHORT), (short)(a.y*c*MAX_SHORT));
}

#if defined(DIRECT_ACCESS_LINK) || defined(DIRECT_ACCESS_WILSON_SPINOR) || \
defined(DIRECT_ACCESS_WILSON_ACCUM) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
defined(DIRECT_ACCESS_WILSON_INTER) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR)

static inline __device__ short4 float42short4(float c, float4 a) {
return make_short4(float2short(c, a.x), float2short(c, a.y), float2short(c, a.z), float2short(c, a.w));
}
Expand Down
3 changes: 2 additions & 1 deletion lib/pack_face_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,8 @@ void packFaceWilson(void *ghost_buf, cudaColorSpinorField &in, const int dim, co
}
}

#ifdef GPU_STAGGERED_DIRAC

#if (defined DIRECT_ACCESS_PACK) || (defined FERMI_NO_DBLE_TEX)
template <typename Float2>
__device__ void packSpinor(Float2 *out, float *outNorm, int out_idx, int out_stride,
Expand Down Expand Up @@ -804,7 +806,6 @@ __device__ void packSpinor(short2 *out, float *outNorm, int out_idx, int out_str
//
// TODO: add support for textured reads

#ifdef GPU_STAGGERED_DIRAC
template <int dim, int ishalf, typename Float2>
__global__ void packFaceAsqtadKernel(Float2 *out, float *outNorm, const Float2 *in,
const float *inNorm, const int parity)
Expand Down
22 changes: 22 additions & 0 deletions lib/read_clover.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,28 @@
double2 C16 = clover[sid + (18*chi+16)*cl_stride]; \
double2 C17 = clover[sid + (18*chi+17)*cl_stride];

#define READ_CLOVER_DOUBLE_STR(clover, chi) \
double2 C0, C1, C2, C3, C4, C5, C6, C7, C8, C9; \
double2 C10, C11, C12, C13, C14, C15, C16, C17; \
load_streaming_double2(C0, &clover[sid + (18*chi+0)*cl_stride]); \
load_streaming_double2(C1, &clover[sid + (18*chi+1)*cl_stride]); \
load_streaming_double2(C2, &clover[sid + (18*chi+2)*cl_stride]); \
load_streaming_double2(C3, &clover[sid + (18*chi+3)*cl_stride]); \
load_streaming_double2(C4, &clover[sid + (18*chi+4)*cl_stride]); \
load_streaming_double2(C5, &clover[sid + (18*chi+5)*cl_stride]); \
load_streaming_double2(C6, &clover[sid + (18*chi+6)*cl_stride]); \
load_streaming_double2(C7, &clover[sid + (18*chi+7)*cl_stride]); \
load_streaming_double2(C8, &clover[sid + (18*chi+8)*cl_stride]); \
load_streaming_double2(C9, &clover[sid + (18*chi+9)*cl_stride]); \
load_streaming_double2(C10, &clover[sid + (18*chi+10)*cl_stride]); \
load_streaming_double2(C11, &clover[sid + (18*chi+11)*cl_stride]); \
load_streaming_double2(C12, &clover[sid + (18*chi+12)*cl_stride]); \
load_streaming_double2(C13, &clover[sid + (18*chi+13)*cl_stride]); \
load_streaming_double2(C14, &clover[sid + (18*chi+14)*cl_stride]); \
load_streaming_double2(C15, &clover[sid + (18*chi+15)*cl_stride]); \
load_streaming_double2(C16, &clover[sid + (18*chi+16)*cl_stride]); \
load_streaming_double2(C17, &clover[sid + (18*chi+17)*cl_stride]);

#define READ_CLOVER_SINGLE(clover, chi) \
float4 C0 = clover[sid + (9*chi+0)*cl_stride]; \
float4 C1 = clover[sid + (9*chi+1)*cl_stride]; \
Expand Down
2 changes: 1 addition & 1 deletion lib/wilson_dslash_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
#endif
#if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
#define CLOVERTEX clover
#define READ_CLOVER READ_CLOVER_DOUBLE
#define READ_CLOVER READ_CLOVER_DOUBLE_STR
#else
#define CLOVERTEX cloverTexDouble
#define READ_CLOVER READ_CLOVER_DOUBLE_TEX
Expand Down

0 comments on commit d6f5322

Please sign in to comment.