QuEST-Kit · TysonRayJones · Sep 22, 2023 · Aug 17, 2023 · Aug 19, 2023 · Aug 19, 2023
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/.github/workflows/llvm-asan.yml b/.github/workflows/llvm-asan.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/.github/workflows/ubuntu-unit.yml b/.github/workflows/ubuntu-unit.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/.github/workflows/windows-unit.yml b/.github/workflows/windows-unit.yml
@@ -7,7 +7,8 @@ on:
       - develop
   pull_request:
     branches:
-      - '**'
+      - master
+      - develop
 
 jobs:
 

diff --git a/AUTHORS.txt b/AUTHORS.txt
@@ -33,6 +33,8 @@ Dr Mihai Duta [developer]
     original prototyping
 
 External contributors:
+Jakub Adamski
+    optimised distributed communication by sending max-size messages asynchronously
 Bruno Villasenor Alvarez on behalf of AMD
     ported the GPU backend to HIP, for AMD GPU compatibility
 Dr Nicolas Vogt on behalf of HQS Quantum Simulations

diff --git a/QuEST/CMakeLists.txt b/QuEST/CMakeLists.txt
@@ -37,6 +37,8 @@ option(USE_HIP "Whether to use HIP for GPU code compilation for AMD GPUs. Set to
 
 set(GPU_ARCH gfx90 CACHE STRING "GPU hardware dependent, used for AMD GPUs when USE_HIP=1. Lookup at https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table. Write without fullstop")
 
+option(USE_CUQUANTUM "Whether to use NVIDIA's cuQuantum library (requires prior installation) in lieu of QuEST's bespoke GPU kernel. Set to 1 to enable." 0)
+
 
 # *****************************************************************************
 # ***** NO CHANGES SHOULD BE REQUIRED FROM THE USER BEYOND THIS POINT *********
@@ -49,6 +51,7 @@ message(STATUS "OMP acceleration is ${MULTITHREADED}")
 message(STATUS "MPI distribution is ${DISTRIBUTED}")
 if (${GPUACCELERATED})
   message(STATUS "HIP compilation is ${USE_HIP}")
+  message(STATUS "cuQuantum compilation is ${USE_CUQUANTUM}")
 endif()
 
 
@@ -119,25 +122,28 @@ endif()
 if (GPUACCELERATED)
   if (USE_HIP)
 
-  if(NOT DEFINED HIP_PATH)
-    if(NOT DEFINED ENV{HIP_PATH})
-      message(WARNING "WARNING: HIP_PATH is not defiend. Using default HIP_PATH=/opt/rocm/hip    " ${HIP_VERSION})
-      set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
-    else()
-      set(HIP_PATH $ENV{HIP_PATH}	CACHE PATH "Path to which HIP has been installed")
+    if(NOT DEFINED HIP_PATH)
+      if(NOT DEFINED ENV{HIP_PATH})
+        message(WARNING "WARNING: HIP_PATH is not defiend. Using default HIP_PATH=/opt/rocm/hip    " ${HIP_VERSION})
+        set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+      else()
+        set(HIP_PATH $ENV{HIP_PATH}	CACHE PATH "Path to which HIP has been installed")
+      endif()
     endif()
-  endif()
 
-  if(EXISTS "${HIP_PATH}")
-    set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
-    find_package(HIP REQUIRED)
-    message(STATUS "Found HIP: " ${HIP_VERSION})
-    message(STATUS "HIP PATH: " ${HIP_PATH})
-  endif()
-
-  ADD_DEFINITIONS( -DUSE_HIP )
-  ADD_DEFINITIONS( -D__HIP_PLATFORM_AMD__ )
+    if(EXISTS "${HIP_PATH}")
+      set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+      find_package(HIP REQUIRED)
+      message(STATUS "Found HIP: " ${HIP_VERSION})
+      message(STATUS "HIP PATH: " ${HIP_PATH})
+    endif()
+      
+    ADD_DEFINITIONS( -DUSE_HIP )
+    ADD_DEFINITIONS( -D__HIP_PLATFORM_AMD__ )
 
+  elseif (USE_CUQUANTUM)
+    find_package(CUDA REQUIRED)
+    ADD_DEFINITIONS( -DUSE_CUQUANTUM )
   else()
     find_package(CUDA REQUIRED)
   endif()  
@@ -280,7 +286,12 @@ endif()
 # ----- C++ COMPILER FLAGS --------------------------------------------------
 
 # set C++ flags that are common between compilers and build types
-set (CMAKE_CXX_STANDARD 98)
+if (USE_CUQUANTUM)
+  set(CMAKE_CXX_STANDARD 14)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+else ()
+  set (CMAKE_CXX_STANDARD 98)
+endif ()
 
 # Use -O2 for all but debug mode by default 
 if (NOT("${CMAKE_BUILD_TYPE}" STREQUAL "Debug"))
@@ -412,6 +423,14 @@ target_link_libraries(QuEST PUBLIC ${MPI_C_LIBRARIES})
 # ----- GPU -------------------------------------------------------------------
 if (USE_HIP)
   target_link_libraries(QuEST PUBLIC  ${HIP_PATH}/lib/libamdhip64.so )
+elseif (USE_CUQUANTUM)
+  find_library(CUQUANTUM_LIBRARIES custatevec)
+  if (NOT CUQUANTUM_LIBRARIES)
+    message(FATAL_ERROR "cuQuantum library (specifically custatevec) not found")
+  endif ()
+  target_link_libraries(QuEST ${CUDA_LIBRARIES} ${CUQUANTUM_LIBRARIES})
+  # expose the CUDA headers located by find_package(CUDA), rather than assuming a /usr/local/cuda install
+  target_include_directories(QuEST PUBLIC ${CUDA_INCLUDE_DIRS})
 else()
   target_link_libraries(QuEST ${CUDA_LIBRARIES})
 endif()

diff --git a/QuEST/include/QuEST.h b/QuEST/include/QuEST.h
@@ -34,6 +34,23 @@
 
 # include "QuEST_precision.h"
 
+
+
+// ensure CuQuantumConfig is defined, even when not compiling with cuQuantum
+# ifdef USE_CUQUANTUM
+# include <custatevec.h>
+typedef struct CuQuantumConfig {
+    cudaMemPool_t cuMemPool;
+    cudaStream_t cuStream;
+    custatevecHandle_t cuQuantumHandle;
+    custatevecDeviceMemHandler_t cuMemHandler;
+} CuQuantumConfig;
+# else
+# define CuQuantumConfig void*
+# endif
+
+
+
 // prevent C++ name mangling
 #ifdef __cplusplus
 extern "C" {
@@ -368,6 +385,11 @@ typedef struct Qureg
     //! Storage for reduction of probabilities on GPU
     qreal *firstLevelReduction, *secondLevelReduction;
 
+    //! Storage for wavefunction amplitudes and config (copy of QuESTEnv's handle) in cuQuantum deployment
+    cuAmp* cuStateVec;
+    cuAmp* deviceCuStateVec;
+    CuQuantumConfig* cuConfig;
+
     //! Storage for generated QASM output
     QASMLogger* qasmLog;
 
@@ -386,6 +408,10 @@ typedef struct QuESTEnv
     int numRanks;
     unsigned long int* seeds;
     int numSeeds;
+
+    // pointer to the cuQuantum config (memory pool, stream and handles), used only in cuQuantum deployment
+    CuQuantumConfig* cuConfig;
+
 } QuESTEnv;
 
 
@@ -4236,6 +4262,10 @@ qreal calcPurity(Qureg qureg);
  * linear algebra calculation.
  *
  * The number of qubits represented in \p qureg and \p pureState must match.
+ *
+ * > In the GPU-accelerated cuQuantum backend, this function further assumes that
+ * > the density matrix \p qureg is correctly normalised, and otherwise returns the 
+ * > fidelity of the conjugate-transpose of \p qureg.
  * 
  * @see
  * - calcHilbertSchmidtDistance()

diff --git a/QuEST/include/QuEST_precision.h b/QuEST/include/QuEST_precision.h
@@ -10,15 +10,27 @@
  * @author Tyson Jones (doc)
  */
 
-# include <math.h>
-
 # ifndef QUEST_PRECISION_H
 # define QUEST_PRECISION_H
 
+# include <math.h>
+
+
+// define CUDA complex types as void if not using cuQuantum.
+// note we used cuComplex.h for complex numbers, in lieu of
+// Thrust's complex<qreal>, so that the QuEST.h header can
+// always be compiled with C99, rather than C++14.
+# ifdef USE_CUQUANTUM
+    # include <cuComplex.h>
+# else
+    # define cuFloatComplex void
+    # define cuDoubleComplex void
+# endif
+
 
 // set default double precision if not set during compilation
 # ifndef QuEST_PREC
-# define QuEST_PREC 2
+    # define QuEST_PREC 2
 # endif
 
 
@@ -28,6 +40,7 @@
 # if QuEST_PREC==1
     # define qreal float
     // \cond HIDDEN_SYMBOLS   
+    # define cuAmp cuFloatComplex
     # define MPI_QuEST_REAL MPI_FLOAT
     # define MPI_MAX_AMPS_IN_MSG (1LL<<29) // must be 2^int
     # define REAL_STRING_FORMAT "%.8f"
@@ -41,7 +54,8 @@
  */
 # elif QuEST_PREC==2
     # define qreal double
-    // \cond HIDDEN_SYMBOLS   
+    // \cond HIDDEN_SYMBOLS
+    # define cuAmp cuDoubleComplex
     # define MPI_QuEST_REAL MPI_DOUBLE
     # define MPI_MAX_AMPS_IN_MSG (1LL<<28) // must be 2^int
     # define REAL_STRING_FORMAT "%.14f"
@@ -57,6 +71,7 @@
 # elif QuEST_PREC==4
     # define qreal long double
     // \cond HIDDEN_SYMBOLS   
+    # define cuAmp void // invalid
     # define MPI_QuEST_REAL MPI_LONG_DOUBLE
     # define MPI_MAX_AMPS_IN_MSG (1LL<<27) // must be 2^int
     # define REAL_STRING_FORMAT "%.17Lf"

diff --git a/QuEST/src/CPU/QuEST_cpu.c b/QuEST/src/CPU/QuEST_cpu.c
@@ -1641,53 +1641,6 @@ void statevec_cloneQureg(Qureg targetQureg, Qureg copyQureg) {
     }
 }
 
-/**
- * Initialise the state vector of probability amplitudes such that one qubit is set to 'outcome' and all other qubits are in an equal superposition of zero and one.
- * @param[in,out] qureg object representing the set of qubits to be initialised
- * @param[in] qubitId id of qubit to set to state 'outcome'
- * @param[in] outcome of qubit 'qubitId'
- */
-void statevec_initStateOfSingleQubit(Qureg *qureg, int qubitId, int outcome)
-{
-    long long int chunkSize, stateVecSize;
-    long long int index;
-    int bit;
-    long long int chunkId=qureg->chunkId;
-
-    // dimension of the state vector
-    chunkSize = qureg->numAmpsPerChunk;
-    stateVecSize = chunkSize*qureg->numChunks;
-    qreal normFactor = 1.0/sqrt((qreal)stateVecSize/2.0);
-
-    // Can't use qureg->stateVec as a private OMP var
-    qreal *stateVecReal = qureg->stateVec.real;
-    qreal *stateVecImag = qureg->stateVec.imag;
-
-    // initialise the state to |0000..0000>
-# ifdef _OPENMP
-# pragma omp parallel \
-    default  (none) \
-    shared   (chunkSize, stateVecReal, stateVecImag, normFactor, qubitId, outcome, chunkId) \
-    private  (index, bit)
-# endif
-    {
-# ifdef _OPENMP
-# pragma omp for schedule (static)
-# endif
-        for (index=0; index<chunkSize; index++) {
-            bit = extractBit(qubitId, index+chunkId*chunkSize);
-            if (bit==outcome) {
-                stateVecReal[index] = normFactor;
-                stateVecImag[index] = 0.0;
-            } else {
-                stateVecReal[index] = 0.0;
-                stateVecImag[index] = 0.0;
-            }
-        }
-    }
-}
-
-
 /**
  * Initialise the state vector of probability amplitudes to an (unphysical) state with
  * each component of each probability amplitude a unique floating point value. For debugging processes
@@ -1726,62 +1679,6 @@ void statevec_initDebugState (Qureg qureg)
     }
 }
 
-// returns 1 if successful, else 0
-int statevec_initStateFromSingleFile(Qureg *qureg, char filename[200], QuESTEnv env){
-    long long int chunkSize, stateVecSize;
-    long long int indexInChunk, totalIndex;
-
-    chunkSize = qureg->numAmpsPerChunk;
-    stateVecSize = chunkSize*qureg->numChunks;
-
-    qreal *stateVecReal = qureg->stateVec.real;
-    qreal *stateVecImag = qureg->stateVec.imag;
-
-    FILE *fp;
-    char line[200];
-
-    for (int rank=0; rank<(qureg->numChunks); rank++){
-        if (rank==qureg->chunkId){
-            fp = fopen(filename, "r");
-
-            // indicate file open failure
-            if (fp == NULL)
-                return 0;
-
-            indexInChunk = 0; totalIndex = 0;
-            while (fgets(line, sizeof(char)*200, fp) != NULL && totalIndex<stateVecSize){
-                if (line[0]!='#'){
-                    int chunkId = (int) (totalIndex/chunkSize);
-                    if (chunkId==qureg->chunkId){
-                        sscanf(line, REAL_SPECIFIER ", " REAL_SPECIFIER, &(stateVecReal[indexInChunk]),
-                                &(stateVecImag[indexInChunk])); 
-                        indexInChunk += 1;
-                    }
-                    totalIndex += 1;
-                }
-            }   
-            fclose(fp);
-        }
-        syncQuESTEnv(env);
-    }
-
-    // indicate success
-    return 1;
-}
-
-int statevec_compareStates(Qureg mq1, Qureg mq2, qreal precision){
-    qreal diff;
-    long long int chunkSize = mq1.numAmpsPerChunk;
-
-    for (long long int i=0; i<chunkSize; i++){
-        diff = absReal(mq1.stateVec.real[i] - mq2.stateVec.real[i]);
-        if (diff>precision) return 0;
-        diff = absReal(mq1.stateVec.imag[i] - mq2.stateVec.imag[i]);
-        if (diff>precision) return 0;
-    }
-    return 1;
-}
-
 void statevec_compactUnitaryLocal (Qureg qureg, int targetQubit, Complex alpha, Complex beta)
 {
     long long int sizeBlock, sizeHalfBlock;
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,7 +7,8 @@ on: @@
           - develop
       pull_request:
         branches:
-          - '**'
+          - master
+          - develop
     jobs:
@@ Expand Down @@