diff --git a/Benchmark/src/test/scala/com/thoughtworks/compute/OpenCLBenchmark.scala b/Benchmark/src/test/scala/com/thoughtworks/compute/OpenCLBenchmark.scala index c45f9000..1c07196d 100644 --- a/Benchmark/src/test/scala/com/thoughtworks/compute/OpenCLBenchmark.scala +++ b/Benchmark/src/test/scala/com/thoughtworks/compute/OpenCLBenchmark.scala @@ -32,11 +32,13 @@ object OpenCLBenchmark { trait TestKernels extends OpenCL with OpenCL.CommandQueuePool { + private implicit def witnessSelf: Witness.Aux[this.type] = Witness.mkWitness(this) + @transient private[OpenCLBenchmark] lazy val compiledProgram: Program = { val program = createProgramWithSource(fastraw""" - float sample(global const float* restrict input, const size_t image_index, const ptrdiff_t x, const ptrdiff_t y, const ptrdiff_t width, const ptrdiff_t height) { + float sample(global const float* /*restrict*/ input, const size_t image_index, const ptrdiff_t x, const ptrdiff_t y, const ptrdiff_t width, const ptrdiff_t height) { if (x >= 0 && x < width && y >= 0 && y < height) { return input[image_index * width * height, y * width + x]; } else { @@ -44,7 +46,7 @@ object OpenCLBenchmark { } } - kernel void benchmark(global const float* restrict input, global float* restrict output, global const float* restrict weight) { + kernel void benchmark(global const float* /*restrict*/ input, global float* restrict output, global const float* /*restrict*/ weight) { const size_t image_index = get_global_id(0); const size_t batch_size = get_global_size(0); const size_t x = get_global_id(1); diff --git a/Memory/src/main/scala/com/thoughtworks/compute/Memory.scala b/Memory/src/main/scala/com/thoughtworks/compute/Memory.scala index 82328c59..39914db2 100644 --- a/Memory/src/main/scala/com/thoughtworks/compute/Memory.scala +++ b/Memory/src/main/scala/com/thoughtworks/compute/Memory.scala @@ -75,8 +75,10 @@ object Memory extends LowPriorityMemory { override def free(buffer: PointerBuffer): Unit = MemoryUtil.memFree(buffer) override def toArray(buffer: PointerBuffer): Array[Pointer] = { - val bufferToArray = Array.ofDim[Long](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Long](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray.map { long => new Pointer.Default(long) {} } @@ -120,8 +122,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: IntBuffer, index: Int, value: Int): Unit = buffer.put(index, value) override def toArray(buffer: IntBuffer): Array[Int] = { - val bufferToArray = Array.ofDim[Int](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Int](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } @@ -144,8 +148,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: LongBuffer, index: Int, value: Long): Unit = buffer.put(index, value) override def toArray(buffer: LongBuffer): Array[Long] = { - val bufferToArray = Array.ofDim[Long](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Long](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } @@ -168,8 +174,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: DoubleBuffer, index: Int, value: Double): Unit = buffer.put(index, value) override def toArray(buffer: DoubleBuffer): Array[Double] = { - val bufferToArray = Array.ofDim[Double](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Double](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } @@ -192,8 +200,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: FloatBuffer, index: Int, value: Float): Unit = buffer.put(index, value) override def toArray(buffer: FloatBuffer): Array[Float] = { - val bufferToArray = new Array[Float](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = new Array[Float](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } @@ -216,8 +226,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: ByteBuffer, index: Int, value: Byte): Unit = buffer.put(index, value) override def toArray(buffer: ByteBuffer): Array[Byte] = { - val bufferToArray = Array.ofDim[Byte](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Byte](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } @@ -240,8 +252,10 @@ object Memory extends LowPriorityMemory { override def put(buffer: ShortBuffer, index: Int, value: Short): Unit = buffer.put(index, value) override def toArray(buffer: ShortBuffer): Array[Short] = { - val bufferToArray = Array.ofDim[Short](buffer.limit()) + val oldPosition = buffer.position() + val bufferToArray = Array.ofDim[Short](buffer.remaining()) buffer.get(bufferToArray, 0, bufferToArray.length) + buffer.position(oldPosition) bufferToArray } } diff --git a/OpenCL/src/main/scala/com/thoughtworks/compute/OpenCL.scala b/OpenCL/src/main/scala/com/thoughtworks/compute/OpenCL.scala index c46a8b4a..f76bac7b 100644 --- a/OpenCL/src/main/scala/com/thoughtworks/compute/OpenCL.scala +++ b/OpenCL/src/main/scala/com/thoughtworks/compute/OpenCL.scala @@ -308,7 +308,7 @@ object OpenCL { } - trait UseAllGPUDevice { + trait UseAllGpuDevice { protected val platformId: Long @@ -318,7 +318,7 @@ object OpenCL { } } - trait UseFirstGPUDevice { + trait UseFirstGpuDevice { protected val platformId: Long @@ -328,8 +328,18 @@ object OpenCL { Seq(allDeviceIds.head) } } + trait UseFirstCpuDevice { - trait UseAllCPUDevice { + protected val platformId: Long + + @transient + protected lazy val deviceIds: Seq[Long] = { + val allDeviceIds = deviceIdsByType(platformId, CL_DEVICE_TYPE_CPU) + Seq(allDeviceIds.head) + } + } + + trait UseAllCpuDevice { protected val platformId: Long @@ -435,15 +445,6 @@ object OpenCL { } object DeviceBuffer { - private[OpenCL] def delay[Owner <: Singleton with OpenCL, Element]( - handle: => Long): Do[DeviceBuffer[Owner, Element]] = { - val bufferContinuation = UnitContinuation.delay { - Resource(value = Success(DeviceBuffer[Owner, Element](handle)), release = UnitContinuation.delay { - checkErrorCode(clReleaseMemObject(handle)) - }) - } - Do(TryT(ResourceT(bufferContinuation))) - } implicit def bufferBox[Owner <: Singleton with OpenCL, Element]: Box.Aux[DeviceBuffer[Owner, Element], Pointer] = new Box[DeviceBuffer[Owner, Element]] { @@ -462,11 +463,18 @@ object OpenCL { * @param handle The underlying `cl_mem`. * @note comment out extends AnyVal in case of https://github.com/scala/bug/issues/10647 */ - final case class DeviceBuffer[Owner <: OpenCL with Singleton, Element](handle: Long) /* extends AnyVal */ { + final case class DeviceBuffer[Owner <: OpenCL with Singleton, Element](handle: Long) /* extends AnyVal */ + extends MonadicCloseable[UnitContinuation] { deviceBuffer => + + override def monadicClose: UnitContinuation[Unit] = UnitContinuation.delay { + checkErrorCode(clReleaseMemObject(handle)) + } + def slice(offset: Int, size: Int)(implicit memory: Memory[Element]): Do[DeviceBuffer[Owner, Element]] = { - DeviceBuffer.delay { + + Do.monadicCloseable { val stack = stackPush() try { val errorCode = stack.ints(0) @@ -478,7 +486,7 @@ object OpenCL { region.address(), memAddress(errorCode)) checkErrorCode(errorCode.get(0)) - newHandle + DeviceBuffer[Owner, Element](newHandle) } finally { stack.close() } @@ -585,6 +593,26 @@ object OpenCL { } + def functionName: String = { + val stack = stackPush() + + try { + + val functionNameSizePointer = stack.mallocPointer(1) + + checkErrorCode( + clGetKernelInfo(this.handle, CL_KERNEL_FUNCTION_NAME, null: PointerBuffer, functionNameSizePointer)) + val functionNameSize = functionNameSizePointer.get(0).toInt + val functionNameBuffer = stack.malloc(functionNameSize) + + checkErrorCode( + clGetKernelInfo(this.handle, CL_KERNEL_FUNCTION_NAME, functionNameBuffer, functionNameSizePointer)) + decodeString(functionNameBuffer) + } finally { + stack.close() + } + } + def enqueue(globalWorkSize: Long*)(implicit witnessOwner: Witness.Aux[Owner]): Do[Event[Owner]] = { witnessOwner.value.acquireCommandQueue.flatMap { commandQueue => Do.monadicCloseable { @@ -841,14 +869,14 @@ trait OpenCL extends MonadicCloseable[UnitContinuation] with ImplicitsSingleton /** Returns an uninitialized buffer of `Element` on device. */ def allocateBuffer[Element](size: Long)(implicit memory: Memory[Element]): Do[DeviceBuffer[Element]] = - DeviceBuffer.delay[this.type, Element] { + Do.monadicCloseable { val stack = stackPush() try { val errorCodeBuffer = stack.ints(CL_SUCCESS) val buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, memory.numberOfBytesPerElement * size, errorCodeBuffer) checkErrorCode(errorCodeBuffer.get(0)) - buffer + DeviceBuffer[this.type, Element](buffer) } finally { stack.pop() } @@ -858,7 +886,7 @@ trait OpenCL extends MonadicCloseable[UnitContinuation] with ImplicitsSingleton */ def allocateBufferFrom[Element, HostBuffer](hostBuffer: HostBuffer)( implicit memory: Memory.Aux[Element, HostBuffer]): Do[DeviceBuffer[Element]] = - DeviceBuffer.delay[this.type, Element] { + Do.monadicCloseable { val stack = stackPush() try { val errorCodeBuffer = stack.ints(CL_SUCCESS) @@ -868,7 +896,7 @@ trait OpenCL extends MonadicCloseable[UnitContinuation] with ImplicitsSingleton memory.address(hostBuffer), memAddress(errorCodeBuffer)) checkErrorCode(errorCodeBuffer.get(0)) - buffer + DeviceBuffer[this.type, Element](buffer) } finally { stack.pop() }