Skip to content

Commit

Permalink
#485 first part, don't parallelize setZero
Browse files Browse the repository at this point in the history
  • Loading branch information
mratsim committed Jan 3, 2021
1 parent d8d986d commit 69efb08
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
21 changes: 13 additions & 8 deletions src/arraymancer/laser/tensor/initialization.nim
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,19 @@ proc setZero*[T](t: var Tensor[T], check_contiguous: static bool = true) =
when not (T is KnownSupportsCopyMem):
t.storage.raw_buffer.reset()
t.storage.raw_buffer.setLen(t.size)
else:
omp_parallel_chunks(
t.size, chunk_offset, chunk_size,
OMP_MEMORY_BOUND_GRAIN_SIZE * 4):
zeroMem(
t.unsafe_raw_offset[chunk_offset].addr,
chunk_size * sizeof(T)
)
else: # If setZero or newTensor is used inside an OpenMP parallel region,
      # parallelizing this zeroing would kill performance.
# omp_parallel_chunks(
# t.size, chunk_offset, chunk_size,
# OMP_MEMORY_BOUND_GRAIN_SIZE * 4):
# zeroMem(
# t.unsafe_raw_offset[chunk_offset].addr,
# chunk_size * sizeof(T)
# )
zeroMem(
t.unsafe_raw_offset[0].addr,
t.size * sizeof(T)
)

proc newTensor*[T](shape: varargs[int]): Tensor[T] =
var size: int
Expand Down
1 change: 0 additions & 1 deletion src/arraymancer/nn_primitives/nnp_linear.nim
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ proc linear*[T](input, weight: Tensor[T], bias: Tensor[T], output: var Tensor[T]
# - bias tensor shape [1, out_features]
# Output does not need to be initialized to 0 or the proper shape, data will be overwritten
# Output is: Y = x * W.transpose + b

output = input * weight.transpose # TODO: with the transpose the non-matching rows and cols is confusing
output +.= bias

Expand Down

0 comments on commit 69efb08

Please sign in to comment.