This repository was archived by the owner on Nov 15, 2022. It is now read-only.

Fixed cuda sync #183

Merged 3 commits on Jun 18, 2020

Changes from 1 commit
61 changes: 32 additions & 29 deletions benchmarks/segmentation_layers.py
@@ -22,7 +22,7 @@ def register_benchmark(fn):
def relu__tensor_iter(self):
def _relu_tensor_iter():
for t in self.inputs:
torch.nn.functional.relu_(t)
res = torch.nn.functional.relu_(t)

return _relu_tensor_iter

@@ -31,7 +31,7 @@ def relu__tensor_pad(self):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _relu_tensor_pad():
torch.nn.functional.relu_(tensor)
res = torch.nn.functional.relu_(tensor)

return _relu_tensor_pad

@@ -40,15 +40,15 @@ def relu__nt(self):
nt = nestedtensor.nested_tensor(self.inputs)

def _relu_nt():
torch.nn.functional.relu_(nt)
res = torch.nn.functional.relu_(nt)

return _relu_nt

@register_benchmark
def relu_tensor_iter(self):
def _relu_tensor_iter():
for t in self.inputs:
torch.nn.functional.relu(t)
res = torch.nn.functional.relu(t)

return _relu_tensor_iter

@@ -57,7 +57,7 @@ def relu_tensor_pad(self):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _relu_tensor_pad():
torch.nn.functional.relu(tensor)
res = torch.nn.functional.relu(tensor)

return _relu_tensor_pad

@@ -66,7 +66,7 @@ def relu_nt(self):
nt = nestedtensor.nested_tensor(self.inputs)

def _relu_nt():
torch.nn.functional.relu(nt)
res = torch.nn.functional.relu(nt)

return _relu_nt

@@ -77,7 +77,7 @@ def _relu_nt():
def conv2d_iter(self, module):
def _conv2d_tensor_iter():
for t in self.inputs:
module(t.unsqueeze(0)).squeeze(0)
res = module(t.unsqueeze(0)).squeeze(0)

return _conv2d_tensor_iter

@@ -86,7 +86,7 @@ def conv2d_pad(self, module):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _conv2d_tensor():
module(tensor)
res = module(tensor)

return _conv2d_tensor

@@ -95,7 +95,7 @@ def conv2d_nt(self, module):
nt = nestedtensor.nested_tensor(self.inputs)

def _conv2d():
module(nt)
res = module(nt)

return _conv2d

@@ -106,7 +106,7 @@ def _conv2d():
def batch_norm_tensor_iter(self, module):
def _batch_norm_tensor_iter():
for t in self.inputs:
module(t.unsqueeze(0)).squeeze(0)
res = module(t.unsqueeze(0)).squeeze(0)

return _batch_norm_tensor_iter

@@ -115,7 +115,7 @@ def batch_norm_tensor_pad(self, module):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _batch_norm_tensor_pad():
module(tensor)
res = module(tensor)

return _batch_norm_tensor_pad

@@ -124,7 +124,7 @@ def batch_norm_nt(self, module):
nt = nestedtensor.nested_tensor(self.inputs)

def _batch_norm_nt():
module(nt)
res = module(nt)

return _batch_norm_nt

@@ -135,7 +135,7 @@ def _batch_norm_nt():
def max_pool2d_tensor_iter(self, module):
def _max_pool2d_tensor_iter():
for t in self.inputs:
module(t.unsqueeze(0)).squeeze(0)
res = module(t.unsqueeze(0)).squeeze(0)
Contributor

why not accumulate in a list? I'd imagine the garbage collector could still get rid of this the next iteration around since it's not referenced anymore.

Contributor Author

Hmm, fair enough. Let me try it and see if the behavior changes.

Contributor Author

Didn't show any effect yet so removing these changes for now.
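
For illustration, the "accumulate in a list" variant discussed in this thread could look like the sketch below. It is a hypothetical rewrite of the benchmark closure, not part of this PR: appending each output to `results` keeps it referenced until the closure returns, instead of letting it become garbage immediately after the call.

# Hypothetical sketch of the reviewer's suggestion (not part of this PR):
# keep each output alive by appending it to a list instead of discarding it.
def max_pool2d_tensor_iter(self, module):
    def _max_pool2d_tensor_iter():
        results = []
        for t in self.inputs:
            results.append(module(t.unsqueeze(0)).squeeze(0))
        return results

    return _max_pool2d_tensor_iter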


return _max_pool2d_tensor_iter

@@ -144,7 +144,7 @@ def max_pool2d_tensor_pad(self, module):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _max_pool2d_tensor_pad():
module(tensor)
res = module(tensor)

return _max_pool2d_tensor_pad

@@ -153,7 +153,7 @@ def max_pool2d_nt(self, module):
nt = nestedtensor.nested_tensor(self.inputs)

def _max_pool2d_nt():
module(nt)
res = module(nt)

return _max_pool2d_nt

@@ -164,7 +164,7 @@ def _max_pool2d_nt():
def cross_entropy_tensor_iter(self):
def _cross_entropy_tensor_iter():
for a, b in zip(self.inputs, self.targets):
torch.nn.functional.cross_entropy(
res = torch.nn.functional.cross_entropy(
a.unsqueeze(0), b.unsqueeze(0)
).squeeze(0)

@@ -176,7 +176,7 @@ def cross_entropy_tensor_pad(self):
targets, _ = nestedtensor.nested_tensor(self.targets).to_tensor_mask()

def _cross_entropy_tensor_pad():
torch.nn.functional.cross_entropy(tensor, targets)
res = torch.nn.functional.cross_entropy(tensor, targets)

return _cross_entropy_tensor_pad

@@ -186,7 +186,7 @@ def cross_entropy_nt(self):
nt_targets = nestedtensor.nested_tensor(self.targets)

def _cross_entropy_nt():
torch.nn.functional.cross_entropy(nt_input, nt_targets)
res = torch.nn.functional.cross_entropy(nt_input, nt_targets)

return _cross_entropy_nt

@@ -197,7 +197,7 @@ def _cross_entropy_nt():
def dropout_tensor_iter(self):
def _dropout_tensor_iter():
for t in self.inputs:
torch.nn.functional.dropout(t.unsqueeze(0)).squeeze(0)
res = torch.nn.functional.dropout(t.unsqueeze(0)).squeeze(0)

return _dropout_tensor_iter

@@ -206,7 +206,7 @@ def dropout_tensor_pad(self):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _dropout_tensor_pad():
torch.nn.functional.dropout(tensor)
res = torch.nn.functional.dropout(tensor)

return _dropout_tensor_pad

@@ -215,7 +215,7 @@ def dropout_nt(self):
nt = nestedtensor.nested_tensor(self.inputs)

def _dropout_nt():
torch.nn.functional.dropout(nt)
res = torch.nn.functional.dropout(nt)

return _dropout_nt

@@ -226,7 +226,7 @@ def _dropout_nt():
def interpolate_tensor_iter(self):
def _interpolate_tensor_iter():
for t in self.inputs:
torch.nn.functional.interpolate(t, t.unsqueeze(0).shape[-2])
res = torch.nn.functional.interpolate(t, t.unsqueeze(0).shape[-2])

return _interpolate_tensor_iter

@@ -235,7 +235,7 @@ def interpolate_tensor_pad(self):
tensor, _ = nestedtensor.nested_tensor(self.inputs).to_tensor_mask()

def _interpolate_tensor_pad():
torch.nn.functional.interpolate(tensor, tensor[0].unsqueeze(0).shape[-2])
res = torch.nn.functional.interpolate(tensor, tensor[0].unsqueeze(0).shape[-2])

return _interpolate_tensor_pad

@@ -244,7 +244,7 @@ def interpolate_nt(self):
nt = nestedtensor.nested_tensor(self.inputs)
input_shape = [y[-2:] for y in nt.nested_size().unbind()]
def _interpolate_nt():
torch.nn.functional.interpolate(nt, input_shape)
res = torch.nn.functional.interpolate(nt, input_shape)

return _interpolate_nt

@@ -311,7 +311,7 @@ def run(self):

benchmarks = [(layer, self.get_benchmark(c, layer, cuda)) for layer in self.args.layers]
for layer, benchmark in benchmarks:
result = utils.benchmark_fn(benchmark, run_time=self.args.run_time, warmup=self.args.warmup)
result = utils.benchmark_fn(benchmark, cuda=cuda, run_time=self.args.run_time, warmup=self.args.warmup)
result["#"] = str(i) + "/" + str(len(benchmarks) * len(params))
result["N"] = n
result["C"] = c
@@ -336,6 +336,9 @@ def run(self):
def get_input(self, cuda, n, c, h, w, h_var, w_var, seed):
inputs = []
targets = []
device = 'cpu'
if cuda:
device = 'cuda'

torch.manual_seed(seed)
random.seed(seed)
@@ -344,10 +347,10 @@ def get_input(self, cuda, n, c, h, w, h_var, w_var, seed):
for i in range(n):
h_res = max(1, int(random.gauss(h, h_var)))
w_res = max(1, int(random.gauss(w, w_var)))
input_i = torch.randn(c, h_res, w_res)
target_i = torch.randint(1, (h_res, w_res), dtype=torch.int64)
inputs.append(input_i.cuda() if cuda else input_i)
targets.append(target_i.cuda() if cuda else target_i)
input_i = torch.randn(c, h_res, w_res, device=device)
target_i = torch.randint(1, (h_res, w_res), dtype=torch.int64, device=device)
inputs.append(input_i)
targets.append(target_i)
if cuda:
# Synchronize copy operations so they don't influence the benchmark
torch.cuda.synchronize()