From aab5714becd08d6d464d3d1e7d2eba3e1a83c588 Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Mon, 5 Dec 2022 10:01:08 -0800 Subject: [PATCH 1/5] Fix failing GPU errors --- captum/influence/_utils/common.py | 4 ++-- .../influence/_core/test_tracin_self_influence.py | 5 ++--- tests/influence/_utils/common.py | 14 ++++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index e1a6e27f8f..e0ec7a3340 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -36,9 +36,9 @@ def _tensor_batch_dot(t1: Tensor, t2: Tensor) -> Tensor: ) assert torch.numel(t1) / t1.shape[0] == torch.numel(t2) / t2.shape[0], msg - return torch.mm( + return torch.einsum("id,jd -> ij", t1.view(t1.shape[0], -1), - t2.view(t2.shape[0], -1).T, + t2.view(t2.shape[0], -1), ) diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index 924f977cc8..8be1450466 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -31,7 +31,7 @@ class TestTracInSelfInfluence(BaseTest): "none", DataInfluenceConstructor(TracInCP, name="TracInCP_all_layers"), ), - ( + ( "none", DataInfluenceConstructor( TracInCP, @@ -68,7 +68,7 @@ class TestTracInSelfInfluence(BaseTest): DataInfluenceConstructor( TracInCPFast, "TracInCPFast_last_fc_layer" ), - ), + ), ]: if not ( "sample_wise_grads_per_batch" in constructor.kwargs @@ -95,7 +95,6 @@ def test_tracin_self_influence( False, use_gpu, ) - # compute tracin_scores of training data on training data criterion = nn.MSELoss(reduction=reduction) batch_size = 5 diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py index dbfc0de550..6c364f4c09 100644 --- a/tests/influence/_utils/common.py +++ b/tests/influence/_utils/common.py @@ -190,35 +190,37 @@ def get_random_model_and_data( 
BasicLinearNet(in_features, hidden_nodes, out_features) if not unpack_inputs else MultLinearNet(in_features, hidden_nodes, out_features, num_inputs) - ) + ).double() num_checkpoints = 5 for i in range(num_checkpoints): - net.linear1.weight.data = torch.normal(3, 4, (hidden_nodes, in_features)) - net.linear2.weight.data = torch.normal(5, 6, (out_features, hidden_nodes)) + net.linear1.weight.data = torch.normal(3, 4, (hidden_nodes, in_features)).double() + net.linear2.weight.data = torch.normal(5, 6, (out_features, hidden_nodes)).double() if unpack_inputs: net.pre.weight.data = torch.normal( 3, 4, (in_features, in_features * num_inputs) ) + if hasattr(net, 'pre'): + net.pre.weight.data = net.pre.weight.data.double() checkpoint_name = "-".join(["checkpoint-reg", str(i + 1) + ".pt"]) net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) num_samples = 50 num_train = 32 - all_labels = torch.normal(1, 2, (num_samples, out_features)) + all_labels = torch.normal(1, 2, (num_samples, out_features)).double() train_labels = all_labels[:num_train] test_labels = all_labels[num_train:] if unpack_inputs: all_samples = [ - torch.normal(0, 1, (num_samples, in_features)) for _ in range(num_inputs) + torch.normal(0, 1, (num_samples, in_features)).double() for _ in range(num_inputs) ] train_samples = [ts[:num_train] for ts in all_samples] test_samples = [ts[num_train:] for ts in all_samples] else: - all_samples = torch.normal(0, 1, (num_samples, in_features)) + all_samples = torch.normal(0, 1, (num_samples, in_features)).double() train_samples = all_samples[:num_train] test_samples = all_samples[num_train:] From a8eca00f34dba7e928007b0ff484f631e11d471c Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Mon, 5 Dec 2022 11:47:09 -0800 Subject: [PATCH 2/5] Fix dtype and formatting Merge conflict resolved --- captum/influence/_core/tracincp_fast_rand_proj.py | 3 ++- 
captum/influence/_utils/common.py | 3 ++- tests/influence/_core/test_tracin_self_influence.py | 4 ++-- tests/influence/_utils/common.py | 13 +++++++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index dec58914f3..f4227463d1 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -1380,6 +1380,7 @@ def _set_projections_tracincp_fast_rand_proj( 1 ] # this is the dimension of the input of the last fully-connected layer device = batch_jacobians.device + dtype = batch_jacobians.dtype # choose projection if needed # without projection, the dimension of the intermediate quantities returned @@ -1410,7 +1411,7 @@ def _set_projections_tracincp_fast_rand_proj( projection_quantities = jacobian_projection.to( device - ), layer_input_projection.to(device) + ), layer_input_projection.to(device=device, dtype=dtype) return projection_quantities diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index e0ec7a3340..e9113d9142 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -36,7 +36,8 @@ def _tensor_batch_dot(t1: Tensor, t2: Tensor) -> Tensor: ) assert torch.numel(t1) / t1.shape[0] == torch.numel(t2) / t2.shape[0], msg - return torch.einsum("id,jd -> ij", + return torch.einsum( + "id,jd -> ij", t1.view(t1.shape[0], -1), t2.view(t2.shape[0], -1), ) diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index 8be1450466..d4e3587fb1 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -31,7 +31,7 @@ class TestTracInSelfInfluence(BaseTest): "none", DataInfluenceConstructor(TracInCP, name="TracInCP_all_layers"), ), - ( + ( "none", DataInfluenceConstructor( TracInCP, @@ -68,7 +68,7 @@ class 
TestTracInSelfInfluence(BaseTest): DataInfluenceConstructor( TracInCPFast, "TracInCPFast_last_fc_layer" ), - ), + ), ]: if not ( "sample_wise_grads_per_batch" in constructor.kwargs diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py index 6c364f4c09..26a5146785 100644 --- a/tests/influence/_utils/common.py +++ b/tests/influence/_utils/common.py @@ -195,13 +195,17 @@ def get_random_model_and_data( num_checkpoints = 5 for i in range(num_checkpoints): - net.linear1.weight.data = torch.normal(3, 4, (hidden_nodes, in_features)).double() - net.linear2.weight.data = torch.normal(5, 6, (out_features, hidden_nodes)).double() + net.linear1.weight.data = torch.normal( + 3, 4, (hidden_nodes, in_features) + ).double() + net.linear2.weight.data = torch.normal( + 5, 6, (out_features, hidden_nodes) + ).double() if unpack_inputs: net.pre.weight.data = torch.normal( 3, 4, (in_features, in_features * num_inputs) ) - if hasattr(net, 'pre'): + if hasattr(net, "pre"): net.pre.weight.data = net.pre.weight.data.double() checkpoint_name = "-".join(["checkpoint-reg", str(i + 1) + ".pt"]) net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net @@ -215,7 +219,8 @@ def get_random_model_and_data( if unpack_inputs: all_samples = [ - torch.normal(0, 1, (num_samples, in_features)).double() for _ in range(num_inputs) + torch.normal(0, 1, (num_samples, in_features)).double() + for _ in range(num_inputs) ] train_samples = [ts[:num_train] for ts in all_samples] test_samples = [ts[num_train:] for ts in all_samples] From de2d9bc66e8ce7b17fa7dff9b6899d1bed8a5416 Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Mon, 5 Dec 2022 13:57:05 -0800 Subject: [PATCH 3/5] add dtype to jacobian_projection --- captum/influence/_core/tracincp_fast_rand_proj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index f4227463d1..5b3c309ec4 100644 --- 
a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -1410,7 +1410,7 @@ def _set_projections_tracincp_fast_rand_proj( ) projection_quantities = jacobian_projection.to( - device + device=device, dtype=dtype ), layer_input_projection.to(device=device, dtype=dtype) return projection_quantities From 7cc1066b5f6d61d8c938db26da59d9d8b436907b Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Sun, 11 Dec 2022 12:35:36 -0800 Subject: [PATCH 4/5] Remove einsum and bring back previous changes Merge with master --- captum/influence/_core/tracincp.py | 1 + captum/influence/_utils/common.py | 5 ++--- tests/influence/_core/test_tracin_self_influence.py | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index 02c388378c..edea62bb16 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -1243,6 +1243,7 @@ def get_checkpoint_contribution(checkpoint): if _parse_version(torch.__version__) >= (1, 10, 0): calculate_fn = calculate_via_vector_norm + # sum the contribution over all layers. 
checkpoint_contribution.append( torch.sum( torch.stack( diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index e9113d9142..e1a6e27f8f 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -36,10 +36,9 @@ def _tensor_batch_dot(t1: Tensor, t2: Tensor) -> Tensor: ) assert torch.numel(t1) / t1.shape[0] == torch.numel(t2) / t2.shape[0], msg - return torch.einsum( - "id,jd -> ij", + return torch.mm( t1.view(t1.shape[0], -1), - t2.view(t2.shape[0], -1), + t2.view(t2.shape[0], -1).T, ) diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index d4e3587fb1..924f977cc8 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -95,6 +95,7 @@ def test_tracin_self_influence( False, use_gpu, ) + # compute tracin_scores of training data on training data criterion = nn.MSELoss(reduction=reduction) batch_size = 5 From c53834584a99c78ec85460f5fe3390f8f79851e6 Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Sun, 11 Dec 2022 13:36:25 -0800 Subject: [PATCH 5/5] Remove unnecessary comment --- captum/influence/_core/tracincp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index edea62bb16..02c388378c 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -1243,7 +1243,6 @@ def get_checkpoint_contribution(checkpoint): if _parse_version(torch.__version__) >= (1, 10, 0): calculate_fn = calculate_via_vector_norm - # sum the contribution over all layers. checkpoint_contribution.append( torch.sum( torch.stack(