Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow mini-batching of uncoalesced sparse matrices #9099

Merged
merged 4 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Changed

- Allow mini-batching of uncoalesced sparse matrices ([#9099](https://github.com/pyg-team/pytorch_geometric/pull/9099))
- Improved the multi-node `ogbn-papers100m` default hyperparameters and added evaluation on all ranks ([#8823](https://github.com/pyg-team/pytorch_geometric/pull/8823))
- Changed distributed sampler and loader tests to correctly report failures in subprocesses to `pytest` ([#8978](https://github.com/pyg-team/pytorch_geometric/pull/8978))
- Remove filtering of node/edge types in `trim_to_layer` functionality ([#9021](https://github.com/pyg-team/pytorch_geometric/pull/9021))
Expand Down
13 changes: 13 additions & 0 deletions test/loader/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,19 @@ def test_dataloader_tensor_frame():
assert batch.edge_index.max() >= 10


def test_dataloader_sparse():
    """Check that a sparse COO ``adj_t`` attribute can be mini-batched.

    The same graph, holding a ``3 x 3`` sparse COO adjacency matrix, is
    placed twice into a loader with ``batch_size=2``; every batch it yields
    must expose an ``adj_t`` of size ``(6, 6)``.
    """
    edge_pairs = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    edge_weights = torch.randn(4)
    # The sparse tensor is handed over exactly as constructed, i.e. without
    # an explicit `coalesce()` call.
    sparse_adj = torch.sparse_coo_tensor(
        indices=edge_pairs,
        values=edge_weights,
        size=(3, 3),
    )
    graph = Data(adj_t=sparse_adj)

    for batch in DataLoader([graph, graph], batch_size=2):
        assert batch.adj_t.size() == (6, 6)


if __name__ == '__main__':
import argparse
import time
Expand Down
29 changes: 20 additions & 9 deletions torch_geometric/utils/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,48 +502,59 @@

indices, values = [], []
num_rows = num_cols = 0
is_coalesced = True

if dim == 0:
for i, tensor in enumerate(tensors):
if i == 0:
indices.append(tensor.indices())
indices.append(tensor._indices())
else:
offset = torch.tensor([[num_rows], [0]], device=tensor.device)
indices.append(tensor.indices() + offset)
values.append(tensor.values())
indices.append(tensor._indices() + offset)
values.append(tensor._values())
num_rows += tensor.size(0)
num_cols = max(num_cols, tensor.size(1))
if not tensor.is_coalesced():
is_coalesced = False

Check warning on line 518 in torch_geometric/utils/sparse.py

View check run for this annotation

Codecov / codecov/patch

torch_geometric/utils/sparse.py#L518

Added line #L518 was not covered by tests

elif dim == 1:
for i, tensor in enumerate(tensors):
if i == 0:
indices.append(tensor.indices())
indices.append(tensor._indices())
else:
offset = torch.tensor([[0], [num_cols]], device=tensor.device)
indices.append(tensor.indices() + offset)
values.append(tensor.values())
values.append(tensor._values())
num_rows = max(num_rows, tensor.size(0))
num_cols += tensor.size(1)
is_coalesced = False

else:
for i, tensor in enumerate(tensors):
if i == 0:
indices.append(tensor.indices())
indices.append(tensor._indices())
else:
offset = torch.tensor([[num_rows], [num_cols]],
device=tensor.device)
indices.append(tensor.indices() + offset)
values.append(tensor.values())
indices.append(tensor._indices() + offset)
values.append(tensor._values())
num_rows += tensor.size(0)
num_cols += tensor.size(1)
if not tensor.is_coalesced():
is_coalesced = False

return torch.sparse_coo_tensor(
out = torch.sparse_coo_tensor(
indices=torch.cat(indices, dim=-1),
values=torch.cat(values),
size=(num_rows, num_cols) + values[-1].size()[1:],
device=tensor.device,
)

if is_coalesced:
out = out._coalesced_(True)

return out


def cat_csr(tensors: List[Tensor], dim: Union[int, Tuple[int, int]]) -> Tensor:
assert dim in {0, 1, (0, 1)}
Expand Down
Loading