pyg-team · rusty1s · Mar 25, 2024 · Mar 25, 2024 · Mar 25, 2024 · Mar 25, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
+- Allow mini-batching of uncoalesced sparse matrices ([#9099](https://github.com/pyg-team/pytorch_geometric/pull/9099))
 - Improvements to multi-node `ogbn-papers100m` default hyperparameters and adding evaluation on all ranks ([#8823](https://github.com/pyg-team/pytorch_geometric/pull/8823))
 - Changed distributed sampler and loader tests to correctly report failures in subprocesses to `pytest` ([#8978](https://github.com/pyg-team/pytorch_geometric/pull/8978))
 - Remove filtering of node/edge types in `trim_to_layer` functionality ([#9021](https://github.com/pyg-team/pytorch_geometric/pull/9021))

@@ -240,6 +240,19 @@ def test_dataloader_tensor_frame():
         assert batch.edge_index.max() >= 10
 
 
+def test_dataloader_sparse():
+    adj_t = torch.sparse_coo_tensor(  # 3x3 COO matrix with 4 entries
+        indices=torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]]),
+        values=torch.randn(4),
+        size=(3, 3),
+    )
+    data = Data(adj_t=adj_t)  # `adj_t` is never `.coalesce()`-d here
+    # Batch two copies together; the collated `adj_t` must be 6x6.
+    loader = DataLoader([data, data], batch_size=2)
+    for batch in loader:
+        assert batch.adj_t.size() == (6, 6)
+
+
 if __name__ == '__main__':
     import argparse
     import time

@@ -502,48 +502,59 @@
 
     indices, values = [], []
     num_rows = num_cols = 0
+    is_coalesced = True
 
     if dim == 0:
         for i, tensor in enumerate(tensors):
             if i == 0:
-                indices.append(tensor.indices())
+                indices.append(tensor._indices())
             else:
                 offset = torch.tensor([[num_rows], [0]], device=tensor.device)
-                indices.append(tensor.indices() + offset)
-            values.append(tensor.values())
+                indices.append(tensor._indices() + offset)
+            values.append(tensor._values())
             num_rows += tensor.size(0)
             num_cols = max(num_cols, tensor.size(1))
+            if not tensor.is_coalesced():
+                is_coalesced = False
 
     elif dim == 1:
         for i, tensor in enumerate(tensors):
             if i == 0:
-                indices.append(tensor.indices())
+                indices.append(tensor._indices())
             else:
                 offset = torch.tensor([[0], [num_cols]], device=tensor.device)
                 indices.append(tensor.indices() + offset)
-            values.append(tensor.values())
+            values.append(tensor._values())
             num_rows = max(num_rows, tensor.size(0))
             num_cols += tensor.size(1)
+            is_coalesced = False
 
     else:
         for i, tensor in enumerate(tensors):
             if i == 0:
-                indices.append(tensor.indices())
+                indices.append(tensor._indices())
             else:
                 offset = torch.tensor([[num_rows], [num_cols]],
                                       device=tensor.device)
-                indices.append(tensor.indices() + offset)
-            values.append(tensor.values())
+                indices.append(tensor._indices() + offset)
+            values.append(tensor._values())
             num_rows += tensor.size(0)
             num_cols += tensor.size(1)
+            if not tensor.is_coalesced():
+                is_coalesced = False
 
-    return torch.sparse_coo_tensor(
+    out = torch.sparse_coo_tensor(
         indices=torch.cat(indices, dim=-1),
         values=torch.cat(values),
         size=(num_rows, num_cols) + values[-1].size()[1:],
         device=tensor.device,
     )
 
+    if is_coalesced:
+        out = out._coalesced_(True)
+
+    return out
+
 
 def cat_csr(tensors: List[Tensor], dim: Union[int, Tuple[int, int]]) -> Tensor:
     assert dim in {0, 1, (0, 1)}