diff --git a/tensornetwork/block_tensor/block_tensor.py b/tensornetwork/block_tensor/block_tensor.py
index 31c8298e6..515e4cdd1 100644
--- a/tensornetwork/block_tensor/block_tensor.py
+++ b/tensornetwork/block_tensor/block_tensor.py
@@ -20,7 +20,7 @@
 from tensornetwork.network_components import Node, contract, contract_between
 from tensornetwork.backends import backend_factory
 # pylint: disable=line-too-long
-from tensornetwork.block_tensor.index import Index, fuse_index_pair, split_index, fuse_charges, fuse_degeneracies
+from tensornetwork.block_tensor.index import Index, fuse_index_pair, split_index, fuse_charge_pair, fuse_degeneracies, fuse_charges
 import numpy as np
 import itertools
 import time
@@ -34,11 +34,11 @@ def check_flows(flows) -> None:
         "flows = {} contains values different from 1 and -1".format(flows))
 
 
-def compute_num_nonzero(charges: List[np.ndarray],
-                        flows: List[Union[bool, int]]) -> int:
+def compute_fused_charge_degeneracies(charges: List[np.ndarray],
+                                      flows: List[Union[bool, int]]) -> Dict:
   """
-  Compute the number of non-zero elements, given the meta-data of 
-  a symmetric tensor.
+  For a list of charges, compute all possible fused charges resulting
+  from fusing `charges`, together with their respective degeneracyn
   Args:
     charges: List of np.ndarray of int, one for each leg of the 
       underlying tensor. Each np.ndarray `charges[leg]` 
@@ -49,40 +49,64 @@ def compute_num_nonzero(charges: List[np.ndarray],
       of the charges on each leg. `1` is inflowing, `-1` is outflowing
       charge.
   Returns:
-    int: The number of non-zero elements.
+    dict: Mapping fused charges (int) to degeneracies (int)
   """
   if len(charges) == 1:
-    return len(np.nonzero(charges == 0)[0])
-  #get unique charges and their degeneracies on each leg
-  charge_degeneracies = [
-      np.unique(charge, return_counts=True) for charge in charges
-  ]
-  accumulated_charges, accumulated_degeneracies = charge_degeneracies[0]
+    return np.unique(charges[0], return_counts=True)
+
+  # get unique charges and their degeneracies on the first leg.
+  # We are fusing from "left" to "right".
+  accumulated_charges, accumulated_degeneracies = np.unique(
+      charges[0], return_counts=True)
   #multiply the flow into the charges of first leg
   accumulated_charges *= flows[0]
-  for n in range(1, len(charge_degeneracies)):
+  for n in range(1, len(charges)):
     #list of unique charges and list of their degeneracies
     #on the next unfused leg of the tensor
-    leg_charge, leg_degeneracies = charge_degeneracies[n]
+    leg_charges, leg_degeneracies = np.unique(charges[n], return_counts=True)
 
     #fuse the unique charges
     #Note: entries in `fused_charges` are not unique anymore.
     #flow1 = 1 because the flow of leg 0 has already been
     #mulitplied above
-    fused_charges = fuse_charges(
-        q1=accumulated_charges, flow1=1, q2=leg_charge, flow2=flows[n])
+    fused_charges = fuse_charge_pair(
+        q1=accumulated_charges, flow1=1, q2=leg_charges, flow2=flows[n])
     #compute the degeneracies of `fused_charges` charges
+    #`fused_degeneracies` is a list of degeneracies such that
+    # `fused_degeneracies[n]` is the degeneracy of of
+    # charge `c = fused_charges[n]`.
     fused_degeneracies = fuse_degeneracies(accumulated_degeneracies,
                                            leg_degeneracies)
     #compute the new degeneracies resulting from fusing
-    #`accumulated_charges` and `leg_charge_2`
+    #`accumulated_charges` and `leg_charges_2`
     accumulated_charges = np.unique(fused_charges)
-    accumulated_degeneracies = []
+    accumulated_degeneracies = np.empty(
+        len(accumulated_charges), dtype=np.int64)
     for n in range(len(accumulated_charges)):
-      accumulated_degeneracies.append(
-          np.sum(fused_degeneracies[fused_charges == accumulated_charges[n]]))
+      accumulated_degeneracies[n] = np.sum(
+          fused_degeneracies[fused_charges == accumulated_charges[n]])
+  return accumulated_charges, accumulated_degeneracies
+
 
-    accumulated_degeneracies = np.asarray(accumulated_degeneracies)
+def compute_num_nonzero(charges: List[np.ndarray],
+                        flows: List[Union[bool, int]]) -> int:
+  """
+  Compute the number of non-zero elements, given the meta-data of 
+  a symmetric tensor.
+  Args:
+    charges: List of np.ndarray of int, one for each leg of the 
+      underlying tensor. Each np.ndarray `charges[leg]` 
+      is of shape `(D[leg],)`.
+      The bond dimension `D[leg]` can vary on each leg.
+    flows: A list of integers, one for each leg,
+      with values `1` or `-1`, denoting the flow direction
+      of the charges on each leg. `1` is inflowing, `-1` is outflowing
+      charge.
+  Returns:
+    int: The number of non-zero elements.
+  """
+  accumulated_charges, accumulated_degeneracies = compute_fused_charge_degeneracies(
+      charges, flows)
   if len(np.nonzero(accumulated_charges == 0)[0]) == 0:
     raise ValueError(
         "given leg-charges `charges` and flows `flows` are incompatible "
@@ -136,12 +160,14 @@ def compute_nonzero_block_shapes(charges: List[np.ndarray],
   return charge_shape_dict
 
 
-def retrieve_non_zero_diagonal_blocks(
+def retrieve_non_zero_diagonal_blocks_old_version(
     data: np.ndarray,
     charges: List[np.ndarray],
     flows: List[Union[bool, int]],
     return_data: Optional[bool] = True) -> Dict:
   """
+  Deprecated: this version is about 2 times slower (worst case) than the current used
+  implementation
   Given the meta data and underlying data of a symmetric matrix, compute 
   all diagonal blocks and return them in a dict.
   Args: 
@@ -235,6 +261,123 @@ def retrieve_non_zero_diagonal_blocks(
   return blocks
 
 
+def retrieve_non_zero_diagonal_blocks(
+    data: np.ndarray,
+    row_charges: List[Union[List, np.ndarray]],
+    column_charges: List[Union[List, np.ndarray]],
+    row_flows: List[Union[bool, int]],
+    column_flows: List[Union[bool, int]],
+    return_data: Optional[bool] = True) -> Dict:
+  """
+  Given the meta data and underlying data of a symmetric matrix, compute 
+  all diagonal blocks and return them in a dict.
+  `row_charges` and `column_charges` are lists of np.ndarray. The tensor
+  is viewed as a matrix with rows given by fusing `row_charges` and 
+  columns given by fusing `column_charges`. Note that `column_charges`
+  are never explicitly fused (`row_charges` are).
+  Args: 
+    data: An np.ndarray of the data. The number of elements in `data`
+      has to match the number of non-zero elements defined by `charges` 
+      and `flows`
+    row_charges: List of np.ndarray, one for each leg of the row-indices.
+      Each np.ndarray `row_charges[leg]` is of shape `(D[leg],)`.
+      The bond dimension `D[leg]` can vary on each leg.
+    column_charges: List of np.ndarray, one for each leg of the column-indices.
+      Each np.ndarray `row_charges[leg]` is of shape `(D[leg],)`.
+      The bond dimension `D[leg]` can vary on each leg.
+    row_flows: A list of integers, one for each entry in `row_charges`.
+      with values `1` or `-1`, denoting the flow direction
+      of the charges on each leg. `1` is inflowing, `-1` is outflowing
+      charge.
+    column_flows: A list of integers, one for each entry in `column_charges`.
+      with values `1` or `-1`, denoting the flow direction
+      of the charges on each leg. `1` is inflowing, `-1` is outflowing
+      charge.
+    return_data: If `True`, the return dictionary maps quantum numbers `q` to 
+      actual `np.ndarray` with the data. This involves a copy of data.
+      If `False`, the returned dict maps quantum numbers of a list 
+      [locations, shape], where `locations` is an np.ndarray of type np.int64
+      containing the locations of the tensor elements within A.data, i.e.
+      `A.data[locations]` contains the elements belonging to the tensor with 
+      quantum numbers `(q,q). `shape` is the shape of the corresponding array.
+
+  Returns:
+    dict: Dictionary mapping quantum numbers (integers) to either an np.ndarray 
+      or a python list of locations and shapes, depending on the value of `return_data`.
+  """
+  flows = row_flows.copy()
+  flows.extend(column_flows)
+  check_flows(flows)
+  if len(flows) != (len(row_charges) + len(column_charges)):
+    raise ValueError(
+        "`len(flows)` is different from `len(row_charges) + len(column_charges)`"
+    )
+
+  #since we are using row-major we have to fuse the row charges anyway.
+  fused_row_charges = fuse_charges(row_charges, row_flows)
+  #get the unique row-charges
+  unique_row_charges, row_dims = np.unique(
+      fused_row_charges, return_counts=True)
+
+  #get the unique column-charges
+  #we only care about their degeneracies, not their order; that's much faster
+  #to compute since we don't have to fuse all charges explicitly
+  unique_column_charges, column_dims = compute_fused_charge_degeneracies(
+      column_charges, column_flows)
+  #get the charges common to rows and columns (only those matter)
+  common_charges = np.intersect1d(
+      unique_row_charges, -unique_column_charges, assume_unique=True)
+
+  #convenience container for storing the degeneracies of each
+  #row and column charge
+  row_degeneracies = dict(zip(unique_row_charges, row_dims))
+  column_degeneracies = dict(zip(unique_column_charges, column_dims))
+
+  # we only care about charges common to row and columns
+  mask = np.isin(fused_row_charges, common_charges)
+  relevant_row_charges = fused_row_charges[mask]
+
+  #some numpy magic to get the index locations of the blocks
+  #we generate a vector of `len(relevant_row_charges) which,
+  #for each charge `c` in `relevant_row_charges` holds the
+  #column-degeneracy of charge `c`
+  degeneracy_vector = np.empty(len(relevant_row_charges), dtype=np.int64)
+  #for each charge `c` in `common_charges` we generate a boolean mask
+  #for indexing the positions where `relevant_column_charges` has a value of `c`.
+  masks = {}
+  for c in common_charges:
+    mask = relevant_row_charges == c
+    masks[c] = mask
+    degeneracy_vector[mask] = column_degeneracies[-c]
+
+  # the result of the cumulative sum is a vector containing
+  # the stop positions of the non-zero values of each row
+  # within the data vector.
+  # E.g. for `relevant_row_charges` = [0,1,0,0,3],  and
+  # column_degeneracies[0] = 10
+  # column_degeneracies[1] = 20
+  # column_degeneracies[3] = 30
+  # we have
+  # `stop_positions` = [10, 10+20, 10+20+10, 10+20+10+10, 10+20+10+10+30]
+  # The starting positions of consecutive elements (in row-major order) in
+  # each row with charge `c=0` within the data vector are then simply obtained using
+  # masks[0] = [True, False, True, True, False]
+  # and `stop_positions[masks[0]] - column_degeneracies[0]`
+  stop_positions = np.cumsum(degeneracy_vector)
+  blocks = {}
+
+  for c in common_charges:
+    #numpy broadcasting is substantially faster than kron!
+    a = np.expand_dims(stop_positions[masks[c]] - column_degeneracies[-c], 0)
+    b = np.expand_dims(np.arange(column_degeneracies[-c]), 1)
+    if not return_data:
+      blocks[c] = [a + b, (row_degeneracies[c], column_degeneracies[-c])]
+    else:
+      blocks[c] = np.reshape(data[a + b],
+                             (row_degeneracies[c], column_degeneracies[-c]))
+  return blocks
+
+
 def retrieve_non_zero_diagonal_blocks_column_major(
     data: np.ndarray,
     charges: List[np.ndarray],
@@ -438,6 +581,10 @@ def compute_mapping_table(charges: List[np.ndarray],
       with `N` the number of non-zero elements, and `r` 
       the rank of the tensor.
   """
+  # we are using row-major encoding, meaning that the last index
+  # is moving quickest when iterating through the linear data
+  # transposing is done taking, for each value of the indices i_0 to i_N-2
+  # the junk i_N-1 that gives non-zero
   tables = np.meshgrid([np.arange(c.shape[0]) for c in charges], indexing='ij')
   tables = tables[::-1]  #reverse the order
   raise NotImplementedError()
@@ -533,40 +680,23 @@ def init_random():
   def rank(self):
     return len(self.indices)
 
-  #TODO: we should consider to switch the names
-  #`BlockSparseTensor.sparse_shape` and `BlockSparseTensor.shape`,
-  #i.e. have `BlockSparseTensor.shape`return the sparse shape of the tensor.
-  #This may be more convenient for building tensor-type and backend
-  #agnostic code. For example, in MPS code we essentially never
-  #explicitly set a shape to a certain value (apart from initialization).
-  #That is, code like this
-  #```
-  #tensor = np.random.rand(10,10,10)
-  #```
-  #is never used. Rather one inquires shapes of tensors and
-  #multiplies them to get new shapes:
-  #```
-  #new_tensor = reshape(tensor, [tensor.shape[0]*tensor.shape[1], tensor.shape[2]])
-  #```
-  #Thduis the return type of `BlockSparseTensor.shape` is never inspected explicitly
-  #(apart from debugging).
   @property
-  def sparse_shape(self) -> Tuple:
+  def dense_shape(self) -> Tuple:
     """
-    The sparse shape of the tensor.
-    Returns a copy of self.indices. 
+    The dense shape of the tensor.
     Returns:
-      Tuple: A tuple of `Index` objects.
+      Tuple: A tuple of `int`.
     """
-
-    return tuple([i.copy() for i in self.indices])
+    return tuple([i.dimension for i in self.indices])
 
   @property
   def shape(self) -> Tuple:
     """
-    The dense shape of the tensor.
+    The sparse shape of the tensor.
+    Returns:
+      Tuple: A tuple of `Index` objects.
     """
-    return tuple([i.dimension for i in self.indices])
+    return tuple(self.indices)
 
   @property
   def dtype(self) -> Type[np.number]:
@@ -584,7 +714,6 @@ def transpose(self, order):
     """
     Transpose the tensor into the new order `order`
     """
-
     raise NotImplementedError('transpose is not implemented!!')
 
   def reset_shape(self) -> None:
@@ -640,8 +769,7 @@ def reshape(self, shape: Union[Iterable[Index], Iterable[int]]) -> None:
       else:
         dense_shape.append(s)
     # a few simple checks
-
-    if np.prod(dense_shape) != np.prod(self.shape):
+    if np.prod(dense_shape) != np.prod(self.dense_shape):
       raise ValueError("A tensor with {} elements cannot be "
                        "reshaped into a tensor with {} elements".format(
                            np.prod(self.shape), np.prod(dense_shape)))
@@ -665,23 +793,26 @@ def raise_error():
 
     self.reset_shape()  #bring tensor back into its elementary shape
     for n in range(len(dense_shape)):
-      if dense_shape[n] > self.shape[n]:
-        while dense_shape[n] > self.shape[n]:
+      if dense_shape[n] > self.dense_shape[n]:
+        while dense_shape[n] > self.dense_shape[n]:
           #fuse indices
           i1, i2 = self.indices.pop(n), self.indices.pop(n)
           #note: the resulting flow is set to one since the flow
           #is multiplied into the charges. As a result the tensor
           #will then be invariant in any case.
           self.indices.insert(n, fuse_index_pair(i1, i2))
-        if self.shape[n] > dense_shape[n]:
+        if self.dense_shape[n] > dense_shape[n]:
           raise_error()
-      elif dense_shape[n] < self.shape[n]:
+      elif dense_shape[n] < self.dense_shape[n]:
         raise_error()
 
   def get_diagonal_blocks(self, return_data: Optional[bool] = True) -> Dict:
     """
     Obtain the diagonal blocks of symmetric matrix.
     BlockSparseTensor has to be a matrix.
+    For matrices with shape[0] << shape[1], this routine avoids explicit fusion
+    of column charges.
+
     Args:
       return_data: If `True`, the return dictionary maps quantum numbers `q` to 
         actual `np.ndarray` with the data. This involves a copy of data.
@@ -698,7 +829,43 @@ def get_diagonal_blocks(self, return_data: Optional[bool] = True) -> Dict:
       raise ValueError(
           "`get_diagonal_blocks` can only be called on a matrix, but found rank={}"
           .format(self.rank))
+
+    row_indices = self.indices[0].get_elementary_indices()
+    column_indices = self.indices[1].get_elementary_indices()
+
     return retrieve_non_zero_diagonal_blocks(
+        data=self.data,
+        row_charges=[i.charges for i in row_indices],
+        column_charges=[i.charges for i in column_indices],
+        row_flows=[i.flow for i in row_indices],
+        column_flows=[i.flow for i in column_indices],
+        return_data=return_data)
+
+  def get_diagonal_blocks_old_version(
+      self, return_data: Optional[bool] = True) -> Dict:
+    """
+    Deprecated
+
+    Obtain the diagonal blocks of symmetric matrix.
+    BlockSparseTensor has to be a matrix.
+    Args:
+      return_data: If `True`, the return dictionary maps quantum numbers `q` to 
+        actual `np.ndarray` with the data. This involves a copy of data.
+        If `False`, the returned dict maps quantum numbers of a list 
+        [locations, shape], where `locations` is an np.ndarray of type np.int64
+        containing the locations of the tensor elements within A.data, i.e.
+        `A.data[locations]` contains the elements belonging to the tensor with 
+        quantum numbers `(q,q). `shape` is the shape of the corresponding array.
+    Returns:
+      dict: Dictionary mapping charge to np.ndarray of rank 2 (a matrix)
+    
+    """
+    if self.rank != 2:
+      raise ValueError(
+          "`get_diagonal_blocks` can only be called on a matrix, but found rank={}"
+          .format(self.rank))
+
+    return retrieve_non_zero_diagonal_blocks_old_version(
         data=self.data,
         charges=self.charges,
         flows=self.flows,
@@ -707,6 +874,8 @@ def get_diagonal_blocks(self, return_data: Optional[bool] = True) -> Dict:
   def get_diagonal_blocks_deprecated(
       self, return_data: Optional[bool] = True) -> Dict:
     """
+    Deprecated 
+
     Obtain the diagonal blocks of symmetric matrix.
     BlockSparseTensor has to be a matrix.
     Args:
diff --git a/tensornetwork/block_tensor/index.py b/tensornetwork/block_tensor/index.py
index 326311ec1..96e8ba2d6 100644
--- a/tensornetwork/block_tensor/index.py
+++ b/tensornetwork/block_tensor/index.py
@@ -42,6 +42,9 @@ def __init__(self,
     self.right_child = right_child
     self.name = name if name else 'index'
 
+  def __repr__(self):
+    return str(self.dimension)
+
   @property
   def is_leave(self):
     return (self.left_child is None) and (self.right_child is None)
@@ -113,22 +116,22 @@ def __mul__(self, index: "Index") -> "Index":
   def charges(self):
     if self.is_leave:
       return self._charges
-    fused_charges = fuse_charges(self.left_child.charges, self.left_child.flow,
-                                 self.right_child.charges,
-                                 self.right_child.flow)
+    fused_charges = fuse_charge_pair(
+        self.left_child.charges, self.left_child.flow, self.right_child.charges,
+        self.right_child.flow)
 
     return fused_charges
 
 
-def fuse_charges(q1: Union[List, np.ndarray], flow1: int,
-                 q2: Union[List, np.ndarray], flow2: int) -> np.ndarray:
+def fuse_charge_pair(q1: Union[List, np.ndarray], flow1: int,
+                     q2: Union[List, np.ndarray], flow2: int) -> np.ndarray:
   """
   Fuse charges `q1` with charges `q2` by simple addition (valid
   for U(1) charges). `q1` and `q2` typically belong to two consecutive
   legs of `BlockSparseTensor`.
   Given `q1 = [0,1,2]` and `q2 = [10,100]`, this returns
   `[10, 11, 12, 100, 101, 102]`.
-  When using column-major ordering of indices in `BlockSparseTensor`, 
+  When using row-major ordering of indices in `BlockSparseTensor`, 
   the position of q1 should be "to the left" of the position of q2.
   Args:
     q1: Iterable of integers
@@ -143,6 +146,27 @@ def fuse_charges(q1: Union[List, np.ndarray], flow1: int,
       len(q1) * len(q2))
 
 
+def fuse_charges(charges: List[Union[List, np.ndarray]],
+                 flows: List[int]) -> np.ndarray:
+  """
+  Fuse all `charges` by simple addition (valid
+  for U(1) charges). 
+  Args:
+    chargs: A list of charges to be fused.
+    flows: A list of flows, one for each element in `charges`.
+  Returns:
+    np.ndarray: The result of fusing `charges`.
+  """
+  if len(charges) == 1:
+    #nothing to do
+    return charges[0]
+  fused_charges = charges[0] * flows[0]
+  for n in range(1, len(charges)):
+    fused_charges = fuse_charge_pair(
+        q1=fused_charges, flow1=1, q2=charges[n], flow2=flows[n])
+  return fused_charges
+
+
 def fuse_degeneracies(degen1: Union[List, np.ndarray],
                       degen2: Union[List, np.ndarray]) -> np.ndarray:
   """
@@ -151,7 +175,7 @@ def fuse_degeneracies(degen1: Union[List, np.ndarray],
   consecutive legs of `BlockSparseTensor`.
   Given `q1 = [0,1,2]` and `q2 = [10,100]`, this returns
   `[10, 11, 12, 100, 101, 102]`.
-  When using column-major ordering of indices in `BlockSparseTensor`, 
+  When using row-major ordering of indices in `BlockSparseTensor`, 
   the position of q1 should be "to the left" of the position of q2.
   Args:
     q1: Iterable of integers
@@ -182,8 +206,6 @@ def fuse_index_pair(left_index: Index,
     raise ValueError(
         "index1 and index2 are the same object. Can only fuse distinct objects")
 
-  # fused_charges = fuse_charges(left_index.charges, left_index.flow,
-  #                              right_index.charges, right_index.flow)
   return Index(
       charges=None, flow=flow, left_child=left_index, right_child=right_index)
 
@@ -212,7 +234,7 @@ def split_index(index: Index) -> Tuple[Index, Index]:
   Returns:
     Tuple[Index, Index]: The result of splitting `index`.
   """
-  if (not index.left_child) or (not index.right_child):
+  if index.is_leave:
     raise ValueError("cannot split an elementary index")
 
   return index.left_child, index.right_child
diff --git a/tensornetwork/block_tensor/index_test.py b/tensornetwork/block_tensor/index_test.py
index ff331a36a..780034133 100644
--- a/tensornetwork/block_tensor/index_test.py
+++ b/tensornetwork/block_tensor/index_test.py
@@ -1,14 +1,14 @@
 import numpy as np
 # pylint: disable=line-too-long
-from tensornetwork.block_tensor.index import Index, fuse_index_pair, split_index, fuse_charges, fuse_degeneracies
+from tensornetwork.block_tensor.index import Index, fuse_index_pair, split_index, fuse_charges, fuse_degeneracies, fuse_charge_pair
 
 
-def test_fuse_charges():
+def test_fuse_charge_pair():
   q1 = np.asarray([0, 1])
   q2 = np.asarray([2, 3, 4])
-  fused_charges = fuse_charges(q1, 1, q2, 1)
+  fused_charges = fuse_charge_pair(q1, 1, q2, 1)
   assert np.all(fused_charges == np.asarray([2, 3, 3, 4, 4, 5]))
-  fused_charges = fuse_charges(q1, 1, q2, -1)
+  fused_charges = fuse_charge_pair(q1, 1, q2, -1)
   assert np.all(fused_charges == np.asarray([-2, -1, -3, -2, -4, -3]))
 
 
@@ -26,7 +26,7 @@ def test_index_fusion_mul():
   i12 = i1 * i2
   assert i12.left_child is i1
   assert i12.right_child is i2
-  assert np.all(i12.charges == fuse_charges(q1, 1, q2, 1))
+  assert np.all(i12.charges == fuse_charge_pair(q1, 1, q2, 1))
 
 
 def test_index_fusion():
@@ -43,4 +43,52 @@ def test_index_fusion():
   i12 = fuse_index_pair(i1, i2)
   assert i12.left_child is i1
   assert i12.right_child is i2
-  assert np.all(i12.charges == fuse_charges(q1, 1, q2, 1))
+  assert np.all(i12.charges == fuse_charge_pair(q1, 1, q2, 1))
+
+
+def test_elementary_indices():
+  D = 10
+  B = 4
+  dtype = np.int16
+  q1 = np.random.randint(-B // 2, B // 2 + 1, D).astype(dtype)
+  q2 = np.random.randint(-B // 2, B // 2 + 1, D).astype(dtype)
+  i1 = Index(charges=q1, flow=1, name='index1')
+  i2 = Index(charges=q2, flow=1, name='index2')
+  i3 = Index(charges=q1, flow=1, name='index3')
+  i4 = Index(charges=q2, flow=1, name='index4')
+
+  i12 = i1 * i2
+  i34 = i3 * i4
+  elmt12 = i12.get_elementary_indices()
+  assert elmt12[0] is i1
+  assert elmt12[1] is i2
+
+  i1234 = i12 * i34
+  elmt1234 = i1234.get_elementary_indices()
+  assert elmt1234[0] is i1
+  assert elmt1234[1] is i2
+  assert elmt1234[2] is i3
+  assert elmt1234[3] is i4
+
+
+def test_copy():
+  D = 10
+  B = 4
+  dtype = np.int16
+  q1 = np.random.randint(-B // 2, B // 2 + 1, D).astype(dtype)
+  q2 = np.random.randint(-B // 2, B // 2 + 1, D).astype(dtype)
+  i1 = Index(charges=q1, flow=1, name='index1')
+  i2 = Index(charges=q2, flow=1, name='index2')
+  i3 = Index(charges=q1, flow=-1, name='index3')
+  i4 = Index(charges=q2, flow=-1, name='index4')
+
+  i12 = i1 * i2
+  i34 = i3 * i4
+  i1234 = i12 * i34
+  i1234_copy = i1234.copy()
+
+  elmt1234 = i1234_copy.get_elementary_indices()
+  assert elmt1234[0] is not i1
+  assert elmt1234[1] is not i2
+  assert elmt1234[2] is not i3
+  assert elmt1234[3] is not i4