From 178f82dc481bf31961206412c22dd5519a245b49 Mon Sep 17 00:00:00 2001
From: Wuwei Lin <wuwei@apache.org>
Date: Tue, 27 Sep 2022 16:49:31 -0700
Subject: [PATCH] [TOPI] Implement Einsum with reduction axes (#12913)

* [TOPI] Implement Einsum with reduction axes

* address comments
---
 include/tvm/topi/einsum.h                    | 889 +------------------
 src/relay/op/tensor/math.cc                  |   2 +-
 src/topi/einsum.cc                           | 353 ++++++++
 src/topi/transform.cc                        |   4 -
 tests/python/topi/python/test_topi_einsum.py |  36 +-
 5 files changed, 397 insertions(+), 887 deletions(-)
 create mode 100644 src/topi/einsum.cc
diff --git a/include/tvm/topi/einsum.h b/include/tvm/topi/einsum.h
index a0c4039909ad..5e7813f8431b 100644
--- a/include/tvm/topi/einsum.h
+++ b/include/tvm/topi/einsum.h
@@ -49,568 +49,6 @@ namespace topi {
 using namespace tvm::te;
 using namespace topi::detail;
 
-/*!
- * \brief Compute the stride of the given shape.
- *
- * \param shape for the operation.
- *
- * \return the stride of the shape.
- */
-inline Array<PrimExpr> GetStride(const Array<PrimExpr> shape) {
-  size_t ndim = shape.size();
-  int prod = 1;
-  Array<PrimExpr> stride = Array<PrimExpr>(ndim, -1);
-  for (int i = ndim - 1; i >= 0; i--) {
-    stride.Set(i, if_then_else(shape[i] > 1, prod, 0));
-    prod = prod * GetConstInt(shape[i]);
-  }
-  return stride;
-}
-
-/*!
- * \brief Pad the shape with 1.
- *
- * \param shape the input shape to be padded
- * \param odim the padding size of the objective shape.
- *
- * \return the padded shape.
- */
-inline Array<PrimExpr> Pad(const Array<PrimExpr> shape, int odim) {
-  int ndim = shape.size();
-  CHECK_GE(odim, ndim);
-  Array<PrimExpr> ret(static_cast<size_t>(odim), 1);
-  for (int idim = 0; idim < ndim; ++idim) {
-    ret.Set(idim, shape[idim]);
-  }
-  return ret;
-}
-
-/*!
- * \brief Parse the subscripts for one operand into an output of 'ndim' labels.
- *
- * \param subscripts the subscripts for to be parsed.
- * \param length subscripts[0: length] represents the current operand.
- * \param ndim the ndim of current operand.
- * \param iop the index of the operand.
- * \param op_labels the parsing result.
- *        For Example:
- *           subscripts="abbcbc",  ndim=6 -> op_labels=[97, 98, -1, 99, -3, -2].
- *           subscripts="ab...bc", ndim=6 -> op_labels=[97, 98, 0, 0, -3, 99].
- * \param label_counts Count the number the label appears.
- * \param min_label Save the minimal label according to ASCII.
- * \param max_label Save the maximal label according to ASCII.
- *
- * \return 0.
- */
-inline int ParseOperandSubscripts(const char* subscripts, int length, int ndim, int iop,
-                                  char* op_labels, char* label_counts, int* min_label,
-                                  int* max_label) {
-  int i;
-  int idim = 0;
-  int ellipsis = -1;
-
-  /* Process all labels for this operand */
-  for (i = 0; i < length; ++i) {
-    int label = subscripts[i];
-
-    /* A proper label for an axis. */
-    if (label > 0 && isalpha(label)) {
-      /* Check we don't exceed the operator dimensions. */
-      CHECK(idim < ndim) << "einstein sum subscripts string contains "
-                         << "too many subscripts for operand " << iop;
-
-      op_labels[idim++] = label;
-      if (label < *min_label) {
-        *min_label = label;
-      }
-      if (label > *max_label) {
-        *max_label = label;
-      }
-      label_counts[label]++;
-    } else if (label == '.') {
-      /* The beginning of the ellipsis. */
-      /* Check it's a proper ellipsis. */
-      CHECK(
-          !(ellipsis != -1 || i + 2 >= length || subscripts[++i] != '.' || subscripts[++i] != '.'))
-          << "einstein sum subscripts string contains a "
-          << "'.' that is not part of an ellipsis ('...') "
-          << "in operand " << iop;
-
-      ellipsis = idim;
-    } else {
-      CHECK(label == ' ') << "invalid subscript '" << static_cast<char>(label)
-                          << "' in einstein sum "
-                          << "subscripts string, subscripts must "
-                          << "be letters";
-    }
-  }
-
-  /* No ellipsis found, labels must match dimensions exactly. */
-  if (ellipsis == -1) {
-    CHECK(idim == ndim) << "operand has more dimensions than subscripts "
-                        << "given in einstein sum, but no '...' ellipsis "
-                        << "provided to broadcast the extra dimensions.";
-  } else if (idim < ndim) {
-    /* Ellipsis found, may have to add broadcast dimensions. */
-    /* Move labels after ellipsis to the end. */
-    for (i = 0; i < idim - ellipsis; ++i) {
-      op_labels[ndim - i - 1] = op_labels[idim - i - 1];
-    }
-    /* Set all broadcast dimensions to zero. */
-    for (i = 0; i < ndim - idim; ++i) {
-      op_labels[ellipsis + i] = 0;
-    }
-  }
-
-  /*
-   * Find any labels duplicated for this operand, and turn them
-   * into negative offsets to the axis to merge with.
-   *
-   * In C, the char type may be signed or unsigned, but with
-   * twos complement arithmetic the char is ok either way here, and
-   * later where it matters the char is cast to a signed char.
-   */
-  for (idim = 0; idim < ndim - 1; ++idim) {
-    int label = op_labels[idim];
-    /* If it is a proper label, find any duplicates of it. */
-    if (label > 0) {
-      /* Search for the next matching label. */
-      char* next = reinterpret_cast<char*>(memchr(op_labels + idim + 1, label, ndim - idim - 1));
-
-      while (next != nullptr) {
-        /* The offset from next to op_labels[idim] (negative). */
-        *next = static_cast<char>((op_labels + idim) - next);
-        /* Search for the next matching label. */
-        next = reinterpret_cast<char*>(memchr(next + 1, label, op_labels + ndim - 1 - next));
-      }
-    }
-  }
-  return 0;
-}
-
-/*!
- * \brief Parse the subscripts for the output into an output that includes 'ndim_broadcast'
- *        unlabeled dimensions.
- *
- * \param subscripts the subscripts for to be parsed.
- * \param length subscripts[0: length] represents the output operand.
- * \param ndim_broadcast the broadcast dimension number.
- * \param label_counts Count the number the label appears.
- * \param out_labels similar to the op_labels in ParseOperandSubscripts, for each
- *        dimension, the ASCII code of the corresponding label. zero for the broadcasting dim.
- *
- * \return the total number of output dimensions or -1 if there is an error.
- */
-inline int ParseOutputSubscripts(const char* subscripts, int length, int ndim_broadcast,
-                                 const char* label_counts, char* out_labels) {
-  int i, bdim;
-  int ndim = 0;
-  int ellipsis = 0;
-
-  /* Process all the output labels. */
-  for (i = 0; i < length; ++i) {
-    int label = subscripts[i];
-
-    /* A proper label for an axis. */
-    if (label > 0 && isalpha(label)) {
-      /* Check that it doesn't occur again. */
-      CHECK(memchr(subscripts + i + 1, label, length - i - 1) == nullptr)
-          << "einstein sum subscripts string includes "
-          << "output subscript '" << static_cast<char>(label) << "' multiple times";
-
-      /* Check that it was used in the inputs. */
-      CHECK(label_counts[label] != 0)
-          << "einstein sum subscripts string included "
-          << "output subscript '" << static_cast<char>(label) << "' which never appeared "
-          << "in an input";
-
-      /* Check that there is room in out_labels for this label. */
-      CHECK(ndim < NPY_MAXDIMS) << "einstein sum subscripts string contains "
-                                << "too many subscripts in the output";
-
-      out_labels[ndim++] = label;
-    } else if (label == '.') {
-      /* The beginning of the ellipsis. */
-      /* Check it is a proper ellipsis. */
-      CHECK(!(ellipsis || i + 2 >= length || subscripts[++i] != '.' || subscripts[++i] != '.'))
-          << "einstein sum subscripts string "
-          << "contains a '.' that is not part of "
-          << "an ellipsis ('...') in the output";
-
-      /* Check there is room in out_labels for broadcast dims. */
-      CHECK(ndim + ndim_broadcast <= NPY_MAXDIMS) << "einstein sum subscripts string contains "
-                                                  << "too many subscripts in the output";
-
-      ellipsis = 1;
-      for (bdim = 0; bdim < ndim_broadcast; ++bdim) {
-        out_labels[ndim++] = 0;
-      }
-    } else {
-      CHECK(label == ' ') << "invalid subscript '" << static_cast<char>(label)
-                          << "' in einstein sum "
-                          << "subscripts string, subscripts must "
-                          << "be letters";
-    }
-  }
-
-  /* If no ellipsis was found there should be no broadcast dimensions. */
-  CHECK(!(!ellipsis && ndim_broadcast > 0)) << "output has more dimensions than subscripts "
-                                            << "given in einstein sum, but no '...' ellipsis "
-                                            << "provided to broadcast the extra dimensions.";
-
-  return ndim;
-}
-
-/*!
- * \brief If any dimensions are combined, create a view that combines them.
- *        Shows in newshape and newstride.
- *
- * \param op the operand tensor.
- * \param iop the index of the operand.
- * \param labels the op_labels fot the operand. Like [97, 98, -2] for "aba".
- * \param newshape The combined shape.
- * \param newstride The combined stride.
- *
- * For example:
- *  "aba -> ab",              shape = [2,3,2] stride = [6,2,1]
- *  op_labels = [97, 98, -2], newshape = [2,3], newstride = [7,2]
- */
-inline void GetCombinedDimsView(const Tensor& op, int iop, char* labels, Array<PrimExpr>* newshape,
-                                Array<PrimExpr>* newstride) {
-  int idim, ndim, icombine, combineoffset;
-  int icombinemap[NPY_MAXDIMS];
-  int newdim;
-
-  Array<PrimExpr> shape = op->shape;
-  Array<PrimExpr> stride = GetStride(shape);
-  ndim = op.ndim();
-  newdim = newshape->size();
-
-  /* Initialize the dimensions and strides to zero */
-  for (idim = 0; idim < newdim; ++idim) {
-    newshape->Set(idim, 0);
-    newstride->Set(idim, 0);
-  }
-
-  /* Copy the dimensions and strides, except when collapsing */
-  icombine = 0;
-  for (idim = 0; idim < ndim; ++idim) {
-    /*
-     * The char type may be either signed or unsigned, we
-     * need it to be signed here.
-     */
-    int label = (signed char)labels[idim];
-    /* If this label says to merge axes, get the actual label */
-    if (label < 0) {
-      combineoffset = label;
-      label = labels[idim + label];
-    } else {
-      combineoffset = 0;
-      if (icombine != idim) {
-        labels[icombine] = labels[idim];
-      }
-      icombinemap[idim] = icombine;
-    }
-    /* If the label is 0, it's an unlabeled broadcast dimension */
-    if (label == 0) {
-      newshape->Set(icombine, shape[idim]);
-      newstride->Set(icombine, stride[idim]);
-    } else {
-      /* Update the combined axis dimensions and strides */
-      int i = icombinemap[idim + combineoffset];
-      CHECK(!((combineoffset < 0) &&
-              GetConstInt((*newshape)[i] != 0 && (*newshape)[i] != shape[idim])))
-          << "dimensions in operand " << iop << " for collapsing index '" << label
-          << "' don't match (" << GetConstInt((*newshape)[i]) << " != " << shape[idim] << ")";
-      newshape->Set(i, shape[idim]);
-      newstride->Set(i, (*newstride)[i] + stride[idim]);
-    }
-
-    /* If the label didn't say to combine axes, increment dest i */
-    if (combineoffset == 0) {
-      icombine++;
-    }
-  }
-}
-
-/*!
- * \brief Prepare the operand axes to match each stride or shape pair.
- *
- * \param ndim the ndim of the operand tensor.
- * \param iop the index of the operand.
- * \param labels the op_labels fot the operand. [97, 98, -1, 99, -3, -2] for "abbcbc".
- * \param axes The matched axes to be calculated.
- * \param ndim_iter the dimension of iterating. Subscripts "ab, bc -> ac" ndim_iter = 3.
- * \param iter_labels output_labels with the iterating label. ['a', 'c', 'b'] for the case above.
- */
-inline static int PrepareOpAxes(int ndim, int iop, char* labels, int* axes, int ndim_iter,
-                                char* iter_labels) {
-  int i, label, ibroadcast;
-
-  ibroadcast = ndim - 1;
-  for (i = ndim_iter - 1; i >= 0; --i) {
-    label = iter_labels[i];
-    /*
-     * If it's an unlabeled broadcast dimension, choose
-     * the next broadcast dimension from the operand.
-     */
-    if (label == 0) {
-      while (ibroadcast >= 0 && labels[ibroadcast] != 0) {
-        --ibroadcast;
-      }
-      /*
-       * If we used up all the operand broadcast dimensions,
-       * extend it with a "newaxis"
-       */
-      if (ibroadcast < 0) {
-        axes[i] = -1;
-      } else {
-        /* Otherwise map to the broadcast axis */
-        axes[i] = ibroadcast;
-        --ibroadcast;
-      }
-    } else {
-      /* It's a labeled dimension, find the matching one */
-      char* match = reinterpret_cast<char*>(memchr(labels, label, ndim));
-      /* If the op doesn't have the label, broadcast it */
-      if (match == nullptr) {
-        axes[i] = -1;
-      } else {
-        /* Otherwise use it */
-        axes[i] = match - labels;
-      }
-    }
-  }
-  return 0;
-}
-
-/*!
- * \brief Count SubString.
- * \param str the object string
- * \param sub the pattern string
- *
- * \return number of substring
- */
-inline int CountSubstring(const std::string& str, const std::string& sub) {
-  int count = 0;
-  std::string::size_type pos = 0;
-  while ((pos = str.find(sub, pos)) != std::string::npos) {
-    ++count;
-    pos += sub.length();
-  }
-  return count;
-}
-
-/*!
- * \brief Transfer string to.
- * \param str input string.
- *
- * \return bitset.
- */
-inline std::bitset<LABELRANGE> Str2Set(const std::string& str) {
-  std::bitset<LABELRANGE> ret;
-  for (const char& c : str) {
-    ret.set(static_cast<int>(c));
-  }
-  return ret;
-}
-
-/*!
- * \brief Split str according to substring.
- * \param str input string.
- * \param sub the split pattern string.
- *
- * \return vector contains the splited substring.
- */
-inline std::vector<std::string> Split(const std::string& str, const std::string& sub) {
-  std::string::size_type pos = 0;
-  std::string::size_type start = 0;
-  std::vector<std::string> ret;
-  while ((pos = str.find(sub, start)) != std::string::npos) {
-    ret.push_back(str.substr(start, pos - start));
-    start = pos + sub.length();
-  }
-  ret.push_back(str.substr(start));
-  return ret;
-}
-
-/*!
- * \brief Parse the input subscripts into a vector of strings.
- * \param subscripts input subscripts.
- * \param operands operand tensors.
- *
- * \return vector of strings, vector[0] represents the input part, vector[1] represents the output.
- * if no output, the vector[1] is NULL.
- * "ab, bc -> ac" => ["ab,bc", "ac"]
- */
-inline std::tuple<std::string, std::string> ParseEinsumInput(
-    std::string subscripts, const std::vector<Array<PrimExpr>>& operands) {
-  const std::string einsum_symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-  std::bitset<LABELRANGE> einsum_symbols_set;
-  for (const char& c : einsum_symbols) {
-    einsum_symbols_set.set(c);
-  }
-
-  CHECK_NE(operands.size(), 0U) << "No input operands";
-
-  auto end_pos = std::remove(subscripts.begin(), subscripts.end(), ' ');
-  subscripts.erase(end_pos, subscripts.end());
-
-  // Ensure all characters are valid
-  for (const char& c : subscripts) {
-    if (c == '.' || c == ',' || c == '-' || c == '>') {
-      continue;
-    }
-    CHECK(einsum_symbols_set.test(c)) << "Character " << c << " is not a valid symbol.";
-  }
-
-  // Check for proper "->"
-  if (subscripts.find('-') != std::string::npos || subscripts.find('>') != std::string::npos) {
-    bool invalid = (std::count(subscripts.begin(), subscripts.end(), '-') > 1 ||
-                    std::count(subscripts.begin(), subscripts.end(), '>') > 1);
-    CHECK(!invalid && CountSubstring(subscripts, "->") == 1)
-        << "Subscripts can only contain one '->'.";
-  }
-
-  // Parse ellipses
-  if (subscripts.find('.') != std::string::npos) {
-    std::string used = subscripts;
-    used.erase(
-        std::remove_if(used.begin(), used.end(),
-                       [](const char& c) { return c == '.' || c == ',' || c == '-' || c == '>'; }),
-        used.end());
-
-    std::bitset<LABELRANGE> used_set = Str2Set(used);
-    std::string ellipse_inds = "";
-    for (const char& c : einsum_symbols) {
-      if (!used_set.test(static_cast<int>(c))) {
-        ellipse_inds.append(1, c);
-      }
-    }
-    int longest = 0;
-    std::string input_tmp, output_sub;
-    std::vector<std::string> split_subscripts;
-    bool out_sub;
-
-    if (subscripts.find("->") != std::string::npos) {
-      std::vector<std::string> tmp = Split(subscripts, "->");
-      input_tmp = tmp[0];
-      output_sub = tmp[1];
-      split_subscripts = Split(input_tmp, ",");
-      out_sub = true;
-    } else {
-      split_subscripts = Split(subscripts, ",");
-      out_sub = false;
-    }
-
-    size_t size_split_subscripts = split_subscripts.size();
-    subscripts = "";
-    for (size_t i = 0; i < size_split_subscripts; ++i) {
-      const std::string& sub = split_subscripts[i];
-      if (sub.find('.') != std::string::npos) {
-        CHECK_EQ(std::count(sub.begin(), sub.end(), '.'), 3) << "Invalid Ellipses";
-        CHECK_EQ(CountSubstring(sub, "..."), 1) << "Invalid Ellipses";
-
-        // Take into account numerical values
-        int ellipse_count = 0;
-        if (operands[i].size() == 0) {
-          ellipse_count = 0;
-        } else {
-          ellipse_count = std::max(operands[i].size(), static_cast<size_t>(1));
-          ellipse_count -= sub.length() - 3;
-        }
-
-        if (ellipse_count > longest) {
-          longest = ellipse_count;
-        }
-
-        CHECK_GE(ellipse_count, 0) << "Ellipses lengths do not match.";
-        if (ellipse_count == 0) {
-          split_subscripts[i].erase(sub.find("..."), 3);
-        } else {
-          std::string rep_inds = ellipse_inds.substr(ellipse_inds.length() - ellipse_count);
-          split_subscripts[i].replace(sub.find("..."), 3, rep_inds);
-        }
-      }
-      subscripts += split_subscripts[i];
-      if (i + 1 < size_split_subscripts) {
-        subscripts += ",";
-      }
-    }
-    std::string out_ellipse;
-    if (longest == 0) {
-      out_ellipse = "";
-    } else {
-      out_ellipse = ellipse_inds.substr(ellipse_inds.length() - longest);
-    }
-
-    if (out_sub) {
-      output_sub.replace(output_sub.find("..."), 3, out_ellipse);
-      subscripts += "->" + output_sub;
-    } else {
-      // Special care for outputless ellipses
-      std::bitset<LABELRANGE> out_ellipse_set = Str2Set(out_ellipse);
-      std::string tmp_subscripts = subscripts, output_subscript = "";
-      size_t len_tmp_subscripts = tmp_subscripts.length();
-      std::sort(tmp_subscripts.begin(), tmp_subscripts.end());
-      for (size_t i = 0; i < len_tmp_subscripts; ++i) {
-        const char& c = tmp_subscripts[i];
-        if (c == ',') {
-          continue;
-        }
-        CHECK(einsum_symbols_set.test(c)) << "Character " << c << " is not a valid symbol.";
-        if ((i == 0 || tmp_subscripts[i - 1] != c) &&
-            (i == len_tmp_subscripts - 1 || tmp_subscripts[i + 1] != c) &&
-            !out_ellipse_set.test(c)) {
-          output_subscript.append(1, c);
-        }
-      }
-      subscripts += "->" + out_ellipse + output_subscript;
-    }
-  }
-
-  // Build output string if does not exist
-  std::tuple<std::string, std::string> ret;
-  if (subscripts.find("->") != std::string::npos) {
-    std::vector<std::string> tmp(2);
-    tmp = Split(subscripts, "->");
-    ret = std::make_tuple(tmp[0], tmp[1]);
-  } else {
-    std::string first = subscripts;
-    std::string second = "";
-    // Build output subscripts
-    std::string tmp_subscripts = subscripts;
-    size_t len_tmp_subscripts = tmp_subscripts.length();
-    std::sort(tmp_subscripts.begin(), tmp_subscripts.end());
-    for (size_t i = 0; i < len_tmp_subscripts; ++i) {
-      const char& c = tmp_subscripts[i];
-      if (c == ',') {
-        continue;
-      }
-      CHECK(einsum_symbols_set.test(c)) << "Character " << c << " is not a valid symbol.";
-      if ((i == 0 || tmp_subscripts[i - 1] != c) &&
-          (i == len_tmp_subscripts - 1 || tmp_subscripts[i + 1] != c)) {
-        second.append(1, c);
-      }
-    }
-    ret = std::make_tuple(first, second);
-  }
-
-  // Make sure output subscripts are in the input
-  std::bitset<LABELRANGE> input_subscripts_set = Str2Set(std::get<0>(ret));
-  for (const char& c : std::get<1>(ret)) {
-    CHECK(input_subscripts_set.test(c))
-        << "Output character " << c << " did not appear in the input";
-  }
-
-  // Make sure number operands is equivalent to the number of terms
-  CHECK_EQ(std::count(std::get<0>(ret).begin(), std::get<0>(ret).end(), ',') + 1, operands.size())
-      << "Number of einsum subscripts must be equal to the "
-      << "number of operands.";
-
-  return ret;
-}
-
 /*!
  * \brief Compute the shape of the output.
  * \param subscripts input subscripts.
@@ -618,54 +56,8 @@ inline std::tuple<std::string, std::string> ParseEinsumInput(
  *
  * \return the shape of the output.
  */
-inline Array<PrimExpr> NumpyEinsumShape(const std::string subscripts,
-                                        const std::vector<Array<PrimExpr>>& operands) {
-  // Parsing
-  std::tuple<std::string, std::string> parsed_subscripts = ParseEinsumInput(subscripts, operands);
-
-  // Build a few useful list and sets
-  std::vector<std::string> input_list = Split(std::get<0>(parsed_subscripts), ",");
-  size_t isize = input_list.size();
-
-  // Get length of each unique dimension and ensure all dimensions are correct
-  int dimension_dict[LABELRANGE];
-  memset(dimension_dict, -1, sizeof(dimension_dict));
-  for (size_t i = 0; i < isize; ++i) {
-    const std::string& term = input_list[i];
-    const Array<PrimExpr>& sh = operands[i];
-    CHECK_EQ(sh.size(), term.length())
-        << "Einstein sum subscript " << input_list[i] << " does not contain the "
-        << "correct number of indices for operand " << i << ".";
-    size_t len_term = term.length();
-    for (size_t j = 0; j < len_term; ++j) {
-      int64_t dim = GetConstInt(sh[j]);
-      const char& c = term[j];
-
-      if (dimension_dict[static_cast<int>(c)] != -1) {
-        // For broadcasting cases we always want the largest dim size
-        if (dimension_dict[static_cast<int>(c)] == 1) {
-          dimension_dict[static_cast<int>(c)] = dim;
-        }
-        CHECK(dim == 1 || dim == dimension_dict[static_cast<int>(c)])
-            << "Size of label '" << c << "' for operand  " << i << " ("
-            << dimension_dict[static_cast<int>(c)] << ") does not match previous terms (" << dim
-            << ").";
-      } else {
-        dimension_dict[static_cast<int>(c)] = dim;
-      }
-    }
-  }
-
-  // Get oshape
-  const std::string& output_str = std::get<1>(parsed_subscripts);
-  size_t odim = output_str.size();
-  Array<PrimExpr> oshape(odim, -1);
-  for (size_t i = 0; i < odim; ++i) {
-    oshape.Set(i, dimension_dict[static_cast<int>(output_str[i])]);
-  }
-  // Neglecting oshape assign check temporally
-  return oshape;
-}
+Array<PrimExpr> InferEinsumShape(const std::string& subscripts,
+                                 const std::vector<Array<PrimExpr>>& operands);
 
 /*!
  * \brief Evaluates the Einstein summation convention on the operands.
@@ -678,265 +70,26 @@ inline Array<PrimExpr> NumpyEinsumShape(const std::string subscripts,
  *
  * \return The calculation based on the Einstein summation convention.
  */
-inline Tensor einsum(const std::string& subscripts_str, const Array<Tensor> inputs,
-                     std::string name = "T_einsum", std::string tag = kEinsum) {
-  bool back = false;
-  const char* subscripts = subscripts_str.data();
-  const char* head = subscripts;
-  const int nop = inputs.size();
-
-  /* Step 1: Parse the subscripts string into label_counts and op_labels */
-  int iop, idim, min_label = LABELRANGE - 1, max_label = 0;
-  char label_counts[LABELRANGE], op_labels[NPY_MAXARGS][NPY_MAXDIMS];
-  memset(label_counts, 0, sizeof(label_counts));
-  for (iop = 0; iop < nop; ++iop) {
-    int length = static_cast<int>(strcspn(subscripts, ",-"));
-
-    CHECK(!(iop == nop - 1 && subscripts[length] == ','))
-        << "more operands provided to einstein sum function "
-        << "than specified in the subscripts string";
-    CHECK(!(iop < nop - 1 && subscripts[length] != ','))
-        << "fewer operands provided to einstein sum function "
-        << "than specified in the subscripts string";
-    CHECK_EQ(ParseOperandSubscripts(subscripts, length, inputs[iop + back].ndim(), iop,
-                                    op_labels[iop], label_counts, &min_label, &max_label),
-             0);
-
-    /* Move subscripts to the start of the labels for the next op */
-    subscripts += length;
-
-    if (iop < nop - 1) {
-      CHECK_LT(subscripts - head, subscripts_str.length()) << "subscripts out of range";
-      subscripts++;
-    }
-  }
-  /*
-   * Find the number of broadcast dimensions, which is the maximum
-   * number of labels == 0 in an op_labels array.
+Tensor einsum(const std::string& subscripts_str, const Array<Tensor> inputs,
+              std::string name = "T_einsum", std::string tag = kEinsum);
+
+struct EinsumEquation {
+  /*!
+   * \brief Create EinsumEquation from a string, converted to explicit mode if it is implicit.
+   * \param equation The Einsum equation string to parse.
+   * \return The created EinsumEquation.
    */
-  int ndim_broadcast = 0;
-  for (iop = 0; iop < nop; ++iop) {
-    int count_zeros = 0;
-    int ndim;
-    char* labels = op_labels[iop];
-
-    ndim = inputs[iop + back].ndim();
-    for (idim = 0; idim < ndim; ++idim) {
-      if (labels[idim] == 0) {
-        ++count_zeros;
-      }
-    }
-
-    if (count_zeros > ndim_broadcast) {
-      ndim_broadcast = count_zeros;
-    }
-  }
-
-  /*
-   * If there is no output signature, fill output_labels and ndim_output
-   * using each label that appeared once, in alphabetical order.
-   */
-  int label, ndim_output;
-  char output_labels[NPY_MAXDIMS];
-  if (subscripts[0] == '\0') {
-    /* If no output was specified, always broadcast left, as usual. */
-    for (ndim_output = 0; ndim_output < ndim_broadcast; ++ndim_output) {
-      output_labels[ndim_output] = 0;
-    }
-    for (label = min_label; label <= max_label; ++label) {
-      if (label_counts[label] == 1) {
-        CHECK(ndim_output < NPY_MAXDIMS) << "einstein sum subscript string has too many "
-                                         << "distinct labels";
-        output_labels[ndim_output++] = label;
-      }
-    }
-  } else {
-    CHECK(subscripts[0] == '-' && subscripts[1] == '>') << "einstein sum subscript string does not "
-                                                        << "contain proper '->' output specified";
-    subscripts += 2;
-
-    /* Parse the output subscript string. */
-    ndim_output = ParseOutputSubscripts(subscripts, strlen(subscripts), ndim_broadcast,
-                                        label_counts, output_labels);
-    CHECK_GE(ndim_output, 0);
-  }
-
-  /*
-   * Step 2:
-   * Process all the input ops, combining dimensions into their
-   * diagonal where specified.
-   */
-  std::vector<Array<PrimExpr>> opshape(nop), opstride_true(nop);
-  for (iop = 0; iop < nop; ++iop) {
-    char* labels = op_labels[iop];
-    int combine, ndim;
-
-    ndim = inputs[iop + back].ndim();
-
-    /*
-     * Check whether any dimensions need to be combined
-     *
-     * The char type may be either signed or unsigned, we
-     * need it to be signed here.
-     */
-    combine = 0;
-    for (idim = 0; idim < ndim; ++idim) {
-      if ((signed char)labels[idim] < 0) {
-        combine++;
-      }
-    }
-    /* If any dimensions are combined, create a view which combines them */
-    if (combine) {
-      Array<PrimExpr> tshape(static_cast<size_t>(ndim - combine), -1);
-      Array<PrimExpr> tstride(static_cast<size_t>(ndim - combine), -1);
-      GetCombinedDimsView(inputs[iop + back], iop, labels, &tshape, &tstride);
-      opshape[iop] = tshape;
-      opstride_true[iop] = tstride;
-    } else {
-      /* No combining needed */
-      opshape[iop] = inputs[iop + back]->shape;
-      opstride_true[iop] = GetStride(opshape[iop]);
-    }
-  }
-  /*
-   * Step 3:
-   * Set up the labels for the iterator (output + combined labels).
-   * Can just share the output_labels memory, because iter_labels
-   * is output_labels with some more labels appended.
-   */
-  char* iter_labels = output_labels;
-  int ndim_iter = ndim_output;
-  for (label = min_label; label <= max_label; ++label) {
-    if (label_counts[label] > 0 && memchr(output_labels, label, ndim_output) == nullptr) {
-      CHECK(ndim_iter < NPY_MAXDIMS) << "too many subscripts in einsum";
-      iter_labels[ndim_iter++] = label;
-    }
-  }
-  /* Step 4: Set up the op_axes for the iterator */
-  Array<PrimExpr> itershape(static_cast<size_t>(ndim_iter), -1);
-  std::vector<Array<PrimExpr>> iterstride(nop + 1,
-                                          Array<PrimExpr>(static_cast<size_t>(ndim_iter), 0));
-
-  // output_shape
-  std::vector<Array<PrimExpr>> operands;
-  for (size_t i = 0; i < inputs.size(); i++) {
-    operands.push_back(inputs[i]->shape);
-  }
-  Array<PrimExpr> oshape = NumpyEinsumShape(subscripts_str, operands);
-  Array<PrimExpr> ostride_true = GetStride(oshape);
-  Array<PrimExpr> reduceshape;
-  std::vector<Array<PrimExpr>> remainshape(nop);
-  int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
-  int* op_axes[NPY_MAXARGS];
-  for (iop = 0; iop < nop; ++iop) {
-    op_axes[iop] = op_axes_arrays[iop];
-    CHECK_GE(PrepareOpAxes(opshape[iop].size(), iop, op_labels[iop], op_axes[iop], ndim_iter,
-                           iter_labels),
-             0);
-    for (idim = 0; idim < ndim_iter; idim++) {
-      if (op_axes[iop][idim] != -1) {
-        iterstride[iop].Set(idim, opstride_true[iop][op_axes[iop][idim]]);
-        if (GetConstInt(itershape[idim]) != -1) {
-          if (GetConstInt(itershape[idim]) == 1) {
-            itershape.Set(idim, opshape[iop][op_axes[iop][idim]]);
-          }
-        } else {
-          itershape.Set(idim, opshape[iop][op_axes[iop][idim]]);
-        }
-      }
-    }
-  }
-  for (idim = 0; idim < ndim_output; ++idim) {
-    iterstride[nop].Set(idim, ostride_true[idim]);
-  }
-  reduceshape = Array<PrimExpr>(static_cast<size_t>(ndim_iter - ndim_output), 0);
-  for (idim = ndim_output; idim < ndim_iter; ++idim) {
-    reduceshape.Set(idim - ndim_output, itershape[idim]);
-  }
-  for (iop = 0; iop < nop; iop++) {
-    Array<Integer> rsh;
-    for (idim = 0; idim < ndim_iter; idim++) {
-      if (op_axes_arrays[iop][idim] == -1) {
-        rsh.push_back(GetConstInt(itershape[idim]));
-      } else {
-        if (GetConstInt(itershape[idim] != opshape[iop][op_axes_arrays[iop][idim]])) {
-          rsh.push_back(GetConstInt(itershape[idim]));
-        }
-      }
-    }
-    remainshape[iop] = Array<PrimExpr>(rsh.begin(), rsh.end());
-  }
-  // exclude the 0-dim case
-  if (ndim_iter == 0) {
-    ndim_iter = 1;
-  }
-  itershape = Pad(itershape, ndim_iter);
-  for (iop = 0; iop <= nop; ++iop) {
-    iterstride[iop] = Pad(iterstride[iop], ndim_iter);
-  }
-  // oshape = Pad(oshape, ndim_iter);
-  reduceshape = Pad(reduceshape, ndim_iter);
-  for (iop = 0; iop < nop; ++iop) {
-    opshape[iop] = Pad(opshape[iop], ndim_iter);
-    remainshape[iop] = Pad(remainshape[iop], ndim_iter);
-  }
-  // ostride and rstride
-  Array<Array<PrimExpr>> ostride;
-  Array<Array<PrimExpr>> rstride;
-
-  for (iop = 0; iop < nop; ++iop) {
-    Array<PrimExpr> otmp(static_cast<size_t>(ndim_iter), 0);
-    Array<PrimExpr> rtmp(static_cast<size_t>(ndim_iter), 0);
-    for (idim = 0; idim < ndim_iter; ++idim) {
-      otmp.Set(idim, idim < ndim_output ? iterstride[iop][idim] : 1);
-      rtmp.Set(idim, idim < ndim_iter - ndim_output ? iterstride[iop][idim + ndim_output] : 1);
-    }
-    ostride.push_back(otmp);
-    rstride.push_back(rtmp);
-  }
-
-  // func: input indices => return cooresponding value
-  auto func = [inputs, oshape, ostride, reduceshape, ndim_iter, rstride,
-               nop](const Array<Var>& input_indices) -> PrimExpr {
-    for (int rdim = 0; rdim < ndim_iter; ++rdim) {
-      if (GetConstInt(reduceshape[rdim]) == 0) {
-        return 0;  //
-      }
-    }
-    Array<PrimExpr> ridx = UnravelIndex(0, reduceshape);
-
-    PrimExpr sum = 0;
-    bool rec_flag = false;
-    do {
-      PrimExpr tmp = 1;
-      for (int iop = 0; iop < nop; ++iop) {
-        if (iop != -1) {
-          PrimExpr k = 0;
-
-          for (size_t i = 0; i < input_indices.size(); ++i) {
-            k += input_indices[i] * ostride[iop][i];
-          }
-          for (size_t i = 0; i < ridx.size(); ++i) {
-            k += ridx[i] * rstride[iop][i];
-          }
-          Array<PrimExpr> temp_indices = UnravelIndex(k, inputs[iop]->shape);
-          tmp = tmp * inputs[iop](temp_indices);
-        }
-      }
-      sum += tmp;
-      ridx.Set(ridx.size() - 1, ridx[ridx.size() - 1] + 1);
-      for (int i = static_cast<int>(ridx.size() - 1);
-           (i > 0) && GetConstInt(ridx[i] >= reduceshape[i]); --i) {
-        ridx.Set(i, ridx[i] - reduceshape[i]);
-        ridx.Set(i - 1, ridx[i - 1] + 1);
-      }
-      rec_flag = GetConstInt(ridx[0] < reduceshape[0]);
-    } while (rec_flag);
-    return sum;
-  };
-
-  return compute(oshape, func, name, tag);
-}
+  static EinsumEquation FromString(const std::string& equation);
+  using Label = char;
+  using Subscript = std::vector<Label>;
+  // Special label value for ellipsis. The value is chosen to be less than any letter, which makes
+  // sorting easier.
+  static constexpr Label kEllipsis = '\0';
+  // The input subscripts for each operand of the Einsum operator.
+  std::vector<Subscript> inputs;
+  // The output subscript of the Einsum equation.
+  Subscript output;
+};
 
 }  // namespace topi
 }  // namespace tvm
diff --git a/src/relay/op/tensor/math.cc b/src/relay/op/tensor/math.cc
index 246fba62cc66..6d1dabb497e0 100644
--- a/src/relay/op/tensor/math.cc
+++ b/src/relay/op/tensor/math.cc
@@ -77,7 +77,7 @@ bool EinsumRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   }
 
   // Calculate output shape
-  Array<IndexExpr> oshape = topi::NumpyEinsumShape(param->equation, input_shapes);
+  Array<IndexExpr> oshape = topi::InferEinsumShape(param->equation, input_shapes);
 
   auto rtype = TensorType(oshape, dtype);
   reporter->Assign(types[1], rtype);
diff --git a/src/topi/einsum.cc b/src/topi/einsum.cc
new file mode 100644
index 000000000000..892a17e58d7f
--- /dev/null
+++ b/src/topi/einsum.cc
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file topi/einsum.cc
+ * \brief Einstein summation op
+ */
+#include <tvm/topi/broadcast.h>
+#include <tvm/topi/einsum.h>
+
+namespace tvm {
+namespace topi {
+
+// Parses an Einsum equation such as "ij,jk->ik" into per-operand input subscripts and an output
+// subscript; an equation without "->" is in implicit mode and gets its output derived here.
+EinsumEquation EinsumEquation::FromString(const std::string& equation) {
+  EinsumEquation result;
+  Subscript current;
+  bool has_arrow = false;
+  bool has_ellipsis = false;
+
+  for (int i = 0, n = equation.size(); i < n; ++i) {
+    switch (equation[i]) {
+      case ' ':
+        // Ignore spaces
+        break;
+      case '-':
+        // Arrow
+        CHECK(!has_arrow) << "Equation can only have one arrow";
+        CHECK(i + 1 < n && equation[i + 1] == '>')
+            << "Cannot parse the Einsum equation: invalid arrow";
+        i++;
+        has_arrow = true;
+        [[fallthrough]];
+      case ',':
+        // Delimiter between inputs, push current and start a new one
+        result.inputs.emplace_back(current);
+        current.clear();
+        has_ellipsis = false;
+        break;
+      case '.':
+        // Ellipsis
+        CHECK(!has_ellipsis) << "Ellipsis can only appear once for each input and output";
+        CHECK(i + 2 < n && equation[i + 1] == '.' && equation[i + 2] == '.')
+            << "Cannot parse the Einsum equation: invalid ellipsis";
+        current.push_back(kEllipsis);
+        has_ellipsis = true;
+        i += 2;
+        break;
+      default:
+        // Default case: current character is a subscript label. The cast keeps the argument of
+        // std::isalpha representable as unsigned char, as the function requires.
+        CHECK(std::isalpha(static_cast<unsigned char>(equation[i])))
+            << "Cannot parse the Einsum equation: invalid character " << equation[i]
+            << " in equation " << equation;
+        current.emplace_back(equation[i]);
+        break;
+    }
+  }
+
+  if (has_arrow) {
+    // If there is an arrow, the last subscript is the output
+    result.output = current;
+  } else {
+    // Otherwise, the equation is in implicit mode: the last subscript is an input and the output
+    // is derived. It holds the ellipsis first if any input has one, followed by every label that
+    // appears exactly once across the inputs, in ascending character order.
+    result.inputs.emplace_back(current);
+    std::map<char, int> label_counts;
+    for (const Subscript& subscript : result.inputs) {
+      for (char label : subscript) {
+        label_counts[label]++;
+      }
+    }
+    for (auto [label, count] : label_counts) {
+      if (label == kEllipsis || count == 1) {
+        result.output.emplace_back(label);
+      }
+    }
+  }
+  return result;
+}
+
+// Broadcast the extents of two dimensions bound to the same label: equal extents are kept, an
+// extent of 1 yields to the other one, and any other combination is a fatal error.
+PrimExpr GetBroadcastedExtent(const PrimExpr& extent1, const PrimExpr& extent2) {
+  const int64_t extent1_value = GetConstInt(extent1);
+  const int64_t extent2_value = GetConstInt(extent2);
+  // Return the operand's own expression (not a rebuilt constant) so the extent keeps its dtype
+  if (extent1_value == extent2_value || extent2_value == 1) return extent1;
+  if (extent1_value == 1) return extent2;
+  LOG(FATAL) << "Cannot broadcast extents " << extent1 << " and " << extent2;
+  throw;
+}
+
+// Selects the index used to read a dimension of an operand: the given index variable when the
+// operand's extent equals the broadcasted extent, and constant 0 otherwise.
+PrimExpr GetIndexForBroadcastedDim(const Var& index, const PrimExpr& extent,
+                                   const PrimExpr& broadcasted_extent) {
+  const bool is_broadcast = GetConstInt(extent) != GetConstInt(broadcasted_extent);
+  if (is_broadcast) return Integer(0);
+  return index;
+}
+
+/*! \brief The compute builder for Einsum */
+class EinsumBuilder {
+ public:
+  /*!
+   * \brief The constructor
+   * \param equation The Einsum equation
+   * \param input_shapes The shapes of the input tensors
+   */
+  EinsumBuilder(EinsumEquation equation, Array<Array<PrimExpr>> input_shapes)
+      : equation_(equation), input_shapes_(input_shapes) {}
+
+  /*!
+   * \brief Run the shape inference
+   * \return The inferred shape of the output
+   */
+  Array<PrimExpr> InferShape() {
+    CHECK_EQ(equation_.inputs.size(), input_shapes_.size())
+        << "Number of operands does not match the equation";
+
+    // The sub-shape covered by the ellipsis, one entry per operand that uses an ellipsis
+    std::vector<Array<PrimExpr>> ellipsis_shapes;
+
+    // Step 1: Collect the broadcasted extent for each label
+    for (int operand_index = 0; operand_index < static_cast<int>(input_shapes_.size());
+         ++operand_index) {
+      const EinsumEquation::Subscript& subscript = equation_.inputs[operand_index];
+      const Array<PrimExpr>& input_shape = input_shapes_[operand_index];
+      const int input_ndim = static_cast<int>(input_shape.size());
+      int current_dim = 0;
+      for (auto label : subscript) {
+        if (label == EinsumEquation::kEllipsis) {
+          // Find the sub-shape covered by the ellipsis
+          int ellipsis_ndim = input_ndim - static_cast<int>(subscript.size()) + 1;
+          CHECK(ellipsis_ndim >= 0) << "Operand " << operand_index << " has too few dimensions ("
+                                    << input_ndim << ") for its subscript";
+          ellipsis_shapes.emplace_back(input_shape.begin() + current_dim,
+                                       input_shape.begin() + current_dim + ellipsis_ndim);
+          current_dim += ellipsis_ndim;
+        } else {
+          CHECK(current_dim < input_ndim) << "Operand " << operand_index
+                                          << " has too few dimensions for its subscript";
+          const PrimExpr& extent = input_shape[current_dim++];
+          auto it = label_to_extent_.find(label);
+          if (it == label_to_extent_.end()) {
+            label_to_extent_[label] = extent;
+          } else {
+            it->second = GetBroadcastedExtent(it->second, extent);
+          }
+        }
+      }
+      CHECK_EQ(current_dim, input_ndim) << "Operand rank does not match its subscript";
+    }
+
+    // Step 2: Infer the shape of the ellipsis if exists
+    // The ellipsis may cover different number of dimensions for each operand, these sub-shapes
+    // need to be broadcasted to the shape with the maximum number of dimensions
+    Array<PrimExpr> ellipsis_shape;
+    if (!ellipsis_shapes.empty()) {
+      ellipsis_shape = *std::max_element(
+          ellipsis_shapes.begin(), ellipsis_shapes.end(),
+          [](const Array<PrimExpr>& a, const Array<PrimExpr>& b) { return a.size() < b.size(); });
+      for (const Array<PrimExpr>& shape : ellipsis_shapes) {
+        auto common_shape = detail::BroadcastShape(ellipsis_shape, shape).common_shape;
+        ellipsis_shape = Array<PrimExpr>(common_shape.begin(), common_shape.end());
+      }
+    }
+
+    // Step 3: Infer output shape based on inferred extent for each label
+    for (auto label : equation_.output) {
+      if (label == EinsumEquation::kEllipsis) {
+        output_shape_.insert(output_shape_.end(), ellipsis_shape.begin(), ellipsis_shape.end());
+      } else {
+        auto it = label_to_extent_.find(label);
+        CHECK(it != label_to_extent_.end())
+            << "Output label " << label << " does not appear in any input subscript";
+        output_shape_.push_back(it->second);
+      }
+    }
+    ellipsis_shape_ = std::move(ellipsis_shape);
+    return output_shape_;
+  }
+
+  /*!
+   * \brief Build the expression of one output element, using the state recorded by InferShape
+   * \return The product of the indexed operands, summed over the reduction axes if there are any
+   */
+  PrimExpr BuildOutputExpr(const Array<Tensor> inputs, const Array<Var>& indices) {
+    std::unordered_map<EinsumEquation::Label, Var> label_to_index;
+    Array<Var> ellipsis_indices;
+    Array<IterVar> reduce_axes;
+
+    PrepareOutputIndicesMapping(indices, &label_to_index, &ellipsis_indices);
+    PrepareReductionIndicesMapping(indices, &label_to_index, &ellipsis_indices, &reduce_axes);
+
+    auto zero = make_zero(inputs[0]->dtype);
+    PrimExpr result = zero;
+    for (int i = 0, n = static_cast<int>(inputs.size()); i < n; ++i) {
+      auto term = inputs[i](GetIndicesForOperand(i, label_to_index, ellipsis_indices));
+      result = (i == 0) ? term : result * term;
+    }
+    if (reduce_axes.size() > 0) {
+      result = sum(result, reduce_axes, {zero});
+    }
+    return result;
+  }
+
+ private:
+  /*! \brief Prepare mapping from label (including ellipsis) to the output indices */
+  void PrepareOutputIndicesMapping(const Array<Var>& indices,
+                                   std::unordered_map<EinsumEquation::Label, Var>* label_to_index,
+                                   Array<Var>* ellipsis_indices) {
+    int i = 0;
+    for (auto label : equation_.output) {
+      if (label == EinsumEquation::kEllipsis) {
+        auto ellipsis_ndim = ellipsis_shape_.value().size();
+        *ellipsis_indices = Array<Var>(indices.begin() + i, indices.begin() + i + ellipsis_ndim);
+        i += ellipsis_ndim;
+      } else {
+        label_to_index->emplace(label, indices[i++]);
+      }
+    }
+    ICHECK_EQ(i, indices.size());
+  }
+
+  /*!
+   * \brief Create reduction axes and prepare mapping from reduction label (including ellipsis if
+   * necessary) to the reduction axes
+   */
+  void PrepareReductionIndicesMapping(
+      const Array<Var>& indices, std::unordered_map<EinsumEquation::Label, Var>* label_to_index,
+      Array<Var>* ellipsis_indices, Array<IterVar>* reduction_axes) {
+    // Collect labels that need to be reduced, which is the union(input_labels) - output_labels
+    std::set<char> reduction_labels;
+    for (const EinsumEquation::Subscript& subscript : equation_.inputs) {
+      reduction_labels.insert(subscript.begin(), subscript.end());
+    }
+    for (auto label : equation_.output) {
+      reduction_labels.erase(label);
+    }
+
+    // Create reduction axes. The order of the reduction axes is not specified in the Einsum
+    // equation. Here we sort them alphabetically, with the ellipsis axes at the
+    // beginning if exists.
+    for (auto label : reduction_labels) {
+      if (label == EinsumEquation::kEllipsis) {
+        auto ellipsis_shape = ellipsis_shape_.value();
+        for (int i = 0; i < static_cast<int>(ellipsis_shape.size()); ++i) {
+          reduction_axes->push_back(
+              IterVar(Range(0, ellipsis_shape[i]), Var("k"), IterVarType::kCommReduce));
+          ellipsis_indices->push_back(reduction_axes->back()->var);
+        }
+      } else {
+        reduction_axes->push_back(IterVar(Range(0, label_to_extent_.at(label)),
+                                          Var(std::string(1, label)), IterVarType::kCommReduce));
+        label_to_index->emplace(label, reduction_axes->back()->var);
+      }
+    }
+  }
+
+  /*!
+   * \brief Get the indices used to read one operand, with broadcast dimensions indexed by 0
+   * \return One index per dimension of the operand
+   */
+  Array<PrimExpr> GetIndicesForOperand(
+      int operand_index, const std::unordered_map<EinsumEquation::Label, Var>& label_to_index,
+      const Array<Var>& ellipsis_indices) {
+    const EinsumEquation::Subscript& subscript = equation_.inputs[operand_index];
+    Array<PrimExpr> indices;  // the indices for the operand
+    const Array<PrimExpr> input_shape = input_shapes_[operand_index];
+
+    int i = 0;  // index of the operand shape
+    for (char label : subscript) {
+      if (label == EinsumEquation::kEllipsis) {
+        Array<PrimExpr> ellipsis_shape = ellipsis_shape_.value();
+        int ellipsis_ndim =
+            static_cast<int>(input_shape.size()) - static_cast<int>(subscript.size()) + 1;
+        // use last 'ellipsis_ndim' axes
+        for (int j = static_cast<int>(ellipsis_indices.size()) - ellipsis_ndim;
+             j < static_cast<int>(ellipsis_indices.size()); ++j) {
+          indices.push_back(
+              GetIndexForBroadcastedDim(ellipsis_indices[j], input_shape[i++], ellipsis_shape[j]));
+        }
+      } else {
+        indices.push_back(GetIndexForBroadcastedDim(label_to_index.at(label), input_shape[i++],
+                                                    label_to_extent_.at(label)));
+      }
+    }
+    ICHECK_EQ(i, input_shape.size());
+    ICHECK_EQ(indices.size(), input_shape.size());
+    return indices;
+  }
+
+  EinsumEquation equation_;
+  Array<Array<PrimExpr>> input_shapes_;
+
+  // Intermediate results of shape inference
+  // The output shape
+  Array<PrimExpr> output_shape_;
+  // The extent of each label with broadcast rules applied
+  std::unordered_map<EinsumEquation::Label, PrimExpr> label_to_extent_;
+  // The shape of the ellipsis if ellipsis is used. The shape covered by the ellipsis in each
+  // operand might differ from this; this is their common shape under the broadcast rules.
+  Optional<Array<PrimExpr>> ellipsis_shape_;
+};
+
+Tensor einsum(const std::string& subscripts_str, const Array<Tensor> inputs, std::string name,
+              std::string tag) {
+  // Parse the equation, then infer the output shape from the shapes of the operands
+  const EinsumEquation parsed = EinsumEquation::FromString(subscripts_str);
+  Array<Array<PrimExpr>> operand_shapes;
+  for (const Tensor& operand : inputs) {
+    operand_shapes.push_back(operand->shape);
+  }
+  EinsumBuilder builder(parsed, operand_shapes);
+  const Array<PrimExpr> result_shape = builder.InferShape();
+  // The builder provides the expression of the output element at the given indices
+  auto fcompute = [&](const Array<Var>& idx) { return builder.BuildOutputExpr(inputs, idx); };
+  return te::compute(result_shape, fcompute, name, tag);
+}
+
+Array<PrimExpr> InferEinsumShape(const std::string& subscripts,
+                                 const std::vector<Array<PrimExpr>>& operands) {
+  // Only the parsed equation and the operand shapes are needed; no tensor is created here
+  EinsumBuilder shape_inferer(EinsumEquation::FromString(subscripts), operands);
+  return shape_inferer.InferShape();
+}
+
+TVM_REGISTER_GLOBAL("topi.einsum").set_body([](TVMArgs args, TVMRetValue* rv) {
+  *rv = einsum(args[0], args[1]);  // args[0]: the equation, args[1]: the input tensors
+});
+
+}  // namespace topi
+}  // namespace tvm
diff --git a/src/topi/transform.cc b/src/topi/transform.cc
index 56e799f52563..0ea1392e5daf 100644
--- a/src/topi/transform.cc
+++ b/src/topi/transform.cc
@@ -173,10 +173,6 @@ TVM_REGISTER_GLOBAL("topi.tensordot").set_body([](TVMArgs args, TVMRetValue* rv)
   }
 });
 
-TVM_REGISTER_GLOBAL("topi.einsum").set_body([](TVMArgs args, TVMRetValue* rv) {
-  *rv = einsum(args[0], args[1]);
-});
-
 TVM_REGISTER_GLOBAL("topi.strided_slice").set_body([](TVMArgs args, TVMRetValue* rv) {
   Tensor x = args[0];
   Array<PrimExpr> begin = args[1];
diff --git a/tests/python/topi/python/test_topi_einsum.py b/tests/python/topi/python/test_topi_einsum.py
index 994d5438e661..d6dc43e4da00 100644
--- a/tests/python/topi/python/test_topi_einsum.py
+++ b/tests/python/topi/python/test_topi_einsum.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import numpy as np
+import pytest
 import tvm
 import tvm.testing
 from tvm import te
@@ -59,20 +60,27 @@ def verify_einsum(subscripts, shapes):
     tvm.testing.assert_allclose(c1, c2, rtol=1e-5, atol=1e-5)
 
 
-def test_einsum():
-    verify_einsum("ii", [(5, 5)])
-    verify_einsum("ii->i", [(5, 5)])
-    verify_einsum("ij->i", [(5, 5)])
-    verify_einsum("...j->...", [(5, 5)])
-    verify_einsum("...j, j", [(5, 5), (5,)])
-    verify_einsum("..., ...", [(), (2, 3)])
-    verify_einsum("ijk, jil->kl", [(3, 4, 5), (4, 3, 2)])
-    verify_einsum("ij, ij -> i", [(1, 4), (2, 4)])
-    verify_einsum("...ij, ...jk -> ...ik", [(1, 4), (4, 2)])
-    verify_einsum("...ij, ...ik -> ...jk", [(1, 1, 1, 4), (1, 1, 1, 3)])
-    verify_einsum("ij,jk->ik", [(2, 3), (3, 4)])
-    verify_einsum("ij,jk,km->im", [(2, 3), (3, 4), (4, 5)])
+@pytest.mark.parametrize(
+    "equation,inputs",
+    [
+        ("ii", [(5, 5)]),  # implicit mode: the repeated label is reduced
+        ("ii->i", [(5, 5)]),
+        ("ij->i", [(5, 5)]),
+        ("...j->...", [(5, 5)]),
+        ("...j, j", [(5, 5), (5,)]),
+        ("..., ...", [(), (2, 3)]),  # the ellipsis covers no dimension of the first operand
+        ("ijk, jil->kl", [(3, 4, 5), (4, 3, 2)]),
+        ("ij, ij -> i", [(1, 4), (2, 4)]),  # extent 1 of label i is broadcast to extent 2
+        ("...ij, ...jk -> ...ik", [(1, 4), (4, 2)]),
+        ("...ij, ...ik -> ...jk", [(1, 1, 1, 4), (1, 1, 1, 3)]),
+        ("...ik, ...jk, ...hk -> i...jh", [(3, 4, 4), (1, 5, 3, 8, 4), (2, 5, 3, 6, 4)]),
+        ("ij,jk->ik", [(2, 3), (3, 4)]),
+        ("ij,jk,km->im", [(2, 3), (3, 4), (4, 5)]),
+    ],
+)
+def test_einsum(equation, inputs):
+    verify_einsum(equation, inputs)
 
 
 if __name__ == "__main__":
-    test_einsum()
+    tvm.testing.main()