diffpy · tinatn29 · Jan 23, 2026 · Jan 23, 2026 · Jan 23, 2026 · Jan 23, 2026
diff --git a/news/tn-structuredf.rst b/news/tn-structuredf.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* Added the ``ClusterDataFrame`` class, a ``pandas.DataFrame`` subclass that stores a cluster of atoms.
+
+**Changed:**
+
+* <news item>
+
+**Deprecated:**
+
+* <news item>
+
+**Removed:**
+
+* <news item>
+
+**Fixed:**
+
+* <news item>
+
+**Security:**
+
+* <news item>
diff --git a/src/diffpy/clusterrender/clusterdataframe.py b/src/diffpy/clusterrender/clusterdataframe.py
@@ -0,0 +1,122 @@
+"""This module defines class ClusterDataFrame.
+
+A local structure or cluster of atoms is represented in a DataFrame
+format.
+"""
+
+import pandas as pd
+
+# -------------------------
+
+
+class ClusterDataFrame(pd.DataFrame):
+    """Define a cluster of atoms in a pandas DataFrame format.
+
+    Each row corresponds to an atom, and columns represent
+    atomic properties: species, xyz coordinates, and (optionally)
+    coordination shells, specifying the central atom (0) and its
+    neighboring atoms (1, 2, ...).
+
+    Methods
+    -------
+    _parse_structure(structure_input, site_index=0)
+        Prepare a dictionary or a DataFrame for loading, re-centering
+        the cluster on the atom at site_index when it is not 0.
+
+    Attributes
+    ----------
+    _constructor : property
+        Ensures that DataFrame operations return ClusterDataFrame objects.
+    """
+
+    @property
+    def _constructor(self):
+        return ClusterDataFrame
+
+    def __init__(self, structure_input=None, site_index=0, **kwargs):
+        """Initialize ClusterDataFrame from a dictionary, a DataFrame,
+        or generic DataFrame arguments.
+
+        Parameters
+        ----------
+        structure_input : dict, pd.DataFrame, or None, optional
+            The input cluster of atoms to be visualized. Reading a
+            pymatgen.core.Structure or a file (pathlib.Path or str)
+            is not implemented yet.
+            Default is None, which creates an empty ClusterDataFrame.
+        site_index : int, optional
+            The row position of the atom to be treated as the
+            central atom.
+            Default is 0, which loads structure_input unchanged.
+        **kwargs
+            Keyword arguments forwarded to pd.DataFrame, such as
+            index, columns, dtype, and copy.
+
+        Raises
+        ------
+        NotImplementedError
+            If structure_input is a file path, or if site_index is
+            not 0 and structure_input is not a dict or a DataFrame.
+        IndexError
+            If site_index is out of range for the number of atoms.
+        KeyError
+            If site_index is not 0 and an x, y, or z column is missing.
+        """
+        # pandas rebuilds derived frames through _constructor, i.e. by
+        # calling this __init__ with the default site_index, so the
+        # default has to load the data without modifying it.
+        data = self._parse_structure(structure_input, site_index)
+        # without this call the underlying DataFrame is never set up
+        super().__init__(data, **kwargs)
+
+    @staticmethod
+    def _parse_structure(structure_input, site_index=0):
+        """Prepare structure_input for the DataFrame constructor.
+
+        Parameters
+        ----------
+        structure_input : dict, pd.DataFrame, or None
+            The input cluster of atoms.
+        site_index : int, optional
+            The row position of the central atom. Default is 0.
+
+        Returns
+        -------
+        dict, pd.DataFrame, or None
+            structure_input itself when site_index is 0. Otherwise a
+            new DataFrame with the central atom moved to the first
+            row, the x, y, and z columns shifted so that the central
+            atom sits at the origin, and a shell column marking the
+            central atom with 0.
+        """
+        is_table = isinstance(structure_input, (dict, pd.DataFrame))
+        is_path = isinstance(structure_input, str) or (
+            not is_table and hasattr(structure_input, "__fspath__")
+        )
+        if is_path:
+            raise NotImplementedError(
+                "Reading a cluster from a file is not supported yet."
+            )
+        if site_index == 0:
+            return structure_input
+        if not is_table:
+            raise NotImplementedError(
+                "site_index is only supported for dict or DataFrame input."
+            )
+
+        atoms = pd.DataFrame(structure_input)
+        n_atoms = len(atoms)
+        # indexing a range validates site_index (IndexError) and
+        # converts a negative position into its non-negative equivalent
+        center = range(n_atoms)[site_index]
+        order = [center] + [i for i in range(n_atoms) if i != center]
+        # iloc with a list copies, so the caller's data is not modified
+        atoms = atoms.iloc[order].reset_index(drop=True)
+        for axis in ("x", "y", "z"):
+            atoms[axis] = atoms[axis] - atoms[axis].iloc[0]
+        # NOTE(review): existing shell labels are kept as they are;
+        # confirm whether they should be recomputed for the new center.
+        if "shell" not in atoms.columns:
+            # only the central atom is labeled; the others stay missing
+            atoms["shell"] = pd.Series([0] + [None] * (n_atoms - 1))
+        return atoms
diff --git a/tests/test_clusterdataframe.py b/tests/test_clusterdataframe.py
@@ -0,0 +1,65 @@
+"""Tests for the ClusterDataFrame class."""
+
+import pandas as pd
+import pytest
+
+from diffpy.clusterrender.clusterdataframe import ClusterDataFrame
+
+# set up test data
+dict_input = {
+    "species": ["C", "O", "O"],
+    "x": [0.0, 1.0, -1.0],
+    "y": [0.0, 0.0, 0.0],
+    "z": [0.0, 0.0, 0.0],
+}
+df_input = pd.DataFrame(dict_input)
+
+# basic outputs (specifying center but not coordination shells)
+output_C0 = pd.DataFrame(
+    {
+        "species": ["C", "O", "O"],
+        "x": [0.0, 1.0, -1.0],
+        "y": [0.0, 0.0, 0.0],
+        "z": [0.0, 0.0, 0.0],
+        "shell": [0, None, None],
+    }
+)
+output_O1 = pd.DataFrame(
+    {
+        "species": ["O", "C", "O"],
+        "x": [0.0, -1.0, -2.0],
+        "y": [0.0, 0.0, 0.0],
+        "z": [0.0, 0.0, 0.0],
+        "shell": [0, None, None],
+    }
+)
+
+test_data = [
+    # (input, site_index, expected_output)
+    # site_index=None means "do not pass site_index", which
+    # exercises the default of the constructor
+    # basic inputs: read from dict or DataFrame
+    # without any changes
+    (dict_input, None, df_input),
+    (df_input, None, df_input),
+    # input that already has a shell column is read as it is
+    (output_C0, None, output_C0),
+    # with site_index specified
+    (dict_input, 0, df_input),
+    (dict_input, 1, output_O1),
+]
+
+
+@pytest.mark.parametrize(
+    "input_structure, site_index, expected_output", test_data
+)
+def test_clusterdataframe(input_structure, site_index, expected_output):
+    """Test ClusterDataFrame initialization and parsing."""
+    # omit site_index when the case does not specify one
+    kwargs = {} if site_index is None else {"site_index": site_index}
+    cdf = ClusterDataFrame(input_structure, **kwargs)
+
+    # check if the output matches the expected DataFrame
+    pd.testing.assert_frame_equal(
+        cdf.reset_index(drop=True), expected_output.reset_index(drop=True)
+    )