diff --git a/news/tn-structuredf.rst b/news/tn-structuredf.rst
new file mode 100644
index 0000000..0996e84
--- /dev/null
+++ b/news/tn-structuredf.rst
@@ -0,0 +1,23 @@
+**Added:**
+
+* create ClusterDataFrame class (DataFrame) to store a cluster of atoms
+
+**Changed:**
+
+*
+
+**Deprecated:**
+
+*
+
+**Removed:**
+
+*
+
+**Fixed:**
+
+*
+
+**Security:**
+
+*
diff --git a/src/diffpy/clusterrender/clusterdataframe.py b/src/diffpy/clusterrender/clusterdataframe.py
new file mode 100644
index 0000000..dd7457c
--- /dev/null
+++ b/src/diffpy/clusterrender/clusterdataframe.py
@@ -0,0 +1,51 @@
+"""This module defines class ClusterDataFrame.
+
+A local structure or cluster of atoms is represented in a DataFrame
+format.
+"""
+
+import pandas as pd
+
+# -------------------------
+
+
+class ClusterDataFrame(pd.DataFrame):
+    """Define a cluster of atoms in a pandas DataFrame format.
+
+    Each row corresponds to an atom, and columns represent
+    atomic properties: species, xyz coordinates, and (optionally)
+    coordination shells, specifying the central atom (0) and its
+    neighboring atoms (1, 2, ...).
+
+    Methods
+    -------
+    _parse_structure(structure_input, site_index=0)
+        Parse structure data from a structure, a file, a dictionary,
+        or a DataFrame into ClusterDataFrame.
+
+    Attributes
+    ----------
+    _constructor : property
+        Ensures that DataFrame operations return ClusterDataFrame objects.
+    """
+
+    @property
+    def _constructor(self):
+        return ClusterDataFrame
+
+    def __init__(self, structure_input, site_index=0):
+        """Initialize ClusterDataFrame from a Structure object, a file,
+        or generic DataFrame arguments.
+
+        Parameters
+        ----------
+        structure_input : pymatgen.core.Structure, pathlib.Path, str,
+                dict, or pd.DataFrame
+            The input structure or cluster of atoms to be visualized.
+        site_index : int, optional
+            The index of the atom in the structure to be treated as the
+            central atom.
+            Default is 0.
+        """
+        # parse input; NOTE(review): _parse_structure not defined in this patch
+        self._parse_structure(structure_input, site_index)
diff --git a/tests/test_clusterdataframe.py b/tests/test_clusterdataframe.py
new file mode 100644
index 0000000..c6fc648
--- /dev/null
+++ b/tests/test_clusterdataframe.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import pytest
+
+from diffpy.clusterrender.clusterdataframe import ClusterDataFrame
+
+"""
+Tests for the ClusterDataFrame class.
+"""
+# set up test data
+dict_input = {
+    "species": ["C", "O", "O"],
+    "x": [0.0, 1.0, -1.0],
+    "y": [0.0, 0.0, 0.0],
+    "z": [0.0, 0.0, 0.0],
+}
+df_input = pd.DataFrame(dict_input)
+
+# basic outputs (specifying center but not coordination shells)
+output_C0 = pd.DataFrame(
+    {
+        "species": ["C", "O", "O"],
+        "x": [0.0, 1.0, -1.0],
+        "y": [0.0, 0.0, 0.0],
+        "z": [0.0, 0.0, 0.0],
+        "shell": [0, None, None],
+    }
+)
+output_O1 = pd.DataFrame(
+    {
+        "species": ["O", "C", "O"],
+        "x": [0.0, -1.0, -2.0],
+        "y": [0.0, 0.0, 0.0],
+        "z": [0.0, 0.0, 0.0],
+        "shell": [0, None, None],
+    }
+)
+
+test_data = [
+    # (input, expected_output) or
+    # (input, site_index, expected_output)
+    # basic inputs: read from dict or DataFrame
+    # without any changes
+    (dict_input, df_input),
+    (df_input, df_input),
+    # with site_index specified; NOTE(review): output_C0 is never used
+    (dict_input, 0, df_input),
+    (dict_input, 1, output_O1),
+]
+
+
+@pytest.mark.parametrize("input_test_data", test_data)
+def test_clusterdataframe(input_test_data):
+    """Test ClusterDataFrame initialization and parsing."""
+    if len(input_test_data) == 2:
+        input_structure, expected_output = input_test_data
+        cdf = ClusterDataFrame(input_structure)
+    else:
+        input_structure, site_index, expected_output = input_test_data
+        cdf = ClusterDataFrame(input_structure, site_index=site_index)
+
+    # check if the output matches the expected DataFrame
+    pd.testing.assert_frame_equal(
+        cdf.reset_index(drop=True), expected_output.reset_index(drop=True)
+    )