diff --git a/aiida_dataframe/data/dataframe.py b/aiida_dataframe/data/dataframe.py index ec9e965..e4409d9 100644 --- a/aiida_dataframe/data/dataframe.py +++ b/aiida_dataframe/data/dataframe.py @@ -130,8 +130,12 @@ def store(self, *args, **kwargs) -> PandasFrameData: on the dataframe e.g. `df["A"] = new_value` This is only done if the hashes of the DATA does not match up """ - current_hash = self._hash_dataframe(self._df) - if current_hash != self.get_attribute("_pandas_data_hash"): - self._update_dataframe(self._df) + if not self.is_stored: + # Check whether the dataframe directly attached to the node + # has been mutated in place before storing. + # If so, the underlying file is updated. + current_hash = self._hash_dataframe(self._df) + if current_hash != self.get_attribute("_pandas_data_hash"): + self._update_dataframe(self._df) return super().store(*args, **kwargs) diff --git a/tests/test_data.py b/tests/test_data.py index 52d1945..1d9209c 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -494,3 +494,40 @@ def test_non_default_filename(entry_point): assert loaded is not node assert loaded.list_object_names() == ["non_default.h5"] assert_frame_equal(loaded.df, df) + + +@pytest.mark.parametrize( + "entry_point", + ("dataframe.frame",), +) +def test_modification_store_already_stored(entry_point): + """ + Test that calling store on an already stored node does nothing + (in particular, it must not crash) + """ + + PandasFrameData = DataFactory(entry_point) + + # Example from pandas Docs + df = pd.DataFrame( + { + "A": 1.0, + "B": pd.Timestamp("20130102"), + "C": pd.Series(1, index=list(range(4)), dtype="float32"), + "D": np.array([3] * 4, dtype="int32"), + "E": pd.Categorical(["test", "train", "test", "train"]), + "F": "foo", + } + ) + + node = PandasFrameData(df) + node.store() + assert node.is_stored + + loaded = load_node(node.pk) + assert loaded is not node + assert_frame_equal(loaded.df, df) + + loaded_store = loaded.store() + assert loaded_store is loaded + 
assert_frame_equal(loaded_store.df, df)