Skip to content

Commit

Permalink
BUGFIX: Do not check hashes in store method if node is already stored
Browse files Browse the repository at this point in the history
In this case the _df attribute does not necessarily exist, because the
node is created from a database query and not via __init__
  • Loading branch information
janssenhenning committed May 5, 2023
1 parent a8a79ed commit 58402d5
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 3 deletions.
10 changes: 7 additions & 3 deletions aiida_dataframe/data/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,12 @@ def store(self, *args, **kwargs) -> PandasFrameData:
on the dataframe e.g. `df["A"] = new_value`
This is only done if the hashes of the DATA do not match up
"""
current_hash = self._hash_dataframe(self._df)
if current_hash != self.get_attribute("_pandas_data_hash"):
self._update_dataframe(self._df)
if not self.is_stored:
# Check if the dataframe directly attached to the node
# has been mutated in place before storing
# If so the underlying file is updated
current_hash = self._hash_dataframe(self._df)
if current_hash != self.get_attribute("_pandas_data_hash"):
self._update_dataframe(self._df)

return super().store(*args, **kwargs)
37 changes: 37 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,40 @@ def test_non_default_filename(entry_point):
assert loaded is not node
assert loaded.list_object_names() == ["non_default.h5"]
assert_frame_equal(loaded.df, df)


@pytest.mark.parametrize("entry_point", ("dataframe.frame",))
def test_modification_store_already_stored(entry_point):
    """
    Calling ``store`` on an already stored node must be a harmless no-op.

    The call must not raise, must hand back the very same node object and
    must leave the dataframe attached to the node unchanged.
    """
    frame_cls = DataFactory(entry_point)

    # Example from pandas Docs
    columns = {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
    expected = pd.DataFrame(columns)

    original = frame_cls(expected)
    original.store()
    assert original.is_stored

    # Fetch the node again through its pk so that we work with a separate
    # Python object rather than the instance built via the constructor.
    reloaded = load_node(original.pk)
    assert reloaded is not original
    assert_frame_equal(reloaded.df, expected)

    # Storing a second time has to return the same object with intact data.
    result = reloaded.store()
    assert result is reloaded
    assert_frame_equal(result.df, expected)

0 comments on commit 58402d5

Please sign in to comment.