Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optional memory usage optimization and show mem usage #437

Merged
merged 2 commits into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions dtale/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,3 +733,12 @@ def apply(df, func, *args, **kwargs):
return df.swifter.progress_bar(False).apply(func, *args, **kwargs)
except ImportError:
return df.apply(func, *args, **kwargs)


def optimize_df(df):
    """
    Convert low-cardinality ``object`` columns of ``df`` to ``category`` dtype.

    A column is converted when fewer than half of its values are distinct.
    The conversion is applied to ``df`` in place, column by column.

    :param df: dataframe whose ``object`` columns should be inspected
    :type df: :class:`pandas:pandas.DataFrame`
    """
    for col in df.select_dtypes(include=["object"]):
        try:
            # dropna=False keeps NaN counted as a distinct value, matching
            # what len(df[col].unique()) reports.
            num_unique_values = df[col].nunique(dropna=False)
        except TypeError:
            # Columns holding unhashable objects (lists, dicts, ...) cannot be
            # categorical; this is an optional optimization, so skip them.
            continue
        num_total_values = len(df[col])
        # Integer comparison instead of `unique / total < 0.5`: avoids
        # ZeroDivisionError on empty frames and floor division on Python 2.
        if num_unique_values * 2 < num_total_values:
            df[col] = df[col].astype("category")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was always told to use `.loc`, but I have heard differing things. Do you know which is more correct?

Suggested change
df[col] = df[col].astype("category")
df.loc[:, col] = df[col].astype("category")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they are the same?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @Centropy-io 🙏 I remember pandas throwing warnings at one point in time if you didn't use `.loc`, because of setting values on a copy, but in this instance it doesn't apply.

return df
13 changes: 12 additions & 1 deletion dtale/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
running_with_flask_debug,
running_with_pytest,
sort_df_for_grid,
optimize_df,
)

logger = getLogger(__name__)
Expand Down Expand Up @@ -765,6 +766,7 @@ def startup(
precision=2,
show_columns=None,
hide_columns=None,
optimize_dataframe=False,
):
"""
Loads and stores data globally
Expand Down Expand Up @@ -871,7 +873,6 @@ def startup(

if data_id is None:
data_id = global_state.new_data_inst()

if global_state.get_settings(data_id) is not None:
curr_settings = global_state.get_settings(data_id)
curr_locked = curr_settings.get("locked", [])
Expand All @@ -895,6 +896,8 @@ def startup(
precision=precision,
)
global_state.set_settings(data_id, base_settings)
if optimize_dataframe:
data = optimize_df(data)
global_state.set_data(data_id, data)
dtypes_state = build_dtypes_state(data, global_state.get_dtypes(data_id) or [])
if show_columns or hide_columns:
Expand Down Expand Up @@ -1123,6 +1126,14 @@ def _load_process(data_id):
start=json_date(mdata["start"], fmt="%-I:%M:%S %p"),
ts=json_timestamp(mdata["start"]),
name=global_state.get_name(data_id),
# mem usage in MB
mem_usage=int(
global_state.get_data(data_id)
.memory_usage(index=False, deep=True)
.sum()
/ 1024
/ 1024
),
)

processes = sorted(
Expand Down
11 changes: 11 additions & 0 deletions static/popups/instances/Instances.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ class Instances extends React.Component {
</span>
)}
className="cell"
/>
<Column
width={150}
dataKey="mem_usage"
label="Memory Usage (MB)"
style={{
textAlign: "center",
paddingRight: ".5em",
fontSize: "80%",
}}
className="cell"
/>
{previewCol}
</Table>
Expand Down
1 change: 1 addition & 0 deletions tests/dtale/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def test_processes(test_data, unittest):
"names": u"date,security_id,foo,bar,baz",
"data_id": c.port,
"columns": 5,
"mem_usage": 0,
}
],
response_data["data"],
Expand Down