Skip to content

Commit

Permalink
update plots
Browse files Browse the repository at this point in the history
  • Loading branch information
dtch1997 committed May 20, 2024
1 parent ada05d8 commit b914302
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 8 deletions.
Binary file modified repepo/paper/figures/counts_of_positive_options_per_dataset.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified repepo/paper/figures/slope_vs_pos_option_is_Yes.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
91 changes: 83 additions & 8 deletions repepo/paper/make_figures_steering_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ def plot_slope_vs_response_is_A(df):
order = order['Dataset'],
errorbar = None
)

fig.tight_layout()
fig.savefig('figures/slope_vs_pos_option_is_A.png')

plot_slope_vs_response_is_A(df)
Expand Down Expand Up @@ -192,6 +194,7 @@ def plot_response_is_A(df):
ax.set_ylabel('Dataset')
ax.set_xlabel('Count')
ax.set_title('Count of Positive Options per Dataset')
fig.tight_layout()
plt.show()


Expand All @@ -202,16 +205,18 @@ def plot_slope_vs_response_is_Yes(df):

# Filter by datasets where there is at least one Yes and one No
# Count the number of "Yes" positive options (and total rows) per dataset
yes_counts = df.groupby('dataset_name')['pos_option_is_Yes'].sum()
total_counts = df.groupby('dataset_name')['pos_option_is_Yes'].count()
# Filter by datasets where there is at least one Yes and one No
selected_datasets = total_counts[(yes_counts > 0) & (yes_counts < total_counts)]
print(selected_datasets)
# yes_counts = df.groupby('dataset_name')['pos_option_is_Yes'].sum()
# total_counts = df.groupby('dataset_name')['pos_option_is_Yes'].count()
# # Filter by datasets where there is at least one Yes and one No
# selected_datasets = total_counts[(yes_counts > 0) & (yes_counts < total_counts)]
# print(selected_datasets)

fig, ax = plt.subplots(figsize=(10, 15))

plot_df = df[
(df['steering_label'] == 'baseline') &
(df['dataset_label'] == 'baseline') &
(df['dataset_name'].isin(selected_datasets.index))
(df['dataset_label'] == 'baseline')
# (df['dataset_name'].isin(selected_datasets.index))
]
# Rename
plot_df = plot_df.rename(columns = {'slope': 'Steerability', 'dataset_name': 'Dataset'})
Expand All @@ -221,16 +226,18 @@ def plot_slope_vs_response_is_Yes(df):
plot_df['Positive Option'] = plot_df['pos_A'] + ' and ' + plot_df['pos_Yes']

order = plot_df[['Dataset', 'median_slope']].drop_duplicates().sort_values('median_slope', ascending=False)
fig, ax = plt.subplots(figsize=(6, 10))
hue_order = ['A and No', 'A and Yes', 'B and No', 'B and Yes']
sns.barplot(
data=plot_df,
hue = 'Positive Option',
x='Steerability',
y='Dataset',
ax=ax,
order = order['Dataset'],
hue_order = hue_order,
errorbar = None
)
fig.tight_layout()
fig.savefig('figures/slope_vs_pos_option_is_Yes.png')

plot_slope_vs_response_is_Yes(df)
Expand All @@ -254,6 +261,8 @@ def plot_response_is_Yes(df):
# Plot a stacked barplot of the fraction of A vs B responses in each dataset.
count_df = plot_df.groupby(['Dataset', 'Positive Option']).size().unstack().fillna(0)
fig, ax = plt.subplots(figsize=(10, 15))
# Set order by order
count_df = count_df.loc[order['Dataset']]
count_df.plot(kind='barh', stacked=True, ax = ax)
ax.set_ylabel('Dataset')
ax.set_xlabel('Count')
Expand All @@ -264,6 +273,72 @@ def plot_response_is_Yes(df):

plot_response_is_Yes(df)

# %%
# The above two in the same figure
def _label_positive_options(plot_df):
    """Return a renamed copy of ``plot_df`` with a combined 'Positive Option' column.

    Renames ``slope`` -> ``Steerability`` and ``dataset_name`` -> ``Dataset``,
    then derives the labels 'A'/'B' and 'Yes'/'No' from the truthiness of
    ``pos_option_is_A`` and ``pos_option_is_Yes`` and joins them as
    e.g. ``'A and Yes'``.
    """
    # ``rename`` returns a new frame, so the column assignments below do not
    # write into a slice of the caller's DataFrame.
    plot_df = plot_df.rename(columns={'slope': 'Steerability', 'dataset_name': 'Dataset'})
    plot_df['pos_A'] = plot_df['pos_option_is_A'].apply(lambda x: 'A' if x else 'B')
    plot_df['pos_Yes'] = plot_df['pos_option_is_Yes'].apply(lambda x: 'Yes' if x else 'No')
    plot_df['Positive Option'] = plot_df['pos_A'] + ' and ' + plot_df['pos_Yes']
    return plot_df


def plot_slope_and_counts_for_response_is_Yes(df):
    """Plot mean steerability and positive-option counts side by side.

    Left panel: a seaborn bar plot of ``slope`` (shown as 'Steerability') per
    dataset, split by the positive-option label, with datasets ordered by
    descending ``median_slope``.
    Right panel: a stacked horizontal bar plot of how many rows with
    ``multiplier == 0`` fall under each positive-option label per dataset.

    Only rows where both ``steering_label`` and ``dataset_label`` equal
    ``'baseline'`` are used. The figure is written to
    ``figures/plot_slope_and_counts_for_response_is_Yes.png`` and then shown.

    Args:
        df: DataFrame with the columns ``steering_label``, ``dataset_label``,
            ``multiplier``, ``slope``, ``median_slope``, ``dataset_name``,
            ``pos_option_is_A`` and ``pos_option_is_Yes``.
    """
    fig, axs = plt.subplots(
        nrows=1,
        ncols=2,
        width_ratios=[5, 1],
        figsize=(10, 10),
        sharey=True,
    )
    hue_order = ['A and No', 'A and Yes', 'B and No', 'B and Yes']
    is_baseline = (df['steering_label'] == 'baseline') & (df['dataset_label'] == 'baseline')

    # --- Left panel: steerability per dataset -------------------------------
    ax = axs[0]
    slope_df = _label_positive_options(df[is_baseline])
    order = (
        slope_df[['Dataset', 'median_slope']]
        .drop_duplicates()
        .sort_values('median_slope', ascending=False)
    )
    # De-duplicate so every dataset occupies exactly one row in both panels,
    # even if a dataset were to carry more than one ``median_slope`` value.
    dataset_order = order['Dataset'].drop_duplicates().tolist()
    sns.barplot(
        data=slope_df,
        hue='Positive Option',
        x='Steerability',
        y='Dataset',
        ax=ax,
        order=dataset_order,
        hue_order=hue_order,
        errorbar=None,
    )
    ax.set_title('Mean Steerability')

    # --- Right panel: option counts per dataset -----------------------------
    ax = axs[1]
    count_source = _label_positive_options(df[is_baseline & (df['multiplier'] == 0)])
    count_df = (
        count_source.groupby(['Dataset', 'Positive Option'])
        .size()
        .unstack(fill_value=0)
    )
    # The two panels share their y-axis, so the rows here must follow the same
    # dataset order as the left panel; otherwise the bars (and the shared tick
    # labels) of the two panels refer to different datasets. ``reindex`` also
    # keeps a zero-count row for datasets that have no ``multiplier == 0`` rows.
    # Pinning the columns to ``hue_order`` keeps the stacking/colour order
    # stable when some option combination is absent from the data.
    count_df = count_df.reindex(index=dataset_order, columns=hue_order, fill_value=0)
    count_df.plot(kind='barh', stacked=True, ax=ax)
    ax.set_ylabel('Dataset')
    ax.set_xlabel('Count')
    ax.set_title('Option Counts')
    # The left panel already carries the legend for the option labels.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    fig.tight_layout()
    fig.savefig("figures/plot_slope_and_counts_for_response_is_Yes.png")
    plt.show()

plot_slope_and_counts_for_response_is_Yes(df)


# %%

def compute_variance(df, dataset_name):
Expand Down

0 comments on commit b914302

Please sign in to comment.