-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize_for_RPPA.py
37 lines (27 loc) · 1011 Bytes
/
normalize_for_RPPA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
def normalize_dataframe(df):
"""
Normalize each row of the DataFrame by subtracting the row average.
Args:
df (pandas.DataFrame): Input DataFrame
Returns:
pandas.DataFrame: Normalized DataFrame
"""
return df.sub(df.mean(axis=1), axis=0)
# Path to your existing CSV file
input_file = '/Users/stanleychen/git/Melanoma/data/combined_RPPA_data.csv'
# Read the CSV file
# Assuming the first column is the gene names and should be used as index
df = pd.read_csv(input_file, index_col=0)
# Display the first few rows of the original DataFrame
print("Original DataFrame:")
print(df.head())
# Normalize the DataFrame
normalized_df = normalize_dataframe(df)
# Display the first few rows of the normalized DataFrame
print("\nNormalized DataFrame:")
print(normalized_df.head())
# Save the normalized DataFrame to a new CSV file
output_file = 'normalized_RPPA_data.csv'
normalized_df.to_csv(output_file)
print(f"\nNormalized data saved to {output_file}")