forked from bnsreenu/python_for_microscopists
-
Notifications
You must be signed in to change notification settings - Fork 0
/
039-data_grouping_using_Pandas.py
41 lines (31 loc) · 1.41 KB
/
039-data_grouping_using_Pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
__author__ = "Sreenivas Bhattiprolu"
__license__ = "Feel free to copy, I appreciate if you acknowledge Python for Microscopists"
# https://www.youtube.com/watch?v=p7KyukHE9xU
##########################################
#Using group by
#A group-by splits the rows into groups that share the same value of a chosen column,
#e.g. grouping by the set name in our csv dataset gives one group each for sets 1, 2, 3, and 4.
#We can then compute a statistic such as mean, min, max or std for each group separately.
import pandas as pd

#Load the measurements; the csv is looked up relative to the current working directory
df = pd.read_csv('manual_vs_auto.csv')

#pandas labels a csv column whose header is empty 'Unnamed: 0'.
#Give it a meaningful name and drop the Manual2 column, which is not used below.
df = df.rename(columns={'Unnamed: 0': 'Image_set'})
df = df.drop(columns='Manual2')
print(df.head())

#Build one group of rows per distinct Image_set value
group_by_file = df.groupby(by='Image_set')

set_data_count = group_by_file.count()  #Number of non-null values per column in each group
set_data_avg = group_by_file.mean()     #Mean of each column in each group
print(set_data_count)
print(set_data_avg)

#Correlation between data
#Keep only the numeric columns first: since pandas 2.0 DataFrame.corr() no longer
#drops non-numeric columns silently and raises if Image_set holds text labels.
print(df.select_dtypes(include='number').corr())  #Correlation between all numeric columns

#To check correlation for specific columns
#(df still contains Manual and Auto_th_2, so the csv does not need to be read again)
print(df['Manual'].corr(df['Auto_th_2']))
"""
Positive numbers indicate a positive correlation — when one value goes up
the other goes up — and negative numbers represent an inverse correlation —
when one goes up the other goes down. 1.0 indicates a perfect positive
correlation, -1.0 a perfect inverse correlation, and 0 no linear correlation.
"""