Skip to content

Commit

Permalink
Adding the option of using different target columns
Browse files Browse the repository at this point in the history
+ pandas update
  • Loading branch information
tuetschek committed Mar 3, 2019
1 parent 26b5f33 commit 1960a99
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions util/williams.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,22 @@
def main(args):
print
print args.pred_file_a, args.pred_file_b
data_a = pd.DataFrame.from_csv(args.pred_file_a, index_col=None, sep='\t')
data_b = pd.DataFrame.from_csv(args.pred_file_b, index_col=None, sep='\t')
data_a = pd.read_csv(args.pred_file_a, index_col=None, sep='\t')
data_b = pd.read_csv(args.pred_file_b, index_col=None, sep='\t')
assert(len(data_a) == len(data_b))
data_a = data_a.sort_values(by=['mr', 'system_output', 'human_rating']).reset_index()
data_b = data_b.sort_values(by=['mr', 'system_output', 'human_rating']).reset_index()
ref = np.array(data_a['human_rating'])
pred_a = np.array(data_a['system_rating'])
pred_b = np.array(data_b['system_rating'])
# ensure the order of instances is the same
data_a = data_a.sort_values(by=['mr', 'system_output', args.target + '_human_rating']).reset_index()
data_b = data_b.sort_values(by=['mr', 'system_output', args.target + '_human_rating']).reset_index()
# get the relevant columns
ref = np.array(data_a[args.target + '_human_rating'])
pred_a = np.array(data_a[args.target + '_system_rating'])
pred_b = np.array(data_b[args.target + '_system_rating'])
for corr in [scipy.stats.pearsonr, scipy.stats.spearmanr]:
c12, _ = corr(ref, pred_a)
c13, _ = corr(ref, pred_b)
c23, _ = corr(pred_a, pred_b)
print corr.__name__, c12, c13, c23, len(data_a)
if c12 < c13: # swap that 1st is always bigger
if c12 < c13: # swap so that 1st is always bigger
print 'SWAPPING A-B'
c12, c13 = c13, c12
os.system("R --no-save --args %f %f %f %d < %s/williams.R | grep '^P-value'" %
Expand All @@ -37,6 +39,8 @@ def main(args):
if __name__ == '__main__':
    # Command-line entry point: build the argument parser, read the
    # arguments and hand them over to main().
    parser = ArgumentParser(
        description='Williams correlation test ' +
                    '(a wrapper for the R script by Y. Graham)')

    # Optional prefix selecting which '<target>_human_rating' /
    # '<target>_system_rating' columns main() reads from the input files.
    parser.add_argument('-t', '--target', type=str, default='quality',
                        help='Target column (default: quality)')

    # The two prediction files whose correlations are compared.
    parser.add_argument('pred_file_a', type=str,
                        help='1st file with predictions to compare')
    parser.add_argument('pred_file_b', type=str,
                        help='2nd file with predictions to compare')

    cli_args = parser.parse_args()
    main(cli_args)

0 comments on commit 1960a99

Please sign in to comment.