-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathPythonROUGE.py
129 lines (115 loc) · 5.32 KB
/
PythonROUGE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Created on Mon Aug 13 10:31:58 2012
author: Miguel B. Almeida
mail: mba@priberam.pt
"""
import os
import re
import subprocess
# Wrapper function to use ROUGE from Python easily
# Inputs:
# guess_summ_list, a string with the absolute path to the file with your guess summary
# ref_summ_list, a list of lists of paths to multiple reference summaries.
# IMPORTANT: all the reference summaries must be in the same directory!
# (optional) ngram_order, the order of the N-grams used to compute ROUGE
# the default is 1 (unigrams)
# Output: a tuple of the form (recall,precision,F_measure)
#
# Example usage: PythonROUGE('/home/foo/my_guess_summary.txt',[/home/bar/my_ref_summary_1.txt,/home/bar/my_ref_summary_2.txt])
def PythonROUGE(guess_summ_list,ref_summ_list,ngram_order=2, byte=0):
""" Wrapper function to use ROUGE from Python easily. """
# even though we ask that the first argument is a list,
# if it is a single string we can handle it
if type(guess_summ_list) == str:
temp = list()
temp.append(guess_summ_list)
guess_summ_list = temp
del temp
# even though we ask that the second argument is a list of lists,
# if it is a single string we can handle it
if type(ref_summ_list[0]) == str:
temp = list()
temp.append(ref_summ_list)
ref_summ_list = temp
del temp
# this is the path to your ROUGE distribution
ROUGE_path = 'data/script/ROUGE-1.5.5.pl'
data_path = 'data/script/ROUGE'
# these are the options used to call ROUGE
# feel free to edit this is you want to call ROUGE with different options
options = '-w 1.2 -a -m -n ' + str(ngram_order)
if byte > 0:
options = '-b ' + str(byte) + ' ' + options
# this is a temporary XML file which will contain information
# in the format ROUGE uses
xml_path = 'data/script/temp.xml'
xml_file = open(xml_path,'w')
xml_file.write('<ROUGE-EVAL version="1.0">\n')
for guess_summ_index,guess_summ_file in enumerate(guess_summ_list):
xml_file.write('<EVAL ID="' + str(guess_summ_index+1) + '">\n')
create_xml(xml_file,guess_summ_file,ref_summ_list[guess_summ_index])
xml_file.write('</EVAL>\n')
xml_file.write('</ROUGE-EVAL>\n')
xml_file.close()
# this is the file where the output of ROUGE will be stored
ROUGE_output_path = 'data/script/ROUGE_result.txt'
# this is where we run ROUGE itself
exec_command = 'perl ' + ROUGE_path + ' -e ' + data_path + ' ' + options + ' ' + xml_path + ' > ' + ROUGE_output_path
os.system(exec_command)
# here, we read the file with the ROUGE output and
# look for the recall, precision, and F-measure scores
recall_list = list()
precision_list = list()
F_measure_list = list()
ROUGE_output_file = open(ROUGE_output_path,'r')
index = ['1', '2', 'L']
for idx in index:
ROUGE_output_file.seek(0)
for line in ROUGE_output_file:
match = re.findall('X ROUGE-' + idx + ' Average_R: ([0-9.]+)',line)
if match != []:
recall_list.append(float(match[0]))
match = re.findall('X ROUGE-' + idx + ' Average_P: ([0-9.]+)',line)
if match != []:
precision_list.append(float(match[0]))
match = re.findall('X ROUGE-' + idx + ' Average_F: ([0-9.]+)',line)
if match != []:
F_measure_list.append(float(match[0]))
ROUGE_output_file.close()
# remove temporary files which were created
#os.remove(xml_path)
#os.remove(ROUGE_output_path)
return (recall_list,precision_list,F_measure_list)
# This is an auxiliary function
# It creates an XML file which ROUGE can read
# Don't ask me how ROUGE works, because I don't know!
def create_xml(xml_file,guess_summ_file,ref_summ_list):
xml_file.write('<PEER-ROOT>\n')
guess_summ_dir = os.path.dirname(guess_summ_file)
xml_file.write(guess_summ_dir + '\n')
xml_file.write('</PEER-ROOT>\n')
xml_file.write('<MODEL-ROOT>\n')
ref_summ_dir = os.path.dirname(ref_summ_list[0] + '\n')
xml_file.write(ref_summ_dir + '\n')
xml_file.write('</MODEL-ROOT>\n')
xml_file.write('<INPUT-FORMAT TYPE="SPL">\n')
xml_file.write('</INPUT-FORMAT>\n')
xml_file.write('<PEERS>\n')
guess_summ_basename = os.path.basename(guess_summ_file)
xml_file.write('<P ID="X">' + guess_summ_basename + '</P>\n')
xml_file.write('</PEERS>\n')
xml_file.write('<MODELS>')
letter_list = ['A','B','C','D','E','F','G','H','I','J']
for ref_summ_index,ref_summ_file in enumerate(ref_summ_list):
ref_summ_basename = os.path.basename(ref_summ_file)
xml_file.write('<M ID="' + letter_list[ref_summ_index] + '">' + ref_summ_basename + '</M>\n')
xml_file.write('</MODELS>\n')
# This is only called if this file is executed as a script.
# It shows an example of usage.
if __name__ == '__main__':
guess_summary_list = ['Example/Guess_Summ_1.txt','Example/Guess_Summ_2.txt']
ref_summ_list = [['Example/Ref_Summ_1_1.txt','Example/Ref_Summ_1_2.txt'] , ['Example/Ref_Summ_2_1.txt','Example/Ref_Summ_2_2.txt','Example/Ref_Summ_2_3.txt']]
recall_list,precision_list,F_measure_list = PythonROUGE(guess_summary_list,ref_summ_list)
#print 'recall = ' + str(recall_list)
#print 'precision = ' + str(precision_list)
#print 'F = ' + str(F_measure_list)