-
Notifications
You must be signed in to change notification settings - Fork 1
/
interpretability_report.py
242 lines (191 loc) · 10.8 KB
/
interpretability_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import logging
from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader
from itertools import zip_longest
from os.path import join, basename, exists
from sys import stdout
from reportlab.platypus import Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import Spacer, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import styles
# Module-level logger for the report builder, named after this source file.
report_logger = logging.getLogger(basename(__file__))

# Dedicated stdout handler: only WARNING and above are emitted, each record
# rendered as "<logger name> - <level> - <message>".
report_handler = logging.StreamHandler(stream=stdout)
report_handler.setFormatter(logging.Formatter("%(name)s - %(levelname)s - %(message)s"))
report_handler.setLevel(logging.WARNING)

report_logger.addHandler(report_handler)
# ---------------------------------------------------------------------------
# Text blocks for the multiclass interpretability report.
#
# These are plain strings (adjacent literals concatenated inside parentheses);
# they are not referenced by the Report class in this file, so they are
# presumably consumed by report-building callers elsewhere.
# NOTE(review): several of these user-facing texts contain wording/spelling
# slips (e.g. "hierarquical", "positively of negatively"); they are left
# untouched here because they are runtime strings.
# ---------------------------------------------------------------------------

# Main title of the multiclass report.
REPORT_MAIN_TITLE_MULTICLASS = "Model Interpretability Report (Multiclass)"
# Introductory paragraph describing the SHAP method.
REPORT_SHAP_PREAMBLE = (
"This report sustains the idea of being able to interpret and explain how and why the chosen model is classifying "
"each entry as it is. All of Interpretability are based in the SHAP method, in which calculates what's the importance "
"level for each feature in the classification process. It uses Shapley Values, a Game Theory concept, "
"as a descriptive metric to create an hierarquical structure between the features."
)
# Section title for the summary plots.
REPORT_SUMMARY_TITLE = "Summary Plots"
# First explanatory paragraph for the summary plot (what the plot shows).
REPORT_SHAP_SUMMARY_1 = (
"The above plot is called Summary plot and it shows, for each class, how low/high values of each feature "
"contributed for the classification with that class. The features are ranked from most descriptive to "
"least descriptive. This plot is a summarization of all the entries in the test set."
)
# Second explanatory paragraph for the summary plot (how to read SHAP signs/colors).
REPORT_SHAP_SUMMARY_2 = (
"An impact with positive SHAP value means that a high (red dots), medium (purple dots) or low (blue dots) "
"feature value contributes positively of an entry to be classified with that class. The inverse happens with "
"negative SHAP values."
)
# Section title for the waterfall plots.
REPORT_WATERFALL_TITLE = "Waterfall Plots"
# Callable (not a plain string): takes the number of samples per class and
# returns the first waterfall paragraph with that number interpolated.
REPORT_SHAP_WATERFALL_1 = lambda n_samples: (
"A Waterfall plot shows, for some entries, how each of the features contributed for it to be classified with "
f"its classified class. In this case, {n_samples} samples for each class were chosen randomly to be analyzed."
)
# Second waterfall paragraph (how contributions add up from E[f(x)] to f(x)).
REPORT_SHAP_WATERFALL_2 = (
"From a base expected value, E[f(x)], each feature contributes positively of negatively towards the entry's given "
"class. At the end, when all the contributions are summed with E[f(x)], we get the final value of f(x) which led "
"to the classification result."
)
# ---------------------------------------------------------------------------
# Text blocks for the binary (BioAutoML) interpretability report.
#
# The triple-quoted blocks keep their leading and trailing newline as part of
# the string value.
# NOTE(review): these user-facing texts contain wording/spelling slips
# (e.g. "trainament conjunction", "limite"); they are left untouched here
# because they are runtime strings.
# ---------------------------------------------------------------------------

# Main title of the binary report.
REPORT_MAIN_TITLE_BINARY = "Model Interpretability Report (BioAutoML)"
# One-line introduction to SHAP.
REPORT_SHAP_PREAMBLE_BINARY = "SHAP: For each sample the SHAP do calculate the feature importance for the classification decision."
# Explanation accompanying the bar plot (average contribution per feature).
REPORT_SHAP_BAR_BINARY = """
This graph shows the average contribution of each feature, for then highlighting the best features for the model.
Through this graph it is possible to understand which are the features most important for the problem.
"""
# Explanation accompanying the beeswarm plot (one row per feature, one dot per sample).
REPORT_SHAP_BEESWARM_BINARY= """
Each line in this graph represents a feature and each dot a sample of the trainament conjunction.
Through this graph it is possible to try to establish a correlation between the value of the sample, being high or low,
with your contribution to the prediction.
"""
# Explanation accompanying the per-sample waterfall plots.
REPORT_SHAP_WATERFALL_BINARY = """
Each graph above it is referent to a specific sample, being that the title describes the sample label.
Each line shows a feature, on the left side can see the sample value for this feature and in the colorful bars can see the contribution value for the classification in this class.
And can see the limite E[f(x)], values below this number belong one class and values above this same number belong the other class.
"""
def make_bold(s):
    """Return *s* wrapped in ``<b>...</b>`` paragraph markup.

    *s* is interpolated with an f-string, so any object with a string
    representation is accepted. No escaping is performed.
    """
    return f"<b>{s}</b>"
def make_font_size(s, size):
    """Return *s* wrapped in a ``<font size=...>`` paragraph markup tag.

    Both *s* and *size* are interpolated with an f-string; no escaping or
    validation of *size* is performed here.
    """
    return f"<font size={size}>{s}</font>"
class Report:
    """Incrementally assemble a PDF report with ReportLab's platypus layer.

    Flowables (paragraphs, spacers, images, tables) are accumulated in
    ``self.story`` by the ``insert_*`` methods and rendered to disk by
    :meth:`build`.
    """

    # Stylesheet holding the sample styles plus the 'Justify' and 'Center'
    # paragraph styles registered in __init__.
    styles = None
    # Ordered list of flowables that make up the document.
    story = None
    # The SimpleDocTemplate the story is rendered into.
    doc = None
    # Page width minus the left and right margins, in points.
    text_width = None

    def __init__(self, report_name, directory=".", lr_margin=float(0.5*inch), tb_margin=float(0.25*inch)):
        """Create a new A4 PDF report with filename 'report_name'.

        Args:
            report_name: File name of the PDF, joined onto ``directory``.
            directory: Directory the PDF is written to.
            lr_margin: Left and right page margin, in points.
            tb_margin: Top and bottom page margin, in points.
        """
        self.styles = getSampleStyleSheet()
        self.story = []
        self.doc = SimpleDocTemplate(
            join(directory, report_name),
            leftMargin=lr_margin,
            rightMargin=lr_margin,
            topMargin=tb_margin,
            bottomMargin=tb_margin,
            pagesize=A4,
        )

        self.styles.add(ParagraphStyle(name='Justify', fontName="Helvetica",
                                       alignment=TA_JUSTIFY, firstLineIndent=0.3*inch))
        self.styles.add(ParagraphStyle(name='Center', fontName="Helvetica",
                                       alignment=TA_CENTER))

        page_width, _ = A4
        self.text_width = page_width - 2*lr_margin

    @staticmethod
    def _checked_margin(value, name, default):
        """Return a usable margin: ``value`` if non-negative, else ``default``.

        A negative margin is reported once through ``report_logger`` and
        replaced by ``default``, so the behaviour matches what the warning
        message announces. Zero is a valid margin and produces no warning.
        """
        if value < 0:
            report_logger.warning(
                "'%s' can't be negative. Ignoring it and using default value (%s). [%s=%s]",
                name, default, name, value,
            )
            return default
        return value

    def _append_spacer(self, height):
        """Append a vertical spacer of ``height`` points; a zero height adds nothing."""
        if height > 0:
            self.story.append(Spacer(1, height))

    def __get_image_preserving_ratio(self, path, width, **kwargs):
        """Load the image at ``path`` scaled to ``width``, preserving aspect ratio.

        Extra keyword arguments are forwarded to the ``Image`` flowable.
        """
        img = ImageReader(path)
        w, h = img.getSize()
        return Image(path, width=width, height=(width * (h / float(w))), **kwargs)

    def insert_doc_header(self, title, font_size=16, logo_fig=None, pre_margin=1, pos_margin=18, bold=True):
        """Insert a header with the given title and, optionally, a logo.

        Without ``logo_fig`` the title is inserted as a centered paragraph.
        With it, title and logo are laid out side by side in a one-row table
        (80% / 20% of the text width).

        Args:
            title: Header text (may contain paragraph markup).
            font_size: Font size applied to the title.
            logo_fig: Path to the logo image, or a falsy value for no logo.
            pre_margin: Vertical space before the header, in points.
            pos_margin: Vertical space after the header, in points.
            bold: Whether the title is rendered in bold.

        Raises:
            AssertionError: If ``logo_fig`` is given but does not exist.
        """
        pre_margin = self._checked_margin(pre_margin, 'pre_margin', 1)
        pos_margin = self._checked_margin(pos_margin, 'pos_margin', 18)

        if not logo_fig:
            # Forward pre_margin too, so it is honoured on the logo-less path.
            self.insert_text_on_doc(title, font_size=font_size, style='Center',
                                    pre_margin=pre_margin, pos_margin=pos_margin, bold=bold)
            return

        # Validate and build everything before touching the story, so a
        # failure does not leave a dangling spacer behind.
        assert exists(logo_fig), f"Logo figure in path {logo_fig} does not exist."

        fmt = make_font_size(make_bold(title) if bold else title, font_size)
        header = Table(
            [
                [Paragraph(fmt, self.styles['Center']),
                 self.__get_image_preserving_ratio(logo_fig, 0.15*self.text_width)]
            ],
            style=TableStyle([('VALIGN', (0, 0), (1, 0), 'MIDDLE')]),
            colWidths=[0.8*self.text_width, 0.2*self.text_width],
        )

        self._append_spacer(pre_margin)
        self.story.append(header)
        self._append_spacer(pos_margin)

    def insert_text_on_doc(self, text, font_size=12, style='Justify', pre_margin=1, pos_margin=12, bold=False):
        """Insert a new paragraph on the report with the given customization.

        Args:
            text: Paragraph text (may contain paragraph markup).
            font_size: Font size applied through a ``<font>`` tag; must be > 0.
            style: Name of a style registered in ``self.styles``.
            pre_margin: Vertical space before the paragraph, in points.
            pos_margin: Vertical space after the paragraph, in points.
            bold: Whether the text is rendered in bold.

        Raises:
            AssertionError: If ``font_size`` is not positive.
            KeyError: If ``style`` is not a registered style name.
        """
        assert font_size > 0, f"Error: 'font_size' can't be negative. Aborted. [font_size={font_size}]"

        pre_margin = self._checked_margin(pre_margin, 'pre_margin', 1)
        pos_margin = self._checked_margin(pos_margin, 'pos_margin', 12)

        # Build the paragraph first: an unknown style fails before the story
        # is modified.
        fmt = make_font_size(make_bold(text) if bold else text, font_size)
        paragraph = Paragraph(fmt, self.styles[style])

        self._append_spacer(pre_margin)
        self.story.append(paragraph)
        self._append_spacer(pos_margin)

    def insert_figure_on_doc(self, fig_paths, pre_margin=1, pos_margin=24):
        """Insert a list of figures pairwise into the report.

        Figures are placed two per row, each half the text width wide. If the
        list has an odd length, the last figure is placed alone on its row.
        All figures are drawn with the aspect ratio of the FIRST figure, so
        the grid stays uniform.

        Args:
            fig_paths: Non-empty sequence of image file paths.
            pre_margin: Vertical space before each row, in points.
            pos_margin: Vertical space after each row, in points.

        Raises:
            AssertionError: If ``fig_paths`` is empty or a figure is missing.
        """
        assert len(fig_paths) > 0, "List of figures (fig_paths) is empty."

        # Group as (left, right) pairs; right is None for a trailing odd item.
        rows = list(zip_longest(*[iter(fig_paths)] * 2, fillvalue=None))

        # Check every path up front: a missing file is reported with a clear
        # message before any image is opened or the story is modified.
        for left, right in rows:
            assert exists(left), f"Figure in path {left} does not exist."
            assert not right or exists(right), f"Figure in path {right} does not exist."

        # Resolve margins once so an invalid value is warned about a single
        # time instead of once per row.
        pre_margin = self._checked_margin(pre_margin, 'pre_margin', 1)
        pos_margin = self._checked_margin(pos_margin, 'pos_margin', 24)

        w, h = ImageReader(fig_paths[0]).getSize()
        ratio = h / float(w)
        img_width = 0.5*self.text_width
        img_height = img_width * ratio

        for left, right in rows:
            self._append_spacer(pre_margin)

            if not right:
                self.story.append(Image(left, width=img_width, height=img_height))
            else:
                self.story.append(Table(
                    [[Image(left, width=img_width, height=img_height),
                      Image(right, width=img_width, height=img_height)]]
                ))

            self._append_spacer(pos_margin)

    def build(self):
        """Render the accumulated story into the PDF document."""
        self.doc.build(self.story)

    def insert_justified_text(self, text, font_size=8, bold=False, left_margin=0.5 * inch, right_margin=0.5 * inch):
        """Insert a justified paragraph with left and right indents into the story.

        No spacer is added before or after the paragraph.

        Args:
            text: The text to insert in the paragraph.
            font_size: The font size for the text; must be > 0.
            bold: Whether the text should be rendered in bold.
            left_margin: Left indent of the paragraph, in points (e.g. ``1 * inch``).
            right_margin: Right indent of the paragraph, in points.

        Raises:
            AssertionError: If ``font_size`` is not positive.

        Example:
            report.insert_justified_text(
                "This is an example of justified text with margins.",
                font_size=14, bold=True, left_margin=1 * inch, right_margin=1 * inch)
        """
        assert font_size > 0, f"Erro: 'font_size' não pode ser negativo. Abortando. [font_size={font_size}]"

        # A fresh stylesheet is created on every call, so customising its
        # 'Normal' style does not leak into self.styles or into other calls.
        style = getSampleStyleSheet()['Normal']
        style.alignment = TA_JUSTIFY
        style.leftIndent = left_margin
        style.rightIndent = right_margin
        style.fontSize = font_size
        # Keep the stock line spacing for small fonts, but grow it for large
        # ones so consecutive lines of a wrapped paragraph do not overlap.
        style.leading = max(style.leading, 1.2 * font_size)

        if bold:
            text = make_bold(text)

        self.story.append(Paragraph(text, style))