Skip to content

Commit 1ae309d

Browse files
committed
BUG: escaping <> in pandas HTML tables #2617
1 parent fe54b7b commit 1ae309d

File tree

3 files changed

+93
-52
lines changed

3 files changed

+93
-52
lines changed

pandas/core/common.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,7 @@ def _pprint_dict(seq, _nest_lvl=0):
13041304
return fmt % ", ".join(pairs)
13051305

13061306

1307-
def pprint_thing(thing, _nest_lvl=0, escape_chars=None):
1307+
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False):
13081308
"""
13091309
This function is the sanctioned way of converting objects
13101310
to a unicode representation.
@@ -1316,8 +1316,13 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None):
13161316
----------
13171317
thing : anything to be formatted
13181318
_nest_lvl : internal use only. pprint_thing() is mutually-recursive
1319-
with pprint_sequence, this argument is used to keep track of the
1320-
current nesting level, and limit it.
1319+
with pprint_sequence, this argument is used to keep track of the
1320+
current nesting level, and limit it.
1321+
escape_chars : list or dict, optional
1322+
Characters to escape. If a dict is passed the values are the
1323+
replacements
1324+
default_escapes : bool, default False
1325+
If True, the given escape characters are added to the defaults; if False, they replace the defaults
13211326
13221327
Returns
13231328
-------
@@ -1355,7 +1360,14 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None):
13551360
'\n': r'\n',
13561361
'\r': r'\r',
13571362
}
1358-
escape_chars = escape_chars or tuple()
1363+
if isinstance(escape_chars, dict):
1364+
if default_escapes:
1365+
translate.update(escape_chars)
1366+
else:
1367+
translate = escape_chars
1368+
escape_chars = escape_chars.keys()
1369+
else:
1370+
escape_chars = escape_chars or tuple()
13591371
for c in escape_chars:
13601372
result = result.replace(c, translate[c])
13611373

pandas/core/format.py

+21-21
Original file line numberDiff line numberDiff line change
@@ -486,20 +486,11 @@ def __init__(self, formatter, classes=None):
486486
self.frame = self.fmt.frame
487487
self.columns = formatter.columns
488488
self.elements = []
489-
490-
_bold_row = self.fmt.kwds.get('bold_rows', False)
491-
_temp = '<strong>%s</strong>'
492-
493-
def _maybe_bold_row(x):
494-
if _bold_row:
495-
return ([_temp % y for y in x] if isinstance(x, tuple)
496-
else _temp % x)
497-
else:
498-
return x
499-
self._maybe_bold_row = _maybe_bold_row
489+
self.bold_rows = self.fmt.kwds.get('bold_rows', False)
500490

501491
def write(self, s, indent=0):
502-
self.elements.append(' ' * indent + com.pprint_thing(s))
492+
rs = com.pprint_thing(s)
493+
self.elements.append(' ' * indent + rs)
503494

504495
def write_th(self, s, indent=0, tags=None):
505496
if (self.fmt.col_space is not None
@@ -517,11 +508,14 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
517508
start_tag = '<%s %s>' % (kind, tags)
518509
else:
519510
start_tag = '<%s>' % kind
511+
512+
esc = {'<' : r'&lt;', '>' : r'&gt;'}
513+
rs = com.pprint_thing(s, escape_chars=esc)
520514
self.write(
521-
'%s%s</%s>' % (start_tag, com.pprint_thing(s), kind), indent)
515+
'%s%s</%s>' % (start_tag, rs, kind), indent)
522516

523517
def write_tr(self, line, indent=0, indent_delta=4, header=False,
524-
align=None, tags=None):
518+
align=None, tags=None, nindex_levels=0):
525519
if tags is None:
526520
tags = {}
527521

@@ -533,7 +527,7 @@ def write_tr(self, line, indent=0, indent_delta=4, header=False,
533527

534528
for i, s in enumerate(line):
535529
val_tag = tags.get(i, None)
536-
if header:
530+
if header or (self.bold_rows and i < nindex_levels):
537531
self.write_th(s, indent, tags=val_tag)
538532
else:
539533
self.write_td(s, indent, tags=val_tag)
@@ -683,9 +677,10 @@ def _write_regular_rows(self, fmt_values, indent):
683677

684678
for i in range(len(self.frame)):
685679
row = []
686-
row.append(self._maybe_bold_row(index_values[i]))
680+
row.append(index_values[i])
687681
row.extend(fmt_values[j][i] for j in range(ncols))
688-
self.write_tr(row, indent, self.indent_delta, tags=None)
682+
self.write_tr(row, indent, self.indent_delta, tags=None,
683+
nindex_levels=1)
689684

690685
def _write_hierarchical_rows(self, fmt_values, indent):
691686
template = 'rowspan="%d" valign="top"'
@@ -706,27 +701,32 @@ def _write_hierarchical_rows(self, fmt_values, indent):
706701
row = []
707702
tags = {}
708703

704+
sparse_offset = 0
709705
j = 0
710706
for records, v in zip(level_lengths, idx_values[i]):
711707
if i in records:
712708
if records[i] > 1:
713709
tags[j] = template % records[i]
714710
else:
711+
sparse_offset += 1
715712
continue
713+
716714
j += 1
717-
row.append(self._maybe_bold_row(v))
715+
row.append(v)
718716

719717
row.extend(fmt_values[j][i] for j in range(ncols))
720-
self.write_tr(row, indent, self.indent_delta, tags=tags)
718+
self.write_tr(row, indent, self.indent_delta, tags=tags,
719+
nindex_levels=len(levels) - sparse_offset)
721720
else:
722721
for i in range(len(frame)):
723722
idx_values = zip(*frame.index.format(sparsify=False,
724723
adjoin=False,
725724
names=False))
726725
row = []
727-
row.extend(self._maybe_bold_row(x) for x in idx_values[i])
726+
row.extend(idx_values[i])
728727
row.extend(fmt_values[j][i] for j in range(ncols))
729-
self.write_tr(row, indent, self.indent_delta, tags=None)
728+
self.write_tr(row, indent, self.indent_delta, tags=None,
729+
nindex_levels=len(frame.index.nlevels))
730730

731731

732732
def _get_level_lengths(levels):

pandas/tests/test_format.py

+56-27
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,35 @@ def test_to_html_unicode(self):
249249
df = DataFrame({'A': [u'\u03c3']})
250250
df.to_html()
251251

252+
def test_to_html_escaped(self):
253+
a = 'str<ing1'
254+
b = 'stri>ng2'
255+
256+
test_dict = {'co<l1':{a:type(a), b:type(b)},'co>l2':{a:type(a), b:type(b)}}
257+
rs = pd.DataFrame(test_dict).to_html()
258+
xp = """<table border="1" class="dataframe">
259+
<thead>
260+
<tr style="text-align: right;">
261+
<th></th>
262+
<th>co&lt;l1</th>
263+
<th>co&gt;l2</th>
264+
</tr>
265+
</thead>
266+
<tbody>
267+
<tr>
268+
<th>str&lt;ing1</th>
269+
<td> &lt;type 'str'&gt;</td>
270+
<td> &lt;type 'str'&gt;</td>
271+
</tr>
272+
<tr>
273+
<th>stri&gt;ng2</th>
274+
<td> &lt;type 'str'&gt;</td>
275+
<td> &lt;type 'str'&gt;</td>
276+
</tr>
277+
</tbody>
278+
</table>"""
279+
self.assertEqual(xp, rs)
280+
252281
def test_to_html_multiindex_sparsify(self):
253282
index = pd.MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]],
254283
names=['foo', None])
@@ -273,24 +302,24 @@ def test_to_html_multiindex_sparsify(self):
273302
</thead>
274303
<tbody>
275304
<tr>
276-
<td rowspan="2" valign="top"><strong>0</strong></td>
277-
<td><strong>0</strong></td>
305+
<th rowspan="2" valign="top">0</th>
306+
<th>0</th>
278307
<td> 0</td>
279308
<td> 1</td>
280309
</tr>
281310
<tr>
282-
<td><strong>1</strong></td>
311+
<th>1</th>
283312
<td> 2</td>
284313
<td> 3</td>
285314
</tr>
286315
<tr>
287-
<td rowspan="2" valign="top"><strong>1</strong></td>
288-
<td><strong>0</strong></td>
316+
<th rowspan="2" valign="top">1</th>
317+
<th>0</th>
289318
<td> 4</td>
290319
<td> 5</td>
291320
</tr>
292321
<tr>
293-
<td><strong>1</strong></td>
322+
<th>1</th>
294323
<td> 6</td>
295324
<td> 7</td>
296325
</tr>
@@ -326,24 +355,24 @@ def test_to_html_multiindex_sparsify(self):
326355
</thead>
327356
<tbody>
328357
<tr>
329-
<td rowspan="2" valign="top"><strong>0</strong></td>
330-
<td><strong>0</strong></td>
358+
<th rowspan="2" valign="top">0</th>
359+
<th>0</th>
331360
<td> 0</td>
332361
<td> 1</td>
333362
</tr>
334363
<tr>
335-
<td><strong>1</strong></td>
364+
<th>1</th>
336365
<td> 2</td>
337366
<td> 3</td>
338367
</tr>
339368
<tr>
340-
<td rowspan="2" valign="top"><strong>1</strong></td>
341-
<td><strong>0</strong></td>
369+
<th rowspan="2" valign="top">1</th>
370+
<th>0</th>
342371
<td> 4</td>
343372
<td> 5</td>
344373
</tr>
345374
<tr>
346-
<td><strong>1</strong></td>
375+
<th>1</th>
347376
<td> 6</td>
348377
<td> 7</td>
349378
</tr>
@@ -368,22 +397,22 @@ def test_to_html_index_formatter(self):
368397
</thead>
369398
<tbody>
370399
<tr>
371-
<td><strong>a</strong></td>
400+
<th>a</th>
372401
<td> 0</td>
373402
<td> 1</td>
374403
</tr>
375404
<tr>
376-
<td><strong>b</strong></td>
405+
<th>b</th>
377406
<td> 2</td>
378407
<td> 3</td>
379408
</tr>
380409
<tr>
381-
<td><strong>c</strong></td>
410+
<th>c</th>
382411
<td> 4</td>
383412
<td> 5</td>
384413
</tr>
385414
<tr>
386-
<td><strong>d</strong></td>
415+
<th>d</th>
387416
<td> 6</td>
388417
<td> 7</td>
389418
</tr>
@@ -795,7 +824,7 @@ def test_to_html(self):
795824
def test_to_html_with_no_bold(self):
796825
x = DataFrame({'x': randn(5)})
797826
ashtml = x.to_html(bold_rows=False)
798-
assert('<strong>' not in ashtml)
827+
assert('<strong>' not in ashtml[ashtml.find('</thead>')])
799828

800829
def test_to_html_columns_arg(self):
801830
result = self.frame.to_html(columns=['A'])
@@ -824,14 +853,14 @@ def test_to_html_multiindex(self):
824853
' </thead>\n'
825854
' <tbody>\n'
826855
' <tr>\n'
827-
' <td><strong>0</strong></td>\n'
856+
' <th>0</th>\n'
828857
' <td> a</td>\n'
829858
' <td> b</td>\n'
830859
' <td> c</td>\n'
831860
' <td> d</td>\n'
832861
' </tr>\n'
833862
' <tr>\n'
834-
' <td><strong>1</strong></td>\n'
863+
' <th>1</th>\n'
835864
' <td> e</td>\n'
836865
' <td> f</td>\n'
837866
' <td> g</td>\n'
@@ -866,14 +895,14 @@ def test_to_html_multiindex(self):
866895
' </thead>\n'
867896
' <tbody>\n'
868897
' <tr>\n'
869-
' <td><strong>0</strong></td>\n'
898+
' <th>0</th>\n'
870899
' <td> a</td>\n'
871900
' <td> b</td>\n'
872901
' <td> c</td>\n'
873902
' <td> d</td>\n'
874903
' </tr>\n'
875904
' <tr>\n'
876-
' <td><strong>1</strong></td>\n'
905+
' <th>1</th>\n'
877906
' <td> e</td>\n'
878907
' <td> f</td>\n'
879908
' <td> g</td>\n'
@@ -901,19 +930,19 @@ def test_to_html_justify(self):
901930
' </thead>\n'
902931
' <tbody>\n'
903932
' <tr>\n'
904-
' <td><strong>0</strong></td>\n'
933+
' <th>0</th>\n'
905934
' <td> 6</td>\n'
906935
' <td> 1</td>\n'
907936
' <td> 223442</td>\n'
908937
' </tr>\n'
909938
' <tr>\n'
910-
' <td><strong>1</strong></td>\n'
939+
' <th>1</th>\n'
911940
' <td> 30000</td>\n'
912941
' <td> 2</td>\n'
913942
' <td> 0</td>\n'
914943
' </tr>\n'
915944
' <tr>\n'
916-
' <td><strong>2</strong></td>\n'
945+
' <th>2</th>\n'
917946
' <td> 2</td>\n'
918947
' <td> 70000</td>\n'
919948
' <td> 1</td>\n'
@@ -935,19 +964,19 @@ def test_to_html_justify(self):
935964
' </thead>\n'
936965
' <tbody>\n'
937966
' <tr>\n'
938-
' <td><strong>0</strong></td>\n'
967+
' <th>0</th>\n'
939968
' <td> 6</td>\n'
940969
' <td> 1</td>\n'
941970
' <td> 223442</td>\n'
942971
' </tr>\n'
943972
' <tr>\n'
944-
' <td><strong>1</strong></td>\n'
973+
' <th>1</th>\n'
945974
' <td> 30000</td>\n'
946975
' <td> 2</td>\n'
947976
' <td> 0</td>\n'
948977
' </tr>\n'
949978
' <tr>\n'
950-
' <td><strong>2</strong></td>\n'
979+
' <th>2</th>\n'
951980
' <td> 2</td>\n'
952981
' <td> 70000</td>\n'
953982
' <td> 1</td>\n'

0 commit comments

Comments
 (0)