Skip to content

Commit

Permalink
add unicode roundtrip for FIF (#12080)
Browse files Browse the repository at this point in the history
  • Loading branch information
drammock authored Oct 6, 2023
1 parent 6aca4ec commit 23fa43c
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/changes/devel.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Bugs
- Fix parsing of eye-link :class:`~mne.Annotations` when ``apply_offsets=False`` is provided to :func:`~mne.io.read_raw_eyelink` (:gh:`12003` by `Mathieu Scheltienne`_)
- Correctly prune channel-specific :class:`~mne.Annotations` when creating :class:`~mne.Epochs` without the channel(s) included in the channel specific annotations (:gh:`12010` by `Mathieu Scheltienne`_)
- Fix :func:`~mne.viz.plot_volume_source_estimates` with :class:`~mne.VolSourceEstimate` which include a list of vertices (:gh:`12025` by `Mathieu Scheltienne`_)
- Add support for non-ASCII characters in Annotations, Evoked comments, etc. when saving to FIFF format (:gh:`12080` by `Daniel McCloy`_)
- Correctly handle passing ``"eyegaze"`` or ``"pupil"`` to :meth:`mne.io.Raw.pick` (:gh:`12019` by `Scott Huberty`_)

API changes
Expand Down
7 changes: 6 additions & 1 deletion mne/_fiff/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
# License: BSD-3-Clause

from functools import partial
import html
import struct
import re

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix
def _read_string(fid, tag, shape, rlims):
    """Read a string tag.

    The raw bytes are decoded as ISO 8859-1 / latin1. Decimal numeric
    character references of the form emitted by Python's
    ``xmlcharrefreplace`` error handler (``&#NNN;``) are then converted back
    to the characters they stand for, so text that could not be represented
    in latin1 when it was written is restored on reading.

    Parameters
    ----------
    fid : file-like
        Passed through to ``_frombuffer_rows`` (presumably the open FIF
        file; confirm against callers).
    tag : object
        Tag descriptor; only ``tag.size`` is used here.
    shape : tuple | None
        Passed through to ``_frombuffer_rows``.
    rlims : tuple | None
        Passed through to ``_frombuffer_rows``.

    Returns
    -------
    string : str
        The decoded string.
    """
    # Always decode to ISO 8859-1 / latin1 (FIFF standard).
    d = _frombuffer_rows(fid, tag.size, dtype=">c", shape=shape, rlims=rlims)
    string = str(d.tobytes().decode("latin1", "ignore"))

    def _charref_to_char(match):
        codepoint = int(match.group(1))
        # Only code points outside latin1 can have been escaped on writing;
        # anything else (or anything beyond the Unicode range) is literal
        # text and must be left untouched.
        if not 0xFF < codepoint <= 0x10FFFF:
            return match.group(0)
        return chr(codepoint)

    # ``xmlcharrefreplace`` writes decimal references without leading zeros,
    # using 3 digits (U+0100 -> 256) up to 7 digits (U+10FFFF -> 1114111).
    # Substituting only those references (instead of html.unescape on the
    # whole string) keeps literal text such as "&amp;" intact.
    return re.sub(r"&#([1-9][0-9]{2,6});", _charref_to_char, string)


def _read_complex_float(fid, tag, shape, rlims):
Expand Down
5 changes: 4 additions & 1 deletion mne/_fiff/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ def write_julian(fid, kind, data):

def write_string(fid, kind, data):
"""Write a string tag."""
str_data = str(data).encode("latin1")
try:
str_data = str(data).encode("latin1")
except UnicodeEncodeError:
str_data = str(data).encode("latin1", errors="xmlcharrefreplace")
data_size = len(str_data) # therefore compute size here
my_dtype = ">a" # py2/3 compatible on writing -- don't ask me why
if data_size > 0:
Expand Down
6 changes: 6 additions & 0 deletions mne/tests/test_evoked.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,12 @@ def test_io_evoked(tmp_path):
ave_complex = read_evokeds(fname_temp)[0]
assert_allclose(ave.data, ave_complex.data.imag)

# test non-ascii comments (gh 11684)
aves1[0].comment = "🙃"
write_evokeds(tmp_path / "evoked-ave.fif", aves1, overwrite=True)
aves1_read = read_evokeds(tmp_path / "evoked-ave.fif")[0]
assert aves1_read.comment == aves1[0].comment

# test warnings on bad filenames
fname2 = tmp_path / "test-bad-name.fif"
with pytest.warns(RuntimeWarning, match="-ave.fif"):
Expand Down

0 comments on commit 23fa43c

Please sign in to comment.