From ebb5e9fc616726b4665cc42d7dc4926f36eb7d83 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Thu, 5 Oct 2023 23:06:22 +0300 Subject: [PATCH 1/3] allow unicode roundtrip --- mne/_fiff/tag.py | 7 ++++++- mne/_fiff/write.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mne/_fiff/tag.py b/mne/_fiff/tag.py index 87ea6ca9640..f64d16e1ca6 100644 --- a/mne/_fiff/tag.py +++ b/mne/_fiff/tag.py @@ -4,7 +4,9 @@ # License: BSD-3-Clause from functools import partial +import html import struct +import re import numpy as np from scipy.sparse import csc_matrix, csr_matrix @@ -265,7 +267,10 @@ def _read_string(fid, tag, shape, rlims): """Read a string tag.""" # Always decode to ISO 8859-1 / latin1 (FIFF standard). d = _frombuffer_rows(fid, tag.size, dtype=">c", shape=shape, rlims=rlims) - return str(d.tobytes().decode("latin1", "ignore")) + string = str(d.tobytes().decode("latin1", "ignore")) + if re.search(r"&#[0-9a-fA-F]{6};", string): + string = html.unescape(string) + return string def _read_complex_float(fid, tag, shape, rlims): diff --git a/mne/_fiff/write.py b/mne/_fiff/write.py index c66e3245eb5..b8ed1d2b1d8 100644 --- a/mne/_fiff/write.py +++ b/mne/_fiff/write.py @@ -128,7 +128,10 @@ def write_julian(fid, kind, data): def write_string(fid, kind, data): """Write a string tag.""" - str_data = str(data).encode("latin1") + try: + str_data = str(data).encode("latin1") + except UnicodeEncodeError: + str_data = str(data).encode("latin1", errors="xmlcharrefreplace") data_size = len(str_data) # therefore compute size here my_dtype = ">a" # py2/3 compatible on writing -- don't ask me why if data_size > 0: From 6dcb1861df3c4dac192cea0644443bc35d62295a Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Thu, 5 Oct 2023 23:17:28 +0300 Subject: [PATCH 2/3] add test --- mne/tests/test_evoked.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mne/tests/test_evoked.py b/mne/tests/test_evoked.py index 7c45bab7a7e..040978c9ff9 100644 --- 
a/mne/tests/test_evoked.py +++ b/mne/tests/test_evoked.py @@ -263,6 +263,12 @@ def test_io_evoked(tmp_path): ave_complex = read_evokeds(fname_temp)[0] assert_allclose(ave.data, ave_complex.data.imag) + # test non-ascii comments (gh 11684) + aves1[0].comment = "🙃" + write_evokeds(tmp_path / "evoked-ave.fif", aves1, overwrite=True) + aves1_read = read_evokeds(tmp_path / "evoked-ave.fif")[0] + assert aves1_read.comment == aves1[0].comment + # test warnings on bad filenames fname2 = tmp_path / "test-bad-name.fif" with pytest.warns(RuntimeWarning, match="-ave.fif"): From 6cc5ca2d7f48ab310292c77be5f540befcf4ccc7 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Thu, 5 Oct 2023 23:19:51 +0300 Subject: [PATCH 3/3] changelog --- doc/changes/devel.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/changes/devel.rst b/doc/changes/devel.rst index ff753de08d4..8fc12edc619 100644 --- a/doc/changes/devel.rst +++ b/doc/changes/devel.rst @@ -65,6 +65,7 @@ Bugs - Fix parsing of eye-link :class:`~mne.Annotations` when ``apply_offsets=False`` is provided to :func:`~mne.io.read_raw_eyelink` (:gh:`12003` by `Mathieu Scheltienne`_) - Correctly prune channel-specific :class:`~mne.Annotations` when creating :class:`~mne.Epochs` without the channel(s) included in the channel specific annotations (:gh:`12010` by `Mathieu Scheltienne`_) - Fix :func:`~mne.viz.plot_volume_source_estimates` with :class:`~mne.VolSourceEstimate` which include a list of vertices (:gh:`12025` by `Mathieu Scheltienne`_) +- Add support for non-ASCII characters in Annotations, Evoked comments, etc. when saving to FIFF format (:gh:`12080` by `Daniel McCloy`_) - Correctly handle passing ``"eyegaze"`` or ``"pupil"`` to :meth:`mne.io.Raw.pick` (:gh:`12019` by `Scott Huberty`_) API changes