forked from CMRR-C2P/MB
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadCMRRPhysio.py
425 lines (365 loc) · 21.6 KB
/
readCMRRPhysio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
#!/usr/bin/env python
"""
Reads and plots active (i.e. non-zero) signals from SIEMENS advanced physiological log / DICOM files
This function expects to find either a combination of individual logfiles (*_ECG.log, *_RESP.log,
*_PULS.log, *_EXT.log, *_Info.log) generated by >=R013 sequences, or a single encoded "_PHYSIO" DICOM
file generated by >=R015 sequences. You can also import the internal 'readCMRRPhysio` library function in python
to obtain active physio traces for ECG1, ECG2, ECG3, ECG4, RESP, PULS, EXT/EXT1 and EXT2 signals:
physio['UUID']: Universally unique identifier string for this measurement
physio['Freq']: Sampling frequency in Hz (= 1/clock-tick; The unit of time is clock-ticks, which normally is 2.5 ms)
physio['SliceMap']: [2 x Volumes x Slices] [1:2,:,:] = start & finish time stamp of each volume/slice
physio['ACQ']: [length = nr of samples] True if acquisition is active at this time; False if not
physio['ECG1']: [length = nr of samples] ECG signal on this channel
physio['ECG2']: [length = nr of samples] [..]
physio['ECG3']: [length = nr of samples] [..]
physio['ECG4']: [length = nr of samples] [..]
physio['RESP']: [length = nr of samples] RESP signal on this channel
physio['PULS']: [length = nr of samples] PULS signal on this channel
physio['EXT1']: [length = nr of samples] True if EXT/EXT1 signal detected; False if not
physio['EXT2']: [length = nr of samples] True if EXT2 signal detected; False if not
This function has been derived from the readCMRRPhysio.m function of https://github.com/CMRR-C2P/MB
A BIDS aware version of this code is included in the BIDScoin toolbox (https://github.com/Donders-Institute/bidscoin)
This module is ported from the Matlab code in readCMRRPhysio.m (E. Auerbach, CMRR, 2015-2023)
@author: Marcel Zwiers
"""
import struct
import logging, coloredlogs
import numpy as np
import matplotlib.pyplot as plt
from typing import Union
from pydicom import dcmread, tag
from pathlib import Path
# Defaults
LOGVERSION = 'EJA_1' # This is the file format this function expects; must match log file version
FREQ = 1 / 2.5E-3 # Sampling frequency (the unit of SIEMENS ticks is 2.5 ms)
# Set-up logging
LOGGER = logging.getLogger('readCMRRPhysio')
if not LOGGER.handlers:
coloredlogs.install(fmt='%(asctime)s - %(name)s - %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
def readparsefile(fn: Union[bytes,Path], logdatatype: str, firsttime: int=0, expectedsamples: int=0) -> tuple:
"""
Read and parse physiologal traces from the DICOM data or from individual logfiles
:param fn: Physiological data from DICOM or the basename of the physiological logfiles
:param logdatatype: Datatype that is extracted, e.g. 'ECG', 'RESP', 'PULS' or 'EXT'. Additional meta data is extracted if 'ACQUISITION_INFO'
:param firsttime: Value from readparsefile('ACQUISITION_INFO', ..) that has to be passed for parsing other logdatatypes
:param expectedsamples: Number of samples of the parsed traces
:return: traces, UUID[, nrslices, nrvolumes, firsttime, lasttime, nrechoes] ([..] if logdatatype=='ACQUISITION_INFO')
"""
# Echoes parameter was not added until R015a, so prefill a default value for compatibility with older data
nrechoes = 1
traces = None
# Parse the input data into a list of lines
if isinstance(fn, bytes):
# If fn is a bytestring, we read it directly from DICOM
lines = fn.decode('UTF-8').splitlines()
elif isinstance(fn, Path):
# Otherwise, fn must be a filename
LOGGER.info(f"Reading physio log-file: {fn}")
lines = fn.read_text().splitlines()
else:
LOGGER.error(f"Wrong input {fn}: {type(fn)}"); raise FileNotFoundError
# Extract the meta data and physiological traces
LOGGER.info(f"Parsing {logdatatype} data...")
for line in [line for line in lines if line]:
# Strip any leading and trailing whitespace and comments
line = line.split('#')[0].strip()
if '=' in line:
# This is an assigned value; parse it
varname, value = [item.strip() for item in line.split('=')]
if varname == 'UUID':
UUID = value
if varname == 'LogVersion':
if value != LOGVERSION:
LOGGER.error(f"File format [{value}] not supported by this function (expected [{LOGVERSION}])"); raise NotImplementedError
if varname == 'LogDataType':
if value != logdatatype:
LOGGER.error(f"Expected [{logdatatype}] data, found [{value}]? Check filenames?"); raise RuntimeError
if varname == 'SampleTime':
if logdatatype == 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
sampletime = int(value)
if varname == 'NumSlices':
if logdatatype != 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
nrslices = int(value)
if varname == 'NumVolumes':
if logdatatype != 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
nrvolumes = int(value)
if varname == 'FirstTime':
if logdatatype != 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
firsttime = int(value)
if varname == 'LastTime':
if logdatatype != 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
lasttime = int(value)
if varname == 'NumEchoes':
if logdatatype != 'ACQUISITION_INFO':
LOGGER.error(f"Invalid [{varname}] parameter found"); raise RuntimeError
nrechoes = int(value)
else:
# This must be data; currently it is 3-5 columns, pad it with '0' if needed to always have 5 columns
dataitems = line.split()
dataitems = [dataitems[n] if n < len(dataitems) else '0' for n in range(5)]
# If the first column isn't numeric, it is probably the header
if not dataitems[0].isdigit():
continue
# Store data in output array based on the file type
if logdatatype == 'ACQUISITION_INFO':
if ('nrvolumes' not in locals() or nrvolumes < 1 or
'nrslices' not in locals() or nrslices < 1 or
'nrechoes' not in locals() or nrechoes < 1):
LOGGER.error('Failed reading ACQINFO header'); raise RuntimeError
if nrvolumes == 1:
# This is probably R016a or earlier diffusion data, where NumVolumes is 1 (incorrect)
nrvolumes = (len(lines) - 11) / (nrslices * nrechoes)
LOGGER.warning(f"Found NumVolumes = 1; correcting to {nrvolumes} for R016a and earlier diffusion data")
if traces is None:
traces = np.zeros((2, nrvolumes, nrslices, nrechoes), dtype=int)
curvol = int(dataitems[0])
curslc = int(dataitems[1])
curstart = int(dataitems[2])
curfinish = int(dataitems[3])
if len(dataitems[4]):
cureco = int(dataitems[4])
if traces[:, curvol, curslc, cureco].any():
LOGGER.error(f"Received duplicate timing data for vol{curvol} slc{curslc} eco{cureco}"); raise RuntimeError
else:
cureco = 0
if traces[:, curvol, curslc, cureco]:
LOGGER.warning(f"Received duplicate timing data for vol{curvol} slc{curslc} (ignore for pre-R015a multi-echo data)")
traces[:, curvol, curslc, cureco] = [curstart, curfinish]
else:
curstart = int(dataitems[0]) - firsttime
curchannel = dataitems[1]
curvalue = int(dataitems[2])
if logdatatype == 'ECG':
if traces is None:
traces = np.zeros((expectedsamples, 4), dtype=int)
if curchannel not in ['ECG1', 'ECG2', 'ECG3', 'ECG4']:
LOGGER.error(f"Invalid ECG channel ID [{curchannel}]"); raise RuntimeError
chaidx = ['ECG1', 'ECG2', 'ECG3', 'ECG4'].index(curchannel)
elif logdatatype == 'EXT':
if traces is None:
traces = np.zeros((expectedsamples, 2), dtype=int)
if curchannel not in ['EXT', 'EXT1', 'EXT2']:
LOGGER.error(f"Invalid EXT channel ID [{curchannel}]"); raise RuntimeError
if curchannel == 'EXT':
chaidx = 0
else:
chaidx = ['EXT1', 'EXT2'].index(curchannel)
else:
if traces is None:
traces = np.zeros((expectedsamples, 1), dtype=int)
chaidx = 0
traces[curstart:curstart+int(sampletime), chaidx] = curvalue
if logdatatype == 'ACQUISITION_INFO':
traces = traces - firsttime
return traces, UUID, nrslices, nrvolumes, firsttime, lasttime, nrechoes
else:
return traces, UUID
def plotphysio(physio:dict, showsamples: int=1000):
"""Plot the samples of the physiological traces in a rudimentary way. If too large, only plot the middle 'showsamples' ticks"""
miny, maxy = 5E4, -5E4 # Actual range is 0..4095
nrsamples = len(physio['ACQ'])
starttick = 0
endtick = nrsamples
if nrsamples > showsamples:
starttick = int(nrsamples / 2) - int(showsamples / 2)
endtick = starttick + showsamples
ticks = np.arange(starttick, endtick)
def plot_trace(logdatatype, scale, color):
"""Plot the trace and update minimum and maximum values"""
if logdatatype not in physio: return
nonlocal miny, maxy
trace = physio[logdatatype][starttick:endtick]
mintrace = int(min(trace)) # type(ACQ)==bool
maxtrace = int(max(trace))
if scale and (miny != mintrace or maxy != maxtrace) and mintrace != maxtrace:
trace = (trace - mintrace) * (maxy - miny)/(maxtrace - mintrace) + miny
else:
miny = min(miny, mintrace) # Update the (non-local) minimum
maxy = max(maxy, maxtrace) # Update the (non-local) maximum
if logdatatype == 'ACQ':
plt.fill_between(ticks, trace, miny, color=color, label=logdatatype)
else:
plt.plot(ticks, trace, color=color, label=logdatatype)
plt.figure(num=f"readCMRRPhysio: UUID {physio['UUID']}")
plot_trace('ECG1', False, 'green')
plot_trace('ECG2', False, 'green')
plot_trace('ECG3', False, 'green')
plot_trace('ECG4', False, 'green')
plot_trace('RESP', False, 'blue')
plot_trace('PULS', False, 'red')
plot_trace('EXT1', True, 'cyan')
plot_trace('EXT2', True, 'olive')
plot_trace('ACQ', True, 'lightgray')
plt.legend(loc='lower right')
plt.axis([starttick, endtick-1, miny - maxy*0.05, maxy + maxy*0.05])
plt.title('Physiological traces')
plt.xlabel('Samples')
plt.show()
def readCMRRPhysio(fn: Union[str,Path], showsamples: int=0, outputpath: str='') -> dict:
"""
Read and plots active (i.e. non-zero) signals from SIEMENS advanced physiological log / DICOM files (>=R013, >=VD13A)
This function expects to find either a combination of individual logfiles (*_ECG.log, *_RESP.log, *_PULS.log, *_EXT.log,
*_Info.log) generated by >=R013 sequences, or a single encoded "_PHYSIO" DICOM file generated by >=R015 sequences. It
returns active (i.e. non-zero) physio traces for ECG1, ECG2, ECG3, ECG4, RESP, PULS, EXT/EXT1 and EXT2 signals:
physio['UUID']: Universally unique identifier string for this measurement
physio['Freq']: Sampling frequency in Hz (= 1/clock-tick; The unit of time is clock-ticks, which normally is 2.5 ms)
physio['SliceMap']: [2 x Volumes x Slices] [1:2,:,:] = start & finish time stamp of each volume/slice
physio['ACQ']: [length = nr of samples] True if acquisition is active at this time; False if not
physio['ECG1']: [length = nr of samples] ECG signal on this channel
physio['ECG2']: [length = nr of samples] [..]
physio['ECG3']: [length = nr of samples] [..]
physio['ECG4']: [length = nr of samples] [..]
physio['RESP']: [length = nr of samples] RESP signal on this channel
physio['PULS']: [length = nr of samples] PULS signal on this channel
physio['EXT1']: [length = nr of samples] True if EXT/EXT1 signal detected; False if not
physio['EXT2']: [length = nr of samples] True if EXT2 signal detected; False if not
:param fn: Either the fullpath of the DICOM file or the basename of the PHYSIO logfiles (fullpath without suffix and file extension, e.g. 'foo/bar/Physio_DATE_TIME_UUID')
:param showsamples: The nr of plotted samples of the physiological traces (nothing is plotted if showsamples==0)
:param outputpath: If the input is in DICOM format and the output path is specified, then the extracted log data will be written to disk
:return: The active (non-zero) physio traces for ECG1, ECG2, ECG3, ECG4, RESP, PULS, EXT1 and EXT2 signals
"""
foundECG = foundRESP = foundPULS = foundEXT = False
# Check input
fn = Path(fn).resolve()
# First, check if the input points to a valid DICOM file. If so, extract the physiological data
if fn.is_file():
LOGGER.info(f"Reading physio DICOM file: {fn}")
dicomdata = dcmread(fn, force=True) # The DICM tag may be missing for anonymized DICOM files
manufacturer = dicomdata.get('Manufacturer')
physiotag = tag.Tag('7fe1', '1010')
if manufacturer and manufacturer != 'SIEMENS':
LOGGER.warning(f"Unsupported manufacturer: '{manufacturer}', this function is designed for SIEMENS advanced physiological logging data")
if (dicomdata.get('ImageType')==['ORIGINAL','PRIMARY','RAWDATA','PHYSIO'] and dicomdata.get(physiotag).private_creator=='SIEMENS CSA NON-IMAGE') or \
(dicomdata.get('ImageType')==['ORIGINAL','PRIMARY','RAWDATA', 'NONE'] and dicomdata.get('SpectroscopyData')) or \
(dicomdata.get('ImageType') in (['ORIGINAL','PRIMARY','RAWDATA','NONE'], ['ORIGINAL','PRIMARY','OTHER','NONE']) and dicomdata.get(physiotag).private_creator=='SIEMENS MR IMA'):
if dicomdata.get('SpectroscopyData'):
physiodata = dicomdata['SpectroscopyData'].value # XA30-bug. NB: The original Matlab code casts this to uint8 (i.e. to struct.unpack('<'+len(physiodata)*'B', physiodata)
else:
physiodata = dicomdata[physiotag].value
rows = int(dicomdata.AcquisitionNumber)
columns = len(physiodata)/rows
nrfiles = columns/1024
if columns % 1 or nrfiles % 1:
LOGGER.error(f"Invalid image size: [rows x columns] = [{rows} x {columns}]"); raise RuntimeError
# Encoded DICOM format: columns = 1024*nrfiles
# first row: uint32 datalen, uint32 filenamelen, char[filenamelen] filename
# remaining rows: char[datalen] data
for idx in range(int(nrfiles)):
filedata = physiodata[idx*rows*1024:(idx+1)*rows*1024]
datalen = struct.unpack('<L', filedata[0:4])[0]
filenamelen = struct.unpack('<L', filedata[4:8])[0]
filename = filedata[8:8+filenamelen].decode('UTF-8')
logdata = filedata[1024:1024+datalen]
LOGGER.info(f"Decoded: {filename}")
if filename.endswith('_Info.log'):
fnINFO = logdata
elif filename.endswith('_ECG.log'):
fnECG = logdata
foundECG = True
elif filename.endswith('_RESP.log'):
fnRESP = logdata
foundRESP = True
elif filename.endswith('_PULS.log'):
fnPULS = logdata
foundPULS = True
elif filename.endswith('_EXT.log'):
fnEXT = logdata
foundEXT = True
if outputpath:
outputfile = Path(outputpath)/filename
LOGGER.info(f"Writing physio data to: {outputfile}")
outputfile.write_text(logdata.decode('UTF-8'))
else:
LOGGER.error(f"{fn} is not a valid DICOM format file"); raise RuntimeError
# If we don't have an encoded DICOM, check what text log files we have
else:
fnINFO = fn.with_name(fn.name + '_Info.log')
fnECG = fn.with_name(fn.name + '_ECG.log')
fnRESP = fn.with_name(fn.name + '_RESP.log')
fnPULS = fn.with_name(fn.name + '_PULS.log')
fnEXT = fn.with_name(fn.name + '_EXT.log')
if not fnINFO.is_file():
LOGGER.error(f"{fnINFO} not found"); raise FileNotFoundError
foundECG = fnECG.is_file()
foundRESP = fnRESP.is_file()
foundPULS = fnPULS.is_file()
foundEXT = fnEXT.is_file()
if not foundECG and not foundRESP and not foundPULS and not foundEXT:
LOGGER.error('No data files (ECG/RESP/PULS/EXT) found'); raise FileNotFoundError
# Read in and/or parse the data
slicemap, UUID1, nrslices, nrvolumes, firsttime, lasttime, nrechoes = readparsefile(fnINFO, 'ACQUISITION_INFO')
if lasttime <= firsttime:
LOGGER.error(f"Last timestamp {lasttime} is not greater than first timestamp {firsttime}, aborting..."); raise RuntimeError
actualsamples = lasttime - firsttime + 1
expectedsamples = actualsamples + 8 # Some padding at the end for worst case EXT sample at last timestamp
if foundECG:
ECG, UUID2 = readparsefile(fnECG, 'ECG', firsttime, expectedsamples)
if UUID1 != UUID2:
LOGGER.error('UUID mismatch between Info and ECG files'); raise RuntimeError
if foundRESP:
RESP, UUID3 = readparsefile(fnRESP, 'RESP', firsttime, expectedsamples)
if UUID1 != UUID3:
LOGGER.error('UUID mismatch between Info and RESP files'); raise RuntimeError
if foundPULS:
PULS, UUID4 = readparsefile(fnPULS, 'PULS', firsttime, expectedsamples)
if UUID1 != UUID4:
LOGGER.error('UUID mismatch between Info and PULS files'); raise RuntimeError
if foundEXT:
EXT, UUID5 = readparsefile(fnEXT, 'EXT', firsttime, expectedsamples)
if UUID1 != UUID5:
LOGGER.error('UUID mismatch between Info and EXT files'); raise RuntimeError
LOGGER.info(f"Slices in scan: {nrslices}")
LOGGER.info(f"Volumes in scan: {nrvolumes}")
LOGGER.info(f"Echoes per slc/vol: {nrechoes}")
LOGGER.info(f"First timestamp: {firsttime}")
LOGGER.info(f"Last timestamp: {lasttime}")
LOGGER.info(f"Total scan duration: {actualsamples} ticks = {actualsamples / FREQ:.4f} s")
LOGGER.info('Formatting ACQ data...')
ACQ = np.full((expectedsamples, 1), False)
for v in range(nrvolumes):
for s in range(nrslices):
for e in range(nrechoes):
ACQ[slicemap[0,v,s,e]:slicemap[1,v,s,e]+1, 0] = True
# Only return active (nonzero) physio traces
physio = dict()
physio['UUID'] = UUID1
physio['Freq'] = FREQ
physio['SliceMap'] = slicemap
physio['ACQ'] = ACQ[:,0]
if foundECG and ECG.any():
if sum(ECG[:,0]): physio['ECG1'] = ECG[:,0]
if sum(ECG[:,1]): physio['ECG2'] = ECG[:,1]
if sum(ECG[:,2]): physio['ECG3'] = ECG[:,2]
if sum(ECG[:,3]): physio['ECG4'] = ECG[:,3]
if foundRESP and RESP.any():
if sum(RESP): physio['RESP'] = RESP[:,0]
if foundPULS and PULS.any():
if sum(PULS): physio['PULS'] = PULS[:,0]
if foundEXT and EXT.any():
if sum(EXT[:,0]): physio['EXT1'] = EXT[:,0]
if sum(EXT[:,1]): physio['EXT2'] = EXT[:,1]
# Plot the data if requested
if showsamples > 0:
plotphysio(physio, showsamples)
return physio
def main():
"""Console script usage"""
# Parse the input arguments and run readCMRRPhysio(args)
import argparse
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__,
epilog='examples:\n'
' readCMRRPhysio.py /project/3022026.01/sub-001/MR000000.dcm\n'
' readCMRRPhysio.py -s 2000 /project/3022026.01/sub-001/Physio_20200428_142451_007e910e-02d9-4d7a-8fdb-8e3568be8322\n ')
parser.add_argument('filename', help="Either the fullpath of the DICOM file or the basename of the PHYSIO logfiles (fullpath without suffix and file extension, e.g. 'foo/bar/Physio_DATE_TIME_UUID'")
parser.add_argument('-s','--showsamples', help='The nr of plotted samples of the physiological traces (default: 1000, nothing is plotted if 0)', default=1000, type=int)
parser.add_argument('-o','--outputpath', help='If the input is in DICOM format and the output path is specified, then the extracted log data will be written to disk')
args = parser.parse_args()
readCMRRPhysio(fn=args.filename, showsamples=args.showsamples, outputpath=args.outputpath)
if __name__ == "__main__":
main()