From 759eecc713add621766e3cc2e22a76d9b57e5738 Mon Sep 17 00:00:00 2001 From: williamsrms Date: Wed, 18 Dec 2019 23:11:55 -0500 Subject: [PATCH 1/3] Initial support for Python 3 (as per 2to3 recommendations) --- anonymize-slide.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/anonymize-slide.py b/anonymize-slide.py index 22f462c..1ac4123 100755 --- a/anonymize-slide.py +++ b/anonymize-slide.py @@ -22,9 +22,9 @@ # Boston, MA 02110-1301 USA. # -from __future__ import division -from ConfigParser import RawConfigParser -from cStringIO import StringIO + +from configparser import RawConfigParser +from io import StringIO from glob import glob from optparse import OptionParser import os @@ -110,7 +110,7 @@ def __init__(self, path): # the first directory is beyond 4 GB. if NDPI_MAGIC in directory.entries: if DEBUG: - print 'Enabling NDPI mode.' + print('Enabling NDPI mode.') self._ndpi = True self.directories.append(directory) if not self.directories: @@ -180,7 +180,7 @@ def delete(self, expected_prefix=None): for offset, length in zip(offsets, lengths): offset = self._fh.near_pointer(self._out_pointer_offset, offset) if DEBUG: - print 'Zeroing', offset, 'for', length + print('Zeroing', offset, 'for', length) self._fh.seek(offset) if expected_prefix: buf = self._fh.read(len(expected_prefix)) @@ -191,7 +191,7 @@ def delete(self, expected_prefix=None): # Remove directory if DEBUG: - print 'Deleting directory', self._number + print('Deleting directory', self._number) self._fh.seek(self._out_pointer_offset) out_pointer = self._fh.read_fmt('D') self._fh.seek(self._in_pointer_offset) @@ -326,9 +326,9 @@ def _zero_record(self, record): do_truncate = (fh.tell() == offset + length) if DEBUG: if do_truncate: - print 'Truncating', path, 'to', offset + print('Truncating', path, 'to', offset) else: - print 'Zeroing', path, 'at', offset, 'for', length + print('Zeroing', path, 'at', offset, 'for', length) fh.seek(offset) buf = fh.read(len(JPEG_SOI)) if buf != JPEG_SOI: @@ -341,7 +341,7 @@ def _zero_record(self, record): def _delete_index_record(self, record): if DEBUG: - print 'Deleting record', record + print('Deleting record', record) with open(self._indexfile, 'r+b') as fh: entries_to_move = len(self._level_list) - record - 1 if entries_to_move == 0: @@ -368,35 +368,35 @@ def _hier_keys_for_level(self, level): def _rename_section(self, old, new): if self._dat.has_section(old): if DEBUG: - print '[%s] -> [%s]' % (old, new) + print('[%s] -> [%s]' % (old, new)) self._dat.add_section(new) for k, v in self._dat.items(old): self._dat.set(new, k, v) self._dat.remove_section(old) elif DEBUG: - print '[%s] does not exist' % old + print('[%s] does not exist' % old) def _delete_section(self, section): if DEBUG: - print 'Deleting [%s]' % section + print('Deleting [%s]' % section) self._dat.remove_section(section) def _set_key(self, section, key, value): if DEBUG: prev = self._dat.get(section, key) - print '[%s] %s: %s -> %s' % (section, key, prev, value) + print('[%s] %s: %s -> %s' % (section, key, prev, value)) self._dat.set(section, key, value) def _rename_key(self, section, old, new): if DEBUG: - print '[%s] %s -> %s' % (section, old, new) + print('[%s] %s -> %s' % (section, old, new)) v = self._dat.get(section, old) self._dat.remove_option(section, old) self._dat.set(section, new, v) def _delete_key(self, section, key): if DEBUG: - print 'Deleting [%s] %s' % (section, key) + print('Deleting [%s] %s' % (section, key)) self._dat.remove_option(section, key) def _write(self): @@ -465,7 +465,7 @@ def __init__(self, dat, layer_id, level_id, record): def accept(filename, format): if DEBUG: - print filename + ':', format + print(filename + ':', format) def do_aperio_svs(filename): @@ -551,10 +551,10 @@ def _main(): pass else: raise IOError('Unrecognized file type') - except Exception, e: + except Exception as e: if DEBUG: raise - print >>sys.stderr, '%s: %s' % (filename, str(e)) + sys.stderr.write('%s: %s %s' % (filename, str(e), os.linesep)) exit_code = 1 sys.exit(exit_code) From 194b0b4aea993cd829933e3bd13e1347a2d38a13 Mon Sep 17 00:00:00 2001 From: williamsrms Date: Wed, 15 Jan 2020 13:31:02 -0500 Subject: [PATCH 2/3] Upgrade to Python 3 --- anonymize-slide.py | 82 +++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/anonymize-slide.py b/anonymize-slide.py index 1ac4123..cd8ffdf 100755 --- a/anonymize-slide.py +++ b/anonymize-slide.py @@ -24,7 +24,7 @@ from configparser import RawConfigParser -from io import StringIO +import io from glob import glob from optparse import OptionParser import os @@ -54,9 +54,9 @@ NDPI_SOURCELENS = 65421 # Format headers -LZW_CLEARCODE = '\x80' -JPEG_SOI = '\xff\xd8' -UTF8_BOM = '\xef\xbb\xbf' +LZW_CLEARCODE = b'\x80' +JPEG_SOI = b'\xff\xd8' +UTF8_BOM = b'\xef\xbb\xbf' # MRXS MRXS_HIERARCHICAL = 'HIERARCHICAL' @@ -67,15 +67,20 @@ class UnrecognizedFile(Exception): pass -class TiffFile(file): +class TiffFile(object): def __init__(self, path): - file.__init__(self, path, 'r+b') + mode = 'r+b' + self.file = io.open(path, mode) + self.close_file = (self.file is not path) # Check header, decide endianness - endian = self.read(2) - if endian == 'II': + endian = self.file.read(2) + endianII = bytes('II', 'utf-8') + endianMM = bytes('MM', 'utf-8') + + if endian == endianII: self._fmt_prefix = '<' - elif endian == 'MM': + elif endian == endianMM: self._fmt_prefix = '>' else: raise UnrecognizedFile @@ -97,11 +102,11 @@ def __init__(self, path): # Read directories self.directories = [] while True: - in_pointer_offset = self.tell() + in_pointer_offset = self.file.tell() directory_offset = self.read_fmt('D') if directory_offset == 0: break - self.seek(directory_offset) + self.file.seek(directory_offset) directory = TiffDirectory(self, len(self.directories), in_pointer_offset) if not self.directories and not self._bigtiff: @@ -116,6 +121,27 @@ def __init__(self, path): if not self.directories: raise IOError('No directories') + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + if (not self.close_file): + return # do nothing + # clean up + exit = getattr(self.file, '__exit__', None) + if exit is not None: + return exit(*args, **kwargs) + else: + exit = getattr(self.file, 'close', None) + if exit is not None: + exit() + + def __getattr__(self, attr): + return getattr(self.file, attr) + + def __iter__(self): + return iter(self.file) + def _convert_format(self, fmt): # Format strings can have special characters: # y: 16-bit signed on little TIFF, 64-bit signed on BigTIFF @@ -124,11 +150,11 @@ def _convert_format(self, fmt): # Z: 32-bit unsigned on little TIFF, 64-bit unsigned on BigTIFF # D: 32-bit unsigned on little TIFF, 64-bit unsigned on BigTIFF/NDPI if self._bigtiff: - fmt = fmt.translate(string.maketrans('yYzZD', 'qQqQQ')) + fmt = fmt.translate(str.maketrans('yYzZD', 'qQqQQ')) elif self._ndpi: - fmt = fmt.translate(string.maketrans('yYzZD', 'hHiIQ')) + fmt = fmt.translate(str.maketrans('yYzZD', 'hHiIQ')) else: - fmt = fmt.translate(string.maketrans('yYzZD', 'hHiII')) + fmt = fmt.translate(str.maketrans('yYzZD', 'hHiII')) return self._fmt_prefix + fmt def fmt_size(self, fmt): @@ -183,24 +209,25 @@ def delete(self, expected_prefix=None): print('Zeroing', offset, 'for', length) self._fh.seek(offset) if expected_prefix: - buf = self._fh.read(len(expected_prefix)) + buf = self._fh.file.read(len(expected_prefix)) if buf != expected_prefix: raise IOError('Unexpected data in image strip') - self._fh.seek(offset) - self._fh.write('\0' * length) + self._fh.file.seek(offset) + write_byte = b'\0' + self._fh.file.write(write_byte * length) # Remove directory if DEBUG: print('Deleting directory', self._number) - self._fh.seek(self._out_pointer_offset) + self._fh.file.seek(self._out_pointer_offset) out_pointer = self._fh.read_fmt('D') - self._fh.seek(self._in_pointer_offset) + self._fh.file.seek(self._in_pointer_offset) self._fh.write_fmt('D', out_pointer) class TiffEntry(object): def __init__(self, fh): - self.start = fh.tell() + self.start = fh.file.tell() self.tag, self.type, self.count, self.value_offset = \ fh.read_fmt('HHZZ') self._fh = fh @@ -225,15 +252,16 @@ def value(self): len = self._fh.fmt_size(fmt) if len <= self._fh.fmt_size('Z'): # Inline value - self._fh.seek(self.start + self._fh.fmt_size('HHZ')) + self._fh.file.seek(self.start + self._fh.fmt_size('HHZ')) else: # Out-of-line value - self._fh.seek(self._fh.near_pointer(self.start, self.value_offset)) + self._fh.file.seek(self._fh.near_pointer(self.start, self.value_offset)) items = self._fh.read_fmt(fmt, force_list=True) if self.type == ASCII: - if items[-1] != '\0': + utf8_zero = bytes('\0', 'utf-8') + if items[-1] != utf8_zero: raise ValueError('String not null-terminated') - return ''.join(items[:-1]) + return b''.join(items[:-1]) else: return items @@ -473,7 +501,8 @@ def do_aperio_svs(filename): # Check for SVS file try: desc0 = fh.directories[0].entries[IMAGE_DESCRIPTION].value() - if not desc0.startswith('Aperio'): + aperio_bytes = bytes('Aperio', 'utf-8') + if not desc0.startswith(aperio_bytes): raise UnrecognizedFile except KeyError: raise UnrecognizedFile @@ -482,7 +511,8 @@ def do_aperio_svs(filename): # Find and delete label for directory in fh.directories: lines = directory.entries[IMAGE_DESCRIPTION].value().splitlines() - if len(lines) >= 2 and lines[1].startswith('label '): + label_bytes = b'label ' + if len(lines) >= 2 and lines[1].startswith(label_bytes): directory.delete(expected_prefix=LZW_CLEARCODE) break else: From bb091b4fb9961acecfbc88abaa7ea75e67ca4b83 Mon Sep 17 00:00:00 2001 From: williamsrms Date: Fri, 17 Jan 2020 21:54:18 -0500 Subject: [PATCH 3/3] Upgrade MRXS handling to Python3 --- anonymize-slide.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/anonymize-slide.py b/anonymize-slide.py index cd8ffdf..f51d66d 100755 --- a/anonymize-slide.py +++ b/anonymize-slide.py @@ -278,11 +278,12 @@ def __init__(self, filename): self._dat = RawConfigParser() self._dat.optionxform = str try: - with open(self._slidedatfile, 'rb') as fh: + with open(self._slidedatfile, 'rb') as fh_prewrapped: + fh = io.TextIOWrapper(fh_prewrapped) self._have_bom = (fh.read(len(UTF8_BOM)) == UTF8_BOM) if not self._have_bom: fh.seek(0) - self._dat.readfp(fh) + self._dat.read_file(fh) except IOError: raise UnrecognizedFile @@ -389,7 +390,10 @@ def _delete_index_record(self, record): def _hier_keys_for_level(self, level): ret = [] for k, _ in self._dat.items(MRXS_HIERARCHICAL): - if k == level.key_prefix or k.startswith(level.key_prefix + '_'): + k_bytes = bytes(k, 'utf-8') + level_key_prefix_bytes = bytes(level.key_prefix, 'utf-8') + level_key_prefix_underscore_bytes = bytes(level.key_prefix + '_', 'utf-8') + if k_bytes == level_key_prefix_bytes or k_bytes.startswith(level_key_prefix_underscore_bytes): ret.append(k) return ret @@ -428,12 +432,15 @@ def _delete_key(self, section, key): self._dat.remove_option(section, key) def _write(self): - buf = StringIO() + buf = io.StringIO() self._dat.write(buf) - with open(self._slidedatfile, 'wb') as fh: + with open(self._slidedatfile, 'wb') as fh_prewrapped: + fh = io.TextIOWrapper(fh_prewrapped) if self._have_bom: fh.write(UTF8_BOM) - fh.write(buf.getvalue().replace('\n', '\r\n')) + buf_replaced = buf.getvalue().replace('\n', '\r\n') + fh.write(buf_replaced) + fh.close() def delete_level(self, layer_name, level_name): level = self._levels[(layer_name, level_name)] @@ -512,8 +519,10 @@ def do_aperio_svs(filename): for directory in fh.directories: lines = directory.entries[IMAGE_DESCRIPTION].value().splitlines() label_bytes = b'label ' - if len(lines) >= 2 and lines[1].startswith(label_bytes): - directory.delete(expected_prefix=LZW_CLEARCODE) + if len(lines) >= 2: + line1_bytes = bytes(lines[1], 'utf-8') + if line1_bytes.startswith(label_bytes): + directory.delete(expected_prefix=LZW_CLEARCODE) break else: raise IOError("No label in SVS file")