Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-13153: Use OS native encoding for converting between Python and Tcl. #16545

Merged
merged 7 commits into from
Oct 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 3 additions & 26 deletions Lib/idlelib/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,21 +358,6 @@ def set_width(self):
Font(text, font=text.cget('font')).measure('0')
self.width = pixel_width // zero_char_width

def _filename_to_unicode(self, filename):
"""Return filename as BMP unicode so displayable in Tk."""
# Decode bytes to unicode.
if isinstance(filename, bytes):
try:
filename = filename.decode(self.filesystemencoding)
except UnicodeDecodeError:
try:
filename = filename.decode(self.encoding)
except UnicodeDecodeError:
# byte-to-byte conversion
filename = filename.decode('iso8859-1')
# Replace non-BMP char with diamond questionmark.
return re.sub('[\U00010000-\U0010FFFF]', '\ufffd', filename)

def new_callback(self, event):
dirname, basename = self.io.defaultfilename()
self.flist.new(dirname)
Expand Down Expand Up @@ -963,10 +948,8 @@ def update_recent_files_list(self, new_file=None):
menu.delete(0, END) # clear, and rebuild:
for i, file_name in enumerate(rf_list):
file_name = file_name.rstrip() # zap \n
# make unicode string to display non-ASCII chars correctly
ufile_name = self._filename_to_unicode(file_name)
callback = instance.__recent_file_callback(file_name)
menu.add_command(label=ulchars[i] + " " + ufile_name,
menu.add_command(label=ulchars[i] + " " + file_name,
command=callback,
underline=0)

Expand Down Expand Up @@ -1004,16 +987,10 @@ def reset_undo(self):

def short_title(self):
filename = self.io.filename
if filename:
filename = os.path.basename(filename)
else:
filename = "untitled"
# return unicode string to display non-ASCII chars correctly
return self._filename_to_unicode(filename)
return os.path.basename(filename) if filename else "untitled"

def long_title(self):
# return unicode string to display non-ASCII chars correctly
return self._filename_to_unicode(self.io.filename or "")
return self.io.filename or ""

def center_insert_event(self, event):
self.center()
Expand Down
12 changes: 0 additions & 12 deletions Lib/idlelib/idle_test/test_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,6 @@ def test_init(self):
e._close()


class EditorFunctionTest(unittest.TestCase):

def test_filename_to_unicode(self):
func = Editor._filename_to_unicode
class dummy():
filesystemencoding = 'utf-8'
pairs = (('abc', 'abc'), ('a\U00011111c', 'a\ufffdc'),
(b'abc', 'abc'), (b'a\xf0\x91\x84\x91c', 'a\ufffdc'))
for inp, out in pairs:
self.assertEqual(func(dummy, inp), out)


class TestGetLineIndent(unittest.TestCase):
def test_empty_lines(self):
for tabwidth in [1, 2, 4, 6, 8]:
Expand Down
18 changes: 0 additions & 18 deletions Lib/idlelib/pyshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,14 +679,6 @@ def runsource(self, source):
self.more = 0
# at the moment, InteractiveInterpreter expects str
assert isinstance(source, str)
#if isinstance(source, str):
# from idlelib import iomenu
# try:
# source = source.encode(iomenu.encoding)
# except UnicodeError:
# self.tkconsole.resetoutput()
# self.write("Unsupported characters in input\n")
# return
# InteractiveInterpreter.runsource() calls its runcode() method,
# which is overridden (see below)
return InteractiveInterpreter.runsource(self, source, filename)
Expand Down Expand Up @@ -1298,16 +1290,6 @@ def resetoutput(self):
self.set_line_and_column()

def write(self, s, tags=()):
if isinstance(s, str) and len(s) and max(s) > '\uffff':
# Tk doesn't support outputting non-BMP characters
# Let's assume what printed string is not very long,
# find first non-BMP character and construct informative
# UnicodeEncodeError exception.
for start, char in enumerate(s):
if char > '\uffff':
break
raise UnicodeEncodeError("UCS-2", char, start, start+1,
'Non-BMP character not supported in Tk')
serhiy-storchaka marked this conversation as resolved.
Show resolved Hide resolved
try:
self.text.mark_gravity("iomark", "right")
count = OutputWindow.write(self, s, tags, "iomark")
Expand Down
3 changes: 1 addition & 2 deletions Lib/idlelib/runscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ def _run_module_event(self, event, *, customize=False):
interp = self.shell.interp
if pyshell.use_subprocess and restart:
interp.restart_subprocess(
with_cwd=False, filename=
self.editwin._filename_to_unicode(filename))
with_cwd=False, filename=filename)
dirname = os.path.dirname(filename)
argv = [filename]
if self.cli_args:
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_tcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,9 +429,12 @@ def passValue(value):
self.assertEqual(passValue(False), False if self.wantobjects else '0')
self.assertEqual(passValue('string'), 'string')
self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
self.assertEqual(passValue('string\U0001f4bb'), 'string\U0001f4bb')
self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd')
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
'str\x00ing\U0001f4bb')
self.assertEqual(passValue(b'str\x00ing'),
b'str\x00ing' if self.wantobjects else 'str\x00ing')
self.assertEqual(passValue(b'str\xc0\x80ing'),
Expand Down Expand Up @@ -490,6 +493,7 @@ def float_eq(actual, expected):
check('string')
check('string\xbd')
check('string\u20ac')
check('string\U0001f4bb')
check('')
check(b'string', 'string')
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
Expand Down Expand Up @@ -531,6 +535,7 @@ def test_splitlist(self):
('a\n b\t\r c\n ', ('a', 'b', 'c')),
(b'a\n b\t\r c\n ', ('a', 'b', 'c')),
('a \u20ac', ('a', '\u20ac')),
('a \U0001f4bb', ('a', '\U0001f4bb')),
(b'a \xe2\x82\xac', ('a', '\u20ac')),
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
('a {b c}', ('a', 'b c')),
Expand Down
22 changes: 22 additions & 0 deletions Lib/tkinter/test/test_tkinter/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,28 @@ def callback():
with self.assertRaises(tkinter.TclError):
root.tk.call('after', 'info', idle1)

def test_clipboard(self):
root = self.root
root.clipboard_clear()
root.clipboard_append('Ùñî')
self.assertEqual(root.clipboard_get(), 'Ùñî')
root.clipboard_append('çōđě')
self.assertEqual(root.clipboard_get(), 'Ùñîçōđě')
root.clipboard_clear()
with self.assertRaises(tkinter.TclError):
root.clipboard_get()

def test_clipboard_astral(self):
root = self.root
root.clipboard_clear()
root.clipboard_append('𝔘𝔫𝔦')
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦')
root.clipboard_append('𝔠𝔬𝔡𝔢')
self.assertEqual(root.clipboard_get(), '𝔘𝔫𝔦𝔠𝔬𝔡𝔢')
root.clipboard_clear()
with self.assertRaises(tkinter.TclError):
root.clipboard_get()


tests_gui = (MiscTest, )

Expand Down
13 changes: 3 additions & 10 deletions Lib/tkinter/test/test_ttk/test_widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,7 @@ def check_get_current(getval, currval):
expected=('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.combo, 'values', ('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.combo, 'values', (42, 3.14, '', 'any string'))
self.checkParam(self.combo, 'values', '',
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
self.checkParam(self.combo, 'values', '')

self.combo['values'] = ['a', 1, 'c']

Expand Down Expand Up @@ -1245,12 +1244,7 @@ def test_values(self):
expected=('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.spin, 'values', ('mon', 'tue', 'wed', 'thur'))
self.checkParam(self.spin, 'values', (42, 3.14, '', 'any string'))
self.checkParam(
self.spin,
'values',
'',
expected='' if get_tk_patchlevel() < (8, 5, 10) else ()
)
self.checkParam(self.spin, 'values', '')

self.spin['values'] = ['a', 1, 'c']

Expand Down Expand Up @@ -1308,8 +1302,7 @@ def test_columns(self):
self.checkParam(widget, 'columns', 'a b c',
expected=('a', 'b', 'c'))
self.checkParam(widget, 'columns', ('a', 'b', 'c'))
self.checkParam(widget, 'columns', (),
expected='' if get_tk_patchlevel() < (8, 5, 10) else ())
self.checkParam(widget, 'columns', '')

def test_displaycolumns(self):
widget = self.create()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
OS native encoding is now used for converting between Python strings and
Tcl objects. This allows to display, copy and paste to clipboard emoji and
other non-BMP characters. Converting strings from Tcl to Python and back
now never fails (except MemoryError).
Loading