Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure get_contents() method of Node objects return 'bytes' and fix decoding errors in Value.get_csig() #3738

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion SCons/Action.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ def get_contents(self, target, source, env):
except AttributeError:
# No __call__() method, so it might be a builtin
# or something like that. Do the best we can.
contents = repr(actfunc)
contents = repr(actfunc).encode()

return contents

Expand Down
6 changes: 3 additions & 3 deletions SCons/ActionTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2048,9 +2048,9 @@ def __call__(self):
af = SCons.Action.ActionFactory(str, strfunc)
ac = SCons.Action.ActionCaller(af, [], {})
c = ac.get_contents([], [], Environment())
assert c == "<built-in function str>" or \
c == "<type 'str'>" or \
c == "<class 'str'>", repr(c)
assert c == b"<built-in function str>" or \
c == b"<type 'str'>" or \
c == b"<class 'str'>", repr(c)
# ^^ class str for python3

def test___call__(self):
Expand Down
2 changes: 1 addition & 1 deletion SCons/Executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def get_action_side_effects(self):
def __call__(self, *args, **kw):
return 0
def get_contents(self):
return ''
return b''
def _morph(self):
"""Morph this Null executor to a real Executor object."""
batches = self.batches
Expand Down
2 changes: 1 addition & 1 deletion SCons/Node/Alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def get_contents(self):
"""The contents of an alias is the concatenation
of the content signatures of all its sources."""
childsigs = [n.get_csig() for n in self.children()]
return ''.join(childsigs)
return ''.join(childsigs).encode('utf-8')

def sconsign(self):
"""An Alias is not recorded in .sconsign files"""
Expand Down
4 changes: 2 additions & 2 deletions SCons/Node/AliasTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(self, contents):
def get_csig(self):
return self.contents
def get_contents(self):
return self.contents
return self.contents.encode()

ans = SCons.Node.Alias.AliasNameSpace()

Expand All @@ -67,7 +67,7 @@ def get_contents(self):
a.sources = [ DummyNode('one'), DummyNode('two'), DummyNode('three') ]

c = a.get_contents()
assert c == 'onetwothree', c
assert c == 'onetwothree'.encode('utf-8'), c

def test_lookup(self):
"""Test the lookup() method
Expand Down
3 changes: 2 additions & 1 deletion SCons/Node/FS.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,8 @@ def scanner_key(self):
def get_text_contents(self):
"""We already emit things in text, so just return the binary
version."""
return self.get_contents()
# Function get_contents_dir() returns utf-8 encoded text.
return self.get_contents().decode('utf-8')

def get_contents(self):
"""Return content signatures and names of all our children
Expand Down
20 changes: 10 additions & 10 deletions SCons/Node/FSTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1411,7 +1411,7 @@ def nonexistent(method, s):
# test Entry.get_contents()
e = fs.Entry('does_not_exist')
c = e.get_contents()
assert c == "", c
assert c == b"", c
assert e.__class__ == SCons.Node.FS.Entry

test.write("file", "file\n")
Expand Down Expand Up @@ -1441,7 +1441,7 @@ def nonexistent(method, s):
test.subdir("dir")
e = fs.Entry('dir')
c = e.get_contents()
assert c == "", c
assert c == b"", c
assert e.__class__ == SCons.Node.FS.Dir

c = e.get_text_contents()
Expand All @@ -1455,7 +1455,7 @@ def nonexistent(method, s):
e = fs.Entry('dangling_symlink')
c = e.get_contents()
assert e.__class__ == SCons.Node.FS.Entry, e.__class__
assert c == "", c
assert c == b"", c
c = e.get_text_contents()
assert c == "", c

Expand Down Expand Up @@ -2049,14 +2049,14 @@ def test_get_contents(self):
e = self.fs.Dir(os.path.join('d', 'empty'))
s = self.fs.Dir(os.path.join('d', 'sub'))

files = d.get_contents().split('\n')
files = d.get_contents().split('\n'.encode('utf-8'))

assert e.get_contents() == '', e.get_contents()
assert e.get_contents() == b'', e.get_contents()
assert e.get_text_contents() == '', e.get_text_contents()
assert e.get_csig() + " empty" == files[0], files
assert f.get_csig() + " f" == files[1], files
assert g.get_csig() + " g" == files[2], files
assert s.get_csig() + " sub" == files[3], files
assert (e.get_csig() + " empty").encode('utf-8') == files[0], files
assert (f.get_csig() + " f").encode('utf-8') == files[1], files
assert (g.get_csig() + " g").encode('utf-8') == files[2], files
assert (s.get_csig() + " sub").encode('utf-8') == files[3], files

def test_implicit_re_scans(self):
"""Test that adding entries causes a directory to be re-scanned
Expand Down Expand Up @@ -2532,7 +2532,7 @@ def get_timestamp(self):
return self.timestamp

def get_contents(self):
return self.name
return self.name.encode()

def get_ninfo(self):
""" mocked to ensure csig will equal the filename"""
Expand Down
12 changes: 5 additions & 7 deletions SCons/Node/Python.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,10 @@ def get_contents(self):
Get contents for signature calculations.
:return: bytes
"""
text_contents = self.get_text_contents()
try:
return text_contents.encode()
except UnicodeDecodeError:
# Already encoded as python2 str are bytes
return text_contents
contents = str(self.value).encode('utf-8')
for kid in self.children(None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

child seems to be in use everywhere else

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer child as well but it looks like kid is still used frequently in SCons/Node/*.

contents = contents + kid.get_contents()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change this to contents = contents + kid.get_csig()? Otherwise the memory usage of Value targets gets out of control.

Copy link
Author

@eugenhu eugenhu Jul 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made a few extra commits, first changing Value.get_csig() to use child csigs instead of content, and then making Value.get_text_contents() to just be a concatenation of child csigs prepended with the stringified value attribute like you suggest. The Value.get_contents() then just return an encoded get_text_contents() and Value.get_csig() just wraps get_text_contents() as well.

I'm not sure where the 'contents' of a Value node is actually used, but the second change failed these tests:

SCons/SConfTests.py
test/Configure/ConfigureDryRunError.py
test/Configure/VariantDir-SConscript.py
test/Configure/VariantDir.py
test/Configure/basic.py
test/Configure/cache-not-ok.py
test/Configure/cache-ok.py
test/Configure/config-h.py
test/Configure/custom-tests.py
test/Configure/issue-3469/issue-3469.py
test/Configure/option--config.py
test/Value.py
test/explain/basic.py
test/option-n.py
test/question/Configure.py
test/sconsign/script/Configure.py
test/textfile/textfile.py

The first change passes existing tests (after they've been modified to expect bytes from get_contents() calls in the previous PR commits).

I am running tests with Python 3.8.2 on Ubuntu 20.04 in a virtual environment.

return contents

def changed_since_last_build(self, target, prev_ni):
cur_csig = self.get_csig()
Expand All @@ -178,7 +176,7 @@ def get_csig(self, calc=None):
except AttributeError:
pass

contents = self.get_text_contents()
contents = self.get_contents().decode(errors='backslashreplace')

self.get_ninfo().csig = contents
return contents
Expand Down
33 changes: 32 additions & 1 deletion SCons/Node/PythonTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,39 @@ def test_get_csig(self):
csig = v3.get_csig(None)
assert csig == 'None', csig

def test_get_content_with_child_binary_content(self):
class DummyNode:
def __init__(self, contents):
self.contents = contents
def get_contents(self):
return self.contents

# Node with binary content that is not valid utf-8.
node_with_binary = DummyNode(b'\xff')

v = SCons.Node.Python.Value('v')
v.add_dependency([node_with_binary])

# Just make sure this call doesn't fail. Not sure what to check the
# return value against.
v.get_contents()

def test_get_csig_with_child_binary_content(self):
class DummyNode:
def __init__(self, contents):
self.contents = contents
def get_contents(self):
return self.contents

# Node with binary content that is not valid utf-8.
node_with_binary = DummyNode(b'\xff')

v = SCons.Node.Python.Value('v')
v.add_dependency([node_with_binary])

# Just make sure this call doesn't fail. Not sure what to check the
# return value against.
v.get_csig()


class ValueNodeInfoTestCase(unittest.TestCase):
Expand All @@ -129,7 +160,7 @@ def test___init__(self):
node._func_get_contents = 2 # Pretend to be a Dir.
node.add_to_implicit([value])
contents = node.get_contents()
expected_contents = '%s %s\n' % (value.get_csig(), value.name)
expected_contents = ('%s %s\n' % (value.get_csig(), value.name)).encode('utf-8')
assert contents == expected_contents


Expand Down
6 changes: 3 additions & 3 deletions SCons/Node/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def get_contents_entry(node):
# string so calls to get_contents() in emitters and the
# like (e.g. in qt.py) don't have to disambiguate by hand
# or catch the exception.
return ''
return b''
else:
return _get_contents_map[node._func_get_contents](node)

Expand All @@ -215,8 +215,8 @@ def get_contents_dir(node):
separated by new-lines. Ensure that the nodes are sorted."""
contents = []
for n in sorted(node.children(), key=lambda t: t.name):
contents.append('%s %s\n' % (n.get_csig(), n.name))
return ''.join(contents)
contents.append(('%s %s\n' % (n.get_csig(), n.name)).encode('utf-8'))
return b''.join(contents)

def get_contents_file(node):
if not node.rexists():
Expand Down
2 changes: 1 addition & 1 deletion SCons/SConfTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_TryBuild(self):

class MyAction:
def get_contents(self, target, source, env):
return 'MyBuilder-MyAction $SOURCE $TARGET'
return b'MyBuilder-MyAction $SOURCE $TARGET'

class MyBuilder(SCons.Builder.BuilderBase):
def __init__(self):
Expand Down
2 changes: 1 addition & 1 deletion SCons/Scanner/ScannerTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def rfile(self):
def exists(self):
return self._exists
def get_contents(self):
return self._contents
return self._contents.encode()
def get_text_contents(self):
return self._contents
def get_dir(self):
Expand Down