Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yield, YieldFrom, AugAssign propagate taint #155

Merged
merged 4 commits into from
Jul 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions examples/example_inputs/generator_with_multiple_yields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
def foo():
a = 1
if a == 1:
yield 0
yield a

foo()
21 changes: 21 additions & 0 deletions examples/vulnerable_code/yield.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import subprocess
from flask import Flask, request

app = Flask(__name__)


def things_to_run():
yield "echo hello"
yield from request.get_json()["commands"]
yield "echo done"


@app.route('/', methods=['POST'])
def home():
script = "; ".join(things_to_run())
subprocess.call(script, shell=True)
return 'Executed'


if __name__ == '__main__':
app.run(debug=True)
60 changes: 34 additions & 26 deletions pyt/cfg/expr_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
IgnoredNode,
Node,
RestoreNode,
ReturnNode
ReturnNode,
YieldNode
)
from .expr_visitor_helper import (
BUILTINS,
Expand Down Expand Up @@ -113,22 +114,25 @@ def visit_Yield(self, node):
label = LabelVisitor()
label.visit(node)

try:
rhs_visitor = RHSVisitor()
rhs_visitor.visit(node.value)
except AttributeError:
rhs_visitor.result = 'EmptyYield'
if node.value is None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You write such high quality code, but the amazing commit messages is what really separates you from the rest.

rhs_visitor_result = []
else:
rhs_visitor_result = RHSVisitor.result_for_node(node.value)

# Yield is a bit like augmented assignment to a return value
this_function_name = self.function_return_stack[-1]
LHS = 'yield_' + this_function_name
return self.append_node(ReturnNode(
LHS + ' = ' + label.result,
LHS = 'yld_' + this_function_name
return self.append_node(YieldNode(
LHS + ' += ' + label.result,
LHS,
node,
rhs_visitor.result,
rhs_visitor_result + [LHS],
path=self.filenames[-1])
)

def visit_YieldFrom(self, node):
return self.visit_Yield(node)

def visit_Attribute(self, node):
return self.visit_miscelleaneous_node(
node
Expand Down Expand Up @@ -449,24 +453,28 @@ def return_handler(
saved_function_call_index(int): Unique number for each call.
first_node(EntryOrExitNode or RestoreNode): Used to connect previous statements to this function.
"""
for node in function_nodes:
if any(isinstance(node, YieldNode) for node in function_nodes):
# Presence of a `YieldNode` means that the function is a generator
rhs_prefix = 'yld_'
elif any(isinstance(node, ConnectToExitNode) for node in function_nodes):
# Only `Return`s and `Raise`s can be of type ConnectToExitNode
if isinstance(node, ConnectToExitNode):
# Create e.g. ~call_1 = ret_func_foo RestoreNode
LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index)
RHS = 'ret_' + get_call_names_as_string(call_node.func)
return_node = RestoreNode(
LHS + ' = ' + RHS,
LHS,
[RHS],
line_number=call_node.lineno,
path=self.filenames[-1]
)
return_node.first_node = first_node
rhs_prefix = 'ret_'
else:
return # No return value

self.nodes[-1].connect(return_node)
self.nodes.append(return_node)
return
# Create e.g. ~call_1 = ret_func_foo RestoreNode
LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index)
RHS = rhs_prefix + get_call_names_as_string(call_node.func)
return_node = RestoreNode(
LHS + ' = ' + RHS,
LHS,
[RHS],
line_number=call_node.lineno,
path=self.filenames[-1]
)
return_node.first_node = first_node
self.nodes[-1].connect(return_node)
self.nodes.append(return_node)

def process_function(self, call_node, definition):
"""Processes a user defined function when it is called.
Expand Down
20 changes: 9 additions & 11 deletions pyt/cfg/stmt_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,6 @@ def visit_Return(self, node):
label = LabelVisitor()
label.visit(node)

try:
rhs_visitor = RHSVisitor()
rhs_visitor.visit(node.value)
except AttributeError:
rhs_visitor.result = 'EmptyReturn'

this_function_name = self.function_return_stack[-1]
LHS = 'ret_' + this_function_name

Expand All @@ -263,14 +257,17 @@ def visit_Return(self, node):
path=self.filenames[-1]
)
return_value_of_call.connect(return_node)
self.nodes.append(return_node)
return return_node
return self.append_node(return_node)
elif node.value is not None:
rhs_visitor_result = RHSVisitor.result_for_node(node.value)
else:
rhs_visitor_result = []

return self.append_node(ReturnNode(
LHS + ' = ' + label.result,
LHS,
node,
rhs_visitor.result,
rhs_visitor_result,
path=self.filenames[-1]
))

Expand Down Expand Up @@ -502,11 +499,12 @@ def visit_AugAssign(self, node):
rhs_visitor = RHSVisitor()
rhs_visitor.visit(node.value)

lhs = extract_left_hand_side(node.target)
return self.append_node(AssignmentNode(
label.result,
extract_left_hand_side(node.target),
lhs,
node,
rhs_visitor.result,
rhs_visitor.result + [lhs],
path=self.filenames[-1]
))

Expand Down
2 changes: 1 addition & 1 deletion pyt/core/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ This directory contains miscellaneous code that is imported from different parts

- `get_call_names`_ used in `vars_visitor.py`_ when visiting a Subscript, and `framework_helper.py`_ on function decorators in `is_flask_route_function`_

- `get_call_names_as_string`_ used in `expr_visitor.py`_ to create ret_function_name as RHS and yield_function_name as LHS, and in stmt_visitor.py when connecting a function to a loop.
- `get_call_names_as_string`_ used in `expr_visitor.py`_ to create ret_function_name as RHS and yld_function_name as LHS, and in stmt_visitor.py when connecting a function to a loop.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for updating this 👍 I definitely would have forgot to.


- `Arguments`_ used in `expr_visitor.py`_ when processing the arguments of a user defined function and `framework_adaptor.py`_ to taint function definition arguments.

Expand Down
8 changes: 8 additions & 0 deletions pyt/core/node_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,11 @@ def __init__(
line_number=ast_node.lineno,
path=path
)


class YieldNode(AssignmentNode):
"""CFG Node that represents a yield or yield from.

The presence of a YieldNode means that a function is a generator.
"""
pass
9 changes: 9 additions & 0 deletions pyt/vulnerabilities/vulnerability_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from enum import Enum
from collections import namedtuple

from ..core.node_types import YieldNode


class VulnerabilityType(Enum):
FALSE = 0
Expand Down Expand Up @@ -56,13 +58,20 @@ def __init__(

self.reassignment_nodes = reassignment_nodes
self._remove_sink_from_secondary_nodes()
self._remove_non_propagating_yields()

def _remove_sink_from_secondary_nodes(self):
try:
self.reassignment_nodes.remove(self.sink)
except ValueError: # pragma: no cover
pass

def _remove_non_propagating_yields(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

"""Remove yield with no variables e.g. `yield 123` and plain `yield` from vulnerability."""
for node in list(self.reassignment_nodes):
if isinstance(node, YieldNode) and len(node.right_hand_side_variables) == 1:
self.reassignment_nodes.remove(node)

def __str__(self):
"""Pretty printing of a vulnerability."""
reassigned_str = _get_reassignment_str(self.reassignment_nodes)
Expand Down
44 changes: 44 additions & 0 deletions tests/cfg/cfg_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,14 @@ def test_assignment_starred_list(self):
[('a', ['d']), ('b', ['d']), ('c', ['e'])],
)

def test_augmented_assignment(self):
self.cfg_create_from_ast(ast.parse('a+=f(b,c)'))

(node,) = self.cfg.nodes[1:-1]
self.assertEqual(node.label, 'a += f(b, c)')
self.assertEqual(node.left_hand_side, 'a')
self.assertEqual(node.right_hand_side_variables, ['b', 'c', 'a'])


class CFGComprehensionTest(CFGBaseTestCase):
def test_nodes(self):
Expand Down Expand Up @@ -995,6 +1003,42 @@ def test_function_multiple_return(self):
(call_foo, exit_foo),
(_exit, call_foo)])

def test_generator_multiple_yield(self):
path = 'examples/example_inputs/generator_with_multiple_yields.py'
self.cfg_create_from_file(path)

self.assert_length(self.cfg.nodes, expected_length=9)

entry = 0
entry_foo = 1
a = 2
_if = 3
yld_if = 4
yld = 5
exit_foo = 6
call_foo = 7
_exit = 8

self.assertInCfg([
(entry_foo, entry),
(a, entry_foo),
(_if, a),
(yld_if, _if),
(yld, _if),
(yld, yld_if), # Different from return
(exit_foo, yld),
(call_foo, exit_foo),
(_exit, call_foo)
])

yld_if_node = self.cfg.nodes[yld_if]
self.assertEqual(yld_if_node.left_hand_side, 'yld_foo')
self.assertEqual(yld_if_node.right_hand_side_variables, ['yld_foo'])
yld_node = self.cfg.nodes[yld]
self.assertEqual(yld_node.left_hand_side, 'yld_foo')
self.assertEqual(yld_node.right_hand_side_variables, ['a', 'yld_foo'])
self.assertEqual(self.cfg.nodes[call_foo].right_hand_side_variables, ['yld_foo'])

def test_blackbox_call_after_if(self):
path = 'examples/vulnerable_code/blackbox_call_after_if.py'
self.cfg_create_from_file(path)
Expand Down
4 changes: 2 additions & 2 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def test_targets_with_recursive(self):
excluded_files = ""

included_files = discover_files(targets, excluded_files, True)
self.assertEqual(len(included_files), 30)
self.assertEqual(len(included_files), 31)

def test_targets_with_recursive_and_excluded(self):
targets = ["examples/vulnerable_code/"]
excluded_files = "inter_command_injection.py"

included_files = discover_files(targets, excluded_files, True)
self.assertEqual(len(included_files), 29)
self.assertEqual(len(included_files), 30)
25 changes: 25 additions & 0 deletions tests/vulnerabilities/vulnerabilities_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,31 @@ def test_XSS_variable_multiple_assign_result(self):

self.assertAlphaEqual(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)

def test_yield(self):
vulnerabilities = self.run_analysis('examples/vulnerable_code/yield.py')
self.assert_length(vulnerabilities, expected_length=1)
vuln = vulnerabilities[0]
self.assertEqual(vuln.source.left_hand_side, "yld_things_to_run")
self.assertIn("yld_things_to_run", vuln.source.right_hand_side_variables)
EXPECTED_VULNERABILITY_DESCRIPTION = """
File: examples/vulnerable_code/yield.py
> User input at line 9, source "request.get_json(":
yld_things_to_run += request.get_json()['commands']
Reassigned in:
File: examples/vulnerable_code/yield.py
> Line 15: ~call_2 = yld_things_to_run
File: examples/vulnerable_code/yield.py
> Line 15: ~call_1 = ret_'; '.join(~call_2)
File: examples/vulnerable_code/yield.py
> Line 15: script = ~call_1
File: examples/vulnerable_code/yield.py
> reaches line 16, sink "subprocess.call(":
~call_3 = ret_subprocess.call(script, shell=True)
This vulnerability is unknown due to: Label: ~call_1 = ret_'; '.join(~call_2)
"""

self.assertAlphaEqual(str(vuln), EXPECTED_VULNERABILITY_DESCRIPTION)


class EngineDjangoTest(VulnerabilitiesBaseTestCase):
def run_analysis(self, path):
Expand Down