diff --git a/packages/code-analyzer-flow-engine/.gitignore b/packages/code-analyzer-flow-engine/.gitignore index 758f856e..69aad5eb 100644 --- a/packages/code-analyzer-flow-engine/.gitignore +++ b/packages/code-analyzer-flow-engine/.gitignore @@ -1,3 +1,3 @@ FlowScanner/build/**/** FlowScanner/**/__pycache__/** -FlowScanner/flowtest.egg-info/** \ No newline at end of file +FlowScanner/*.egg-info/** \ No newline at end of file diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py index 010ff698..9a9979ae 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_parser/parse.py @@ -6,12 +6,15 @@ import sys import traceback +import re from flow_parser import expression_parser from public.flow_scanner_exceptions import InvalidFlowException -sys.modules['_elementtree'] = None -import xml.etree.ElementTree as ET +get_tags = re.compile(r'<[^>]*>') + +# We want to override with our own custom class loaded in CP +sys.modules['_elementtree'] = None # noqa from typing import Optional import logging @@ -24,6 +27,9 @@ from public.enums import RunMode, FlowType, TriggerType from public.data_obj import VariableType from public.enums import DataType, ReferenceType +from typing import TypeAlias + +El: TypeAlias = parse_utils.CP.ET.Element #: hardcoded sfdc metadata namespace ns: str = '{http://soap.sforce.com/2006/04/metadata}' @@ -35,7 +41,7 @@ logger: logging.Logger = logging.getLogger(__name__) -def get_root(path: str) -> ET.Element: +def get_root(path: str) -> El: """Get flow root Args: @@ -67,11 +73,14 @@ class Parser(FlowParser): def __init__(self, root): #: XMl root of a single flow - self.root: ET.Element = root + self.root: El = root #: current filepath of flow self.flow_path: str | None = None + #: top flow path + self.top_flow_path: str | None = None + #: run mode as declared in flow xml self.declared_run_mode: RunMode | None = None @@ -89,11 +98,17 @@ def __init__(self, root): #: frozen set of all elements that have a child of and are thus flow globals #: useful for setting scopes - self.all_named_elems: frozenset[ET.Element] | None = None + self.all_named_elems: frozenset[El] | None = None #: set of all names (names of named elements) self.all_names: tuple[str,] | None = None + #: all traversable elements that are not crawlable + self.not_crawlable_elems: list[El] | None = None + + #: for all elements, which traversable elements lead to this one + self.traversable_inbound: dict[str, list[str]] | None = None + #: variables marked 'available for input', as a pair (flow_path, name) self.input_variables: frozenset[tuple[str, str]] | None = None @@ -112,6 +127,9 @@ def __init__(self, root): #: populated when the flow is loaded. Only has parents. 
self.var_types: dict[tuple[str, str], VariableType] | None = None + #: user controlled input variables + self.tainted_inputs: Optional[set[tuple[str, str]]] = None + def get_all_named_elems(self) -> frozenset[tuple[str, str]] | None: return self.all_named_elems @@ -127,31 +145,61 @@ def get_declared_run_mode(self) -> RunMode: def get_filename(self) -> str: return self.flow_path - def get_root(self) -> ET.Element: + def get_root(self) -> El: return self.root def get_literal_var(self) -> VariableType: return self.literal_var - def get_action_call_map(self) -> dict[str, list[tuple[str, str]]] | None: + + def get_traversable_inbound(self) -> dict[str, list[str]]: + """Returns dict from element name to list of all inbound element names + will be empty list if no inbound. + """ + if self.traversable_inbound is None: + self.set_traversable_inbound() + + return self.traversable_inbound + + def set_traversable_inbound(self) -> None: + start_el = self.get_start_elem() + node_els = self.get_all_traversable_flow_elements() + node_els.append(start_el) + + # initialize + accum = {get_name(x):[] for x in node_els} + for el in node_els: + el_name = get_name(el) + conns = parse_utils.get_conn_target_map(el) + if conns: + outbound = [x[0] for x in conns.values()] + for target in outbound: + if target not in accum: + logger.critical(f"non-traversable element {target} in flow {self.flow_path}") + continue + accum[target].append(el_name) + + self.traversable_inbound = accum + + + def get_action_call_map(self) -> dict[str, list[tuple[El, str]]] | None: """Gets all actionCalls in the flow element - Returns: actionCall type -> (element name, action name) + Returns: actionCall type -> (action_element, action name) """ accum = {} - action_calls = parse_utils.get_by_tag(self.root, 'actionCalls') - for action_call in action_calls: - action_name_els = parse_utils.get_by_tag(action_call, 'actionName') - action_type_els = parse_utils.get_by_tag(action_call, 'actionType') - elem_name = parse_utils.get_name(action_call) - if (len(action_name_els) != 1 or len(action_type_els) != 1 or - action_name_els[0].text is None or action_type_els[0].text is None): - logger.error(f"found invalid actionCall {elem_name} in flow {self.flow_path}") + action_call_els = parse_utils.get_by_tag(self.root, 'actionCalls') + for action_call_elem in action_call_els: + action_name = parse_utils.get_name(action_call_elem) + action_type = parse_utils.get_text_of_tag(action_call_elem, 'actionType') + if action_type is None: + logger.error(f"found invalid actionCall that has no actionType:" + f" {parse_utils.get_elem_string(action_call_elem)} in flow {self.flow_path}") continue - to_add = (action_type_els[0].text, elem_name, action_name_els[0].text) - if to_add[0] not in accum: - accum[to_add[0]] = [(to_add[1], to_add[2])] + to_add = (action_call_elem, action_name) + if action_type not in accum: + accum[action_type] = [to_add] else: - accum[to_add[0]].append((to_add[1], to_add[2])) + accum[action_type].append(to_add) if len(accum) == 0: return None @@ -467,6 +515,11 @@ def update(self, old_parser: Parser = None, is_return=False) -> Parser: None """ + if old_parser is not None: + self.top_flow_path = old_parser.flow_path + else: + self.top_flow_path = self.flow_path + all_named, all_names, vars_, inputs, outputs = _get_global_flow_data(self.flow_path, self.root) # all_named_elems are ET elements that have a tag as a child self.all_named_elems = all_named @@ -501,6 +554,16 @@ def update(self, old_parser: Parser = None, is_return=False) -> Parser: # we 
always update parsed variables, so we have full resolutions available self.cached_resolutions.update(old_parser.cached_resolutions) self.var_types.update(old_parser.var_types) + + + # handle tainted inputs, this will populate the member variable + tainted = self.get_tainted_inputs() + if old_parser is not None and old_parser.tainted_inputs: + if not tainted: + self.tainted_inputs = old_parser.tainted_inputs + else: + self.tainted_inputs.update(old_parser.tainted_inputs) + return self def get_output_variables(self, path: str | None = None) -> set[tuple[str, str]]: @@ -513,10 +576,10 @@ def get_input_variables(self, path: str | None = None) -> set[tuple[str, str]]: path = self.flow_path return {(x, y) for (x, y) in self.input_variables if x == path} - def get_input_field_elems(self) -> set[ET.Element] | None: + def get_input_field_elems(self) -> set[El] | None: return parse_utils.get_input_fields(self.root) - def get_input_output_elems(self) -> dict[str, set[ET.Element]]: + def get_input_output_elems(self) -> dict[str, set[El]]: """ Returns:: {"input": input variable elements, @@ -538,7 +601,7 @@ def get_input_output_elems(self) -> dict[str, set[ET.Element]]: "output": output_accum } - def get_by_name(self, name_to_match: str, scope: ET.Element | None = None) -> ET.Element | None: + def get_by_name(self, name_to_match: str, scope: El | None = None) -> El | None: """returns the first elem with the given name that is a child of the scope element""" if name_to_match == '*': return self.get_start_elem() @@ -561,7 +624,7 @@ def get_flow_name(self) -> str: """we assume there is always a flow label.""" res = get_by_tag(self.root, 'label') if len(res) == 0: - raise InvalidFlowException(f"Flow {self.flow_path} has no name, skipping..") + raise InvalidFlowException(f"Flow {self.flow_path} has no name, skipping..", flow_path=self.flow_path) else: return res[0].text @@ -593,58 +656,58 @@ def get_run_mode(self) -> RunMode: def get_api_version(self) -> str: return get_by_tag(self.root, 'apiVersion')[0].text - def get_all_traversable_flow_elements(self) -> list[ET.Element]: + def get_all_traversable_flow_elements(self) -> list[El]: """ ignore start""" return [child for child in self.root if get_tag(child) in parse_utils.CTRL_FLOW_ELEM] - def get_all_variable_elems(self) -> list[ET.Element] | None: + def get_all_variable_elems(self) -> list[El] | None: elems = get_by_tag(self.root, 'variables') if len(elems) == 0: return None else: return elems - def get_templates(self) -> list[ET.Element]: + def get_templates(self) -> list[El]: """Grabs all template elements. Returns empty list if none found """ templates = get_by_tag(self.root, 'textTemplates') return templates - def get_formulas(self) -> list[ET.Element]: + def get_formulas(self) -> list[El]: """Grabs all formula elements. 
    Returns empty list if none found
    """
         formulas = get_by_tag(self.root, 'formulas')
         return formulas

-    def get_choices(self) -> list[ET.Element]:
+    def get_choices(self) -> list[El]:
         choices = get_by_tag(self.root, 'choices')
         return choices

-    def get_dynamic_choice_sets(self) -> list[ET.Element]:
+    def get_dynamic_choice_sets(self) -> list[El]:
         dcc = get_by_tag(self.root, 'dynamicChoiceSets')
         return dcc

-    def get_constants(self) -> list[ET.Element]:
+    def get_constants(self) -> list[El]:
         constants = get_by_tag(self.root, 'constants')
         return constants

-    def get_start_elem(self) -> ET.Element | None:
+    def get_start_elem(self) -> El:
         """Get first element of flow
-
+        Raises InvalidFlowException if no start element
         Returns:
             start element, or the element pointed to in <startElementReference>
         """
         res = parse_utils.get_start_element(self.root)
         if res is None:
-            raise InvalidFlowException(f"No start element found in {self.flow_path}")
+            raise InvalidFlowException(f"No start element found in {self.flow_path}", flow_path=self.flow_path)
         else:
             return res

-    def get_all_indirect_tuples(self) -> list[tuple[str, ET.Element]]:
+    def get_all_indirect_tuples(self) -> list[tuple[str, El]]:
         """returns a list of tuples of all indirect references,
         e.g. str, elem, where str influences elem.
         The elem is a formula or template element and
@@ -656,6 +719,9 @@
             expr = None
             if elem.tag == f'{ns}textTemplates':
                 expr = elem.find(f'{ns}text').text
+                is_plaintext = parse_utils.get_text_of_tag(elem, 'isViewedAsPlainText')
+                if is_plaintext == 'false':
+                    expr = re.sub(get_tags, '', expr)
             if expr is not None:
                 influencers = expression_parser.extract_expression(expr)
                 [accum.append((var, elem)) for var in influencers]
@@ -699,13 +765,18 @@ def get_called_descendents(self, elem_name: str) -> list[str]:
         """Returns empty list if no descendents
         """
         el = self.get_by_name(elem_name)
-        return [x[0] for x in get_conn_target_map(el).values()]
+        target_map = get_conn_target_map(el)
+        if not target_map:
+            return []
+        else:
+            return [x[0] for x in target_map.values()]

     def get_traversable_descendents_of_elem(self, elem_name: str) -> list[str]:
-        """includes the original elem name"""
+        """returns element names; includes the original elem name"""
         visited = []
         worklist = []
         curr_name = elem_name
+
         while True:
             visited.append(curr_name)
             to_add = [x for x in self.get_called_descendents(curr_name) if
@@ -717,8 +788,53 @@
         else:
             return visited

+    def get_tainted_inputs(self) -> set[tuple[str, str]] | None:
+        """Looks for sources of user-controlled (tainted) input.
+
+        Returns:
+            ((path, varname), ) tuples corresponding to sources of taint
+
+        """
+        start = self.flow_path == self.top_flow_path
+
+        if self.tainted_inputs:
+            return self.tainted_inputs
+
+        flow_path = self.get_filename()
-def build_vartype_from_elem(elem: ET.Element) -> VariableType | None:
+        # input fields are XML elements, not tuples of strings;
+        # defaults to None
+        input_fields = self.get_input_field_elems()
+
+        # these are tuples: the input variables defined in the flow definition;
+        # also defaults to None
+        input_vars = self.get_input_variables()
+
+        if input_fields:
+            input_field_tuples = {(flow_path, parse_utils.get_name(x)) for x in input_fields}
+        else:
+            input_field_tuples = set()
+
+        input_vars = input_vars or set()
+
+        if start:
+            input_field_tuples.update(input_vars)
+            self.tainted_inputs = input_field_tuples
+
+        else:
+            # We are in a subflow, so the flow input variables are set
+            # by the calling flow; the user only controls the inputs they
+            # enter. TODO: What complicates the matter is screen extensions, which
+            # can pass data to the flow via input variables. These must be
+            # audited and then updated here.
+            self.tainted_inputs = input_field_tuples
+
+        return self.tainted_inputs
+
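The seeding rule above treats both screen input fields and 'available for input' variables as tainted in the top-level flow, but only screen fields in a subflow. A minimal standalone sketch of that rule; seed_tainted is a hypothetical helper, not part of this module::

    def seed_tainted(flow_path, field_names, input_var_names, is_top_flow):
        # screen input fields are always user controlled
        tainted = {(flow_path, name) for name in field_names}
        if is_top_flow:
            # in the first flow scanned, input variables are also user supplied
            tainted.update((flow_path, name) for name in input_var_names)
        return tainted

    assert seed_tainted('a.flow', ['Name_Field'], ['recId'], True) == {('a.flow', 'Name_Field'), ('a.flow', 'recId')}
    assert seed_tainted('b.flow', ['Name_Field'], ['recId'], False) == {('b.flow', 'Name_Field')}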
+def build_vartype_from_elem(elem: El) -> VariableType | None:
     """Build VariableType from XML Element

     The purpose of this function is to assign types to named
@@ -877,7 +993,7 @@
         elif tag == 'outputParameters':
             # use in action calls to assign outputs
-            # to variables. The subelement
+            # to variables. The sub-element
             # does not refer to the output parameter
             # element
             return VariableType(tag=tag, reference=ReferenceType.NodeReference)
@@ -906,7 +1022,7 @@
                 logger.critical(f"Error parsing recordLookups {parse_utils.get_name(elem)}")
                 return None
             # Todo: once we support second order flows, we'll need to add all of recordLookups
-            if parse_utils.is_auto_store(elem) is True:
+            if parse_utils.is_auto_store(elem):
                 # this is a valid element reference to the return value of the lookups
                 ref_ = ReferenceType.ElementReference
@@ -947,7 +1063,7 @@
             return VariableType(tag=tag,reference=ReferenceType.ElementReference)

         elif tag == 'subflows':
-            if parse_utils.is_auto_store(elem) is True:
+            if parse_utils.is_auto_store(elem):
                 # todo: we need a None field for booleans we don't know
                 return VariableType(tag=tag, reference=ReferenceType.SubflowReference)
@@ -991,7 +1107,7 @@

             return VariableType(tag=tag, reference=ReferenceType.NodeReference)

-    except Exception as e:
+    except Exception:
         # Todo: create flow exception here
         logger.critical(f"Error parsing variable element {traceback.format_exc()}")

@@ -999,9 +1115,8 @@
     logger.critical(f"Variable type cannot find match for elem {parse_utils.get_name(elem)} with tag {tag}")
     return None

-
-def _get_global_flow_data(flow_path, root: ET.Element) \
-        -> tuple[list[ET.Element], tuple[str,...], dict[tuple[str, str], VariableType],
+def _get_global_flow_data(flow_path: str, root: El) \
+        -> tuple[list[El], tuple[str,...], dict[tuple[str, str], VariableType],
                  frozenset[tuple[str, str]], frozenset[tuple[str, str]]]:

     all_named = get_named_elems(root)
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py
index 21e88df7..0e7b5087 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/__main__.py
@@ -11,8 +11,8 @@
 import flow_scanner.query_manager
 import flow_scanner.util as util
 import flow_scanner.version as version
-import queries.default_query as default_query
-from flow_scanner.query_manager import validate_qry_list, get_all_optional_queries
+
+from flow_scanner.query_manager import validate_qry_list, get_all_queries
 from flow_scanner.util import make_id
 from public.data_obj import PresetEncoder
 from public.parse_utils import quick_validate
@@ -134,18 +134,28 @@ def get_tokens_from_csv_file(file_path: str) -> list[str]:
     return get_validated_queries(unsplit(data))

+def print_preset_list():
+    map_ = flow_scanner.query_manager.PRESETS
+    preset_info = {}
+    for p in map_:
+        preset_info[p] = [x[1] for x in map_[p]]
+
+    res_json = json.dumps(preset_info, indent=4)
+    # print to stdout so user can redirect or examine
+    print(res_json)

 def get_validated_queries(data: list[str]) -> list[str]:
     cleaned_data = de_kebab_list(clean_str_list(data))
-    validation = validate_qry_list(cleaned_data)
-    if validation is True:
-        return cleaned_data
+    valid, found, missed, duplicates = validate_qry_list(cleaned_data)
+    if valid:
+        return found
     else:
-        if len(validation) == 1:
-            raise argparse.ArgumentTypeError("Unrecognized query requested: %s" % validation[0])
-        else:
-            raise argparse.ArgumentTypeError("Unrecognized queries requested: %s" %
-                                             ",".join(validation))
+        for issue, bad in [('Duplicate', duplicates), ('Unrecognized', missed)]:
+            if not bad:
+                continue
+            if len(bad) == 1:
+                raise argparse.ArgumentTypeError(f"{issue} query requested: {bad[0]}")
+            else:
+                raise argparse.ArgumentTypeError(f"{issue} queries requested: {','.join(bad)}")

 def unsplit(msg: str) -> list[str]:
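get_validated_queries assumes validate_qry_list returns a (valid, found, missed, duplicates) tuple. A toy stand-in with that shape makes the unpacking concrete; toy_validate and the query names here are illustrative only, not the real registry::

    def toy_validate(requested, known=('QueryA', 'QueryB')):
        found = [q for q in requested if q in known]
        missed = [q for q in requested if q not in known]
        duplicates = sorted({q for q in requested if requested.count(q) > 1})
        valid = not missed and not duplicates
        return valid, found, missed, duplicates

    assert toy_validate(['QueryA']) == (True, ['QueryA'], [], [])
    assert toy_validate(['NoSuchQuery'])[2] == ['NoSuchQuery']
    assert toy_validate(['QueryA', 'QueryA'])[3] == ['QueryA']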
" + "If empty this defaults to flows target csv file, the specified directory, " + "or contents of flow directory or flows listed in commandline."), type=check_file_exists) """ @@ -310,12 +321,12 @@ def parse_args(my_args: list[str], default: str = None) -> argparse.Namespace: parser.add_argument("--query_path", required=False, help="path of custom query python file") parser.add_argument("--query_class", required=False, help="name of class to instantiate in query_path") parser.add_argument("--preset", required=False, help="name of preset to use (consumed by query code)") - parser.add_argument("--optional_queries", required=False, - help="comma separated list of optional queries to execute in addition to the preset.") - parser.add_argument("--optional_queries_path", required=False, - help="path of file containing a comma separated list of optional queries to " + parser.add_argument("--queries", required=False, + help="comma separated list of queries to execute in addition to the preset.") + parser.add_argument("--queries_path", required=False, + help="path of file containing a comma separated list of queries to " "execute in addition to the preset.", type=check_file_exists) - parser.add_argument("--all_optional", required=False, action='store_true', help=("run all optional queries. " + parser.add_argument("--all_queries", required=False, action='store_true', help=("run all queries. " "WARNING: this is noisy.")) parser.add_argument("--debug_flow", required=False, help=("For expert use only. Run a debug flow with" "the supplied parameter.")) @@ -341,24 +352,33 @@ def main(argv: list[str] = None) -> str | None: args = parse_args(argv, default=default) + if args.preset is not None: + # check preset + if args.preset not in flow_scanner.query_manager.PRESETS: + raise argparse.ArgumentTypeError(f"Invalid preset requested: {args.preset}") + # check if the user wants only a description of the default queries if args.preset_info is True: # if user has specified a preset, use that or None - preset_name = args.preset - preset = default_query.build_preset(preset_name) - queries = preset.queries - query_info = [x.to_dict() for x in list(queries)] - sorted_query_info = sorted(query_info, key=lambda x: x['query_id']) - desc = json.dumps(sorted_query_info, indent=4, cls=PresetEncoder) - # print to stdout so user can redirect or examine - print(desc) - + preset_name = args.preset or "default" + if preset_name: + preset = flow_scanner.query_manager.build_preset_for_name(preset_name) + if preset is None: + print(f"No preset found with name: {preset_name}") + print_preset_list() + return + queries = preset.queries + query_info = [x.to_dict() for x in list(queries)] + sorted_query_info = sorted(query_info, key=lambda x: x['query_id']) + desc = json.dumps(sorted_query_info, indent=4, cls=PresetEncoder) + # print to stdout so user can redirect or examine + print(desc) return - # Check if user wants list of optional queries - if args.optional_query_info is True: - desc = flow_scanner.query_manager.get_all_optional_descriptions() - print(desc) + # Check if user wants list of queries + if args.query_info is True: + desc = flow_scanner.query_manager.get_query_descriptions() + print(json.dumps(desc, indent=4, cls=PresetEncoder)) return # logging @@ -381,18 +401,18 @@ def main(argv: list[str] = None) -> str | None: """ - Handle Optional Queries + Handle Queries """ - if args.all_optional is True: - optional_qry_l = get_all_optional_queries() + if args.all_queries is True: + qry_l = get_all_queries() - elif 
args.optional_queries_path is not None: - optional_qry_l = get_tokens_from_csv_file(args.optional_queries_path) + elif args.queries_path is not None: + qry_l = get_tokens_from_csv_file(args.queries_path) - elif args.optional_queries is not None: - optional_qry_l = get_validated_queries(unsplit(args.optional_queries)) + elif args.queries is not None: + qry_l = get_validated_queries(unsplit(args.queries)) else: - optional_qry_l = None + qry_l = None """ Handle chunking @@ -459,7 +479,7 @@ def main(argv: list[str] = None) -> str | None: query_module_path=args.query_path, query_class_name=args.query_class, query_preset=args.preset, - optional_queries=optional_qry_l, + queries=qry_l, crawl_dir=args.crawl_dir, resolver=resolver) diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py index 46de0e8b..be176e76 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/branch_state.py @@ -320,19 +320,20 @@ def load_crawl_step(self, crawler: Crawler, crawl_step: CrawlStep = None) -> Cra old_history = self.current_crawl_step.visitor.history new_history = next_cs.visitor.history - if old_history == (): + if old_history == () or old_history == ('*',): # we are on the first branch, so no backtracking old_map = self.__influence_map[self.current_crawl_step] - elif len(new_history) >= len(old_history) and new_history[0:len(old_history)] == old_history: + elif len(new_history) == len(old_history) + 1 and new_history[0:len(old_history)] == old_history: # the new branch is a continuation of old branch so no backtracking old_map = self.__influence_map[self.current_crawl_step] else: - # the new history is a different branch, and we need to backtrack + # the new history is a different branch, and we need to either backtrack or jump ahead + # get_last_ancestor returns the last time we visited the element right before the current element old_cs = crawler.get_last_ancestor(next_cs) if old_cs is None: - # no predecessor, so we use default + # use default map old_map = self.__default_map else: old_map = self.__influence_map[old_cs] diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py index 76ee85b0..d627518f 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/control_flow.py @@ -9,16 +9,26 @@ import traceback from collections.abc import Generator from dataclasses import dataclass, field -from typing import TextIO, TypeAlias +from typing import TypeAlias, TYPE_CHECKING +import public.custom_parser as CP import flow_parser.parse as parse +import flow_scanner.util as util from flow_parser.parse import Parser -from public.contracts import AbstractSegment, AbstractControlFlowGraph, AbstractCrawler +from public import parse_utils +from public.contracts import AbstractSegment, AbstractControlFlowGraph, AbstractCrawler, FlowParser from public.data_obj import BranchVisitor, CrawlStep, Jump, JSONSerializable from public.enums import ConnType -from public.parse_utils import (ET, get_name, get_conn_target_map, +from public.flow_scanner_exceptions import InvalidFlowException +from public.parse_utils import (get_name, get_conn_target_map, is_subflow, is_loop, get_tag) +El: TypeAlias = CP.ET.Element + +if 
TYPE_CHECKING:
+    # False at run time, only for type checker
+    from _typeshed import SupportsWrite
+
 MAX_VISITS_PER_SEGMENT = 20

 logger = logging.getLogger(__name__)
@@ -40,28 +50,27 @@ class Segment(JSONSerializable, AbstractSegment):
     # list of traversal indexes that are subflow elements
     subflows: list[int]

-    # connectors at the end of this segment
+    # connectors at the end of this segment. Empty list if no jumps.
     jumps: list[Jump]

     # whether this segment may end execution
     is_terminal: bool

-    # for tracking whether it has been visited
-    seen_tokens: list[tuple[tuple[str, str], ...]] = field(default_factory=list)
+    # is multiple-inbound
+    is_multiple_inbound: bool = False

+    # previous label -> list of visitor tokens already seen from that label
+    seen_tokens: dict[str, list[tuple[tuple[str, str], ...]]] = field(default_factory=dict)

-    def accept(self, visitor: BranchVisitor, multiple_inbound: bool=False) -> list[BranchVisitor] | None:
+
+    def accept(self, visitor: BranchVisitor) -> list[BranchVisitor] | None:
         """does the node accept the visitor

         Also updates visitor state

         Args:
             visitor: Branch Visitor trying to jump into node
-            multiple_inbound: Whether this segment accepts more than
-                              a single inbound, in which case it will
-                              need to assign tokens for the extra inbounds.
-                              Whether an inbound is 'extra' is decided by
-                              order of visit.
+
         Returns:
             list of BranchVisitors to process, or None
@@ -69,24 +78,33 @@
         if not self.jumps:
             return None

-        if visitor.token in self.seen_tokens:
+        prev_label = visitor.previous_label
+
+        if prev_label in self.seen_tokens and visitor.token in self.seen_tokens[prev_label]:
             return None
+        # We must allow one cycle to traverse loops fully
+        count, cycle = util.find_cycles(target=self.label, history=visitor.history)
+        if count > 1 or (count == 1 and len(cycle) == 1):
+            return None
         else:
-            self.seen_tokens.append(visitor.token)
-            if multiple_inbound:
-                return self._send_outbound(visitor, add_token=True)
+            if prev_label in self.seen_tokens:
+                self.seen_tokens[prev_label].append(visitor.token)
             else:
-                return self._send_outbound(visitor)
+                self.seen_tokens[prev_label] = [visitor.token]
+            return self._send_outbound(visitor)

-    def _send_outbound(self, visitor, add_token=False):
+    def _send_outbound(self, visitor):
         jumps = self.jumps
         to_return = []
         loop_context = visitor.loop_context or tuple()

-        history = visitor.history + ((visitor.previous_label, visitor.current_label),)
+        if visitor.history is None:
+            visitor.history = tuple()
+
+        history = visitor.history + (self.label,)

         for jmp in jumps:
             current_label = jmp.target
@@ -109,8 +127,9 @@
                     # remove everything before the entrance to the loop
                     loop_context = loop_context[:z]

-            if add_token:
+            if self.is_multiple_inbound:
                 new_token = visitor.token
+                to_add = (visitor.previous_label, visitor.current_label)
                 if visitor.token is None:
                     new_token = (to_add,)
@@ -136,160 +155,135 @@
             to_return.append(outbound_to_add)
         return to_return

+    # noinspection PyTypeChecker
     @classmethod
-    def build_from_parser(cls, parser: parse.Parser, elem: ET.Element) -> Segment:
+    def build_from_parser(cls, parser: parse.Parser, start_elem: El) -> Segment:
         """Build a segment starting at this element

         Args:
             parser: flow parser instance
-            elem: first element in this segment
+            start_elem: first element in this segment

         Returns:
             segment
         """
+        inbound_map = parser.get_traversable_inbound()
+        label = get_name(start_elem)
+        is_multiple_inbound = len(inbound_map[label]) > 1

-        label = get_name(elem)
-        start_tag = get_tag(elem)
-        jumps = []

-        if is_subflow(elem):
-            subflows = [0]
-        else:
-            subflows = []
-
-        conn_map = _get_connector_map(elem, parser=parser)
-        optional_values = [x[2] for x in conn_map.values() if x[2] is True]
-        is_optional = len(optional_values) > 0
-        curr_elem = elem
-
-        # elements traversed within this segment,
-        # so always initialized to zero
-        traversed = []
-
-        if len(conn_map) == 0:
-            return Segment(label=label,
-                           subflows=subflows,
-                           traversed=[(label, start_tag)],
-                           jumps=[],
-                           is_terminal=True)
+        elem = start_elem
+        elem_name = get_name(elem)
+        elem_tag = get_tag(elem)

+        # elements traversed within this segment
+        traversed = [(elem_name, elem_tag)]
+        # traversal indexes that are subflow elements
+        subflows = []
         index = 0
-        while len(conn_map) > 0:
-            curr_name = get_name(curr_elem)
-            curr_tag = get_tag(curr_elem)
-            assert curr_tag is not None
-
-            if (curr_name, curr_tag) in traversed:
-                # we are looping back in the segment. break here, and
-                # the element will not be added to this segment.
-                # It will then appear in some other segment pointing to this segment.
-                #
-                # If it points to an element somewhere in the middle of this segment,
-                # that will be addressed in the `fix_duplicates` function below.
-                break
-            else:
-                traversed.append((curr_name, curr_tag))
-
-            if is_subflow(curr_elem):
-                subflows.append(index)
-
-            if is_loop(curr_elem):
-                # loops always terminate a segment
-                for conn, val in conn_map.items():
-                    elem_is_loop = False
-                    no_more_seen = False
-
-                    if get_tag(conn) == 'noMoreValuesConnector':
-                        is_optional = False
-                        no_more_seen = True
-
-                    if get_tag(conn) == 'nextValueConnector':
-                        elem_is_loop = True
-                        # there may be no values at all,
-                        # in which case this branch may never be taken
-                        is_optional = True
-
-                    jumps.append(Jump(src_name=curr_name,
-                                      target=val[0],
-                                      is_goto=val[1] is ConnType.Goto,
-                                      is_loop=elem_is_loop,
-                                      is_no_more_values=no_more_seen,
-                                      is_fault=False
-                                      )
-                                 )
-                break
+        while True:
+            try:
+                jumps, is_terminal = get_jumps_and_terminal(elem_name, elem_tag, elem)

-            elif len(conn_map) == 1:
-                vals = list(conn_map.values())
-                is_optional = vals[0][2]
+                if is_subflow(elem):
+                    subflows = [index]

-                if (vals[0][1] is not ConnType.Goto and
-                        not is_optional):
+                if len(jumps) == 1 and elem_tag != 'loops' and not is_terminal:
+                    # now check that the next elem has only one inbound
+                    next_elem = parser.get_by_name(jumps[0].target)
+                    next_name = get_name(next_elem)

-                    # this is a normal connector that must always be followed
-                    curr_elem = parser.get_by_name(vals[0][0])
-                    conn_map = _get_connector_map(curr_elem, parser=parser)
-                    continue
+                    # some flows are missing target elements even
+                    # though the target is specified in the connector. WTF.
+                    if next_name in inbound_map:
+                        next_inbound = inbound_map[next_name]
+                    else:
+                        next_inbound = []
+
+                    if len(next_inbound) == 1:
+                        # we continue in the segment
+                        elem = next_elem
+                        elem_name = next_name
+                        elem_tag = get_tag(elem)
+
+                        assert (elem_name, elem_tag) not in traversed
+                        traversed.append((elem_name, elem_tag))
+                        index += 1
+                        continue
+
+                return Segment(label=label,
+                               subflows=subflows,
+                               traversed=traversed,
+                               jumps=jumps,
+                               is_multiple_inbound=is_multiple_inbound,
+                               is_terminal=is_terminal
+                               )
+            except Exception:
+                logger.critical(f"Could not crawl flow {parser.get_filename()} {traceback.format_exc()}")
+                raise InvalidFlowException("Could not crawl flow", flow_path=parser.get_filename())
+
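build_from_parser extends a segment only while the current element has a single outbound jump and the next element has a single inbound edge. The same chain-collapsing rule on a toy adjacency map, independent of the parser API (collapse_chain and both maps are illustrative)::

    def collapse_chain(start, outbound, inbound):
        traversed = [start]
        curr = start
        while len(outbound.get(curr, [])) == 1:
            nxt = outbound[curr][0]
            if len(inbound.get(nxt, [])) != 1:
                break  # a join point starts its own segment
            traversed.append(nxt)
            curr = nxt
        return traversed

    out = {'a': ['b'], 'b': ['c'], 'c': ['d'], 'x': ['d']}
    inb = {'b': ['a'], 'c': ['b'], 'd': ['c', 'x']}
    assert collapse_chain('a', out, inb) == ['a', 'b', 'c']  # 'd' has two inbounds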
+
+def get_jumps_and_terminal(el_name: str, el_tag: str, elem: El) -> tuple[list[Jump], bool]:
+    """Return the list of jumps for this element and whether it is terminal (bool)"""
+
+    jumps = []
+    conns = get_conn_target_map(elem)
+
+    if not conns:
+        # no outbound connectors means the element is terminal
+        return jumps, True
+
+    if is_loop(elem):
+        no_more_seen = False
+        # loops will be terminal without a noMoreValues connector
+        for key, val in conns.items():
+            conn_tag = get_tag(key)
+            if conn_tag == 'noMoreValuesConnector':
+                is_no_more = True
+                no_more_seen = True
+            else:
+                is_no_more = False
-            else:
-                # although there is only one connector, it is optional
-                # which means the current element terminates the segment
-                # and *may* terminate the flow
-                # and the connector is turned into a jump
-                jumps.append(Jump(src_name=curr_name,
-                                  is_goto=vals[0][1] is ConnType.Goto,
-                                  target=vals[0][0],
-                                  is_loop=False,
-                                  is_no_more_values=False,
-                                  is_fault=vals[0][1] is ConnType.Exception))
-
-                break
-
-            elif len(conn_map) > 1:
-                is_optional = True
-                # There is more than one connector, so
-                for val in conn_map.values():
-
-                    # a single non-optional connector makes the segment non-terminal
-                    if not val[2]:
-                        is_optional = False
-                    jumps.append(Jump(src_name=curr_name,
-                                      target=val[0],
-                                      is_goto=val[1] is ConnType.Goto,
-                                      is_loop=False,
-                                      is_no_more_values=False,
-                                      is_fault=val[1] is ConnType.Exception
-                                      )
-                                 )
-                break
+            jumps.append(Jump(src_name=el_name,
+                              target=val[0],
+                              is_goto=val[1] is ConnType.Goto,
+                              is_loop=is_no_more is False,
+                              is_no_more_values=is_no_more,
+                              is_fault=False
+                              )
+                         )
+        # If the loop does not have a noMoreValuesConnector, then it is terminal
+        jumps.sort(key=lambda x: x.priority())
+        return jumps, no_more_seen is False

-            # end of conditionals
-            index += 1
+    for val in conns.values():
+        jumps.append(Jump(src_name=el_name,
+                          is_goto=val[1] is ConnType.Goto,
+                          target=val[0],
+                          is_loop=False,
+                          is_no_more_values=False,
+                          is_fault=val[1] is ConnType.Exception))

-        # end of while loop
-        # Check if the last element in a segment that may also end the flow
-        if len(conn_map) == 0:
-            curr_tag = get_tag(curr_elem)
-            curr_name = get_name(curr_elem)
-            if (curr_name, curr_tag) not in traversed:
-                traversed.append((curr_name, curr_tag))
+    jumps.sort(key=lambda x: x.priority())

-            if is_subflow(curr_elem):
-                subflows.append(index)
+    # Now we need to decide if the element is terminal.
+    #
+    # If a decision element has no defaultConnector, then every outbound
+    # connector is conditional and the element may end the flow, so we
+    # treat it as terminal.
+    is_terminal = False
+    if el_tag == 'decisions':
+        is_terminal = next((x for x in conns.keys() if get_tag(x) == 'defaultConnector'), False) is False

-            if len(jumps) == 0:
-                # if there are no more jumps, this is a terminal element
-                is_optional = True
-            else:
-                # sort jumps so nextValue is taken first
-                jumps.sort(key=lambda x: x.priority())
+    elif len(conns) == 1 and list(conns.values())[0][1] is ConnType.Exception:
+        # we may also have the case where the only outbound connector is a faultHandler
+        is_terminal = True

-        return Segment(label=label,
-                       subflows=subflows,
-                       jumps=jumps,
-                       traversed=traversed,
-                       is_terminal=is_optional)
+    return jumps, is_terminal


 @dataclass(frozen=True, eq=True, slots=True)
@@ -308,47 +302,42 @@
     def from_parser(cls, parser: parse.Parser):
         start_elem = parser.get_start_elem()
         start_label = get_name(start_elem)
         visited_labels = []
-        visited_elems = set()
+        visited_elems = []
         segment_map = {}
         to_visit = [start_elem]
+        # segment label -> jumps that reach it
+        inbound_jumps = {}

         while len(to_visit) > 0:
             curr_elem = to_visit.pop(0)
-            curr_segment = Segment.build_from_parser(parser=parser,
-                                                     elem=curr_elem)
-
-            segment_map[curr_segment.label] = curr_segment
+            curr_segment = Segment.build_from_parser(parser=parser, start_elem=curr_elem)
+            curr_name = curr_segment.label
+            segment_map[curr_name] = curr_segment

             # add segment label to visited
             if curr_segment.label not in visited_labels:
                 visited_labels.append(curr_segment.label)
-
-            visited_elems.update(curr_segment.traversed)
+            visited_elems = visited_elems + curr_segment.traversed

             # update to_visit with new jumps
             for jmp in curr_segment.jumps:
                 tgt = jmp.target
-                tgt_elem = parser.get_by_name(tgt)
-                if tgt not in visited_labels and tgt_elem not in to_visit:
-                    to_visit.append(tgt_elem)

-        # The resulting Segments are fine except for
-        # gotos leading to duplicates. These are fixed here.
- _fix_duplicates(segment_map) + tgt_elem = parser.get_by_name(tgt) - # Now generate inbound: - inbound = {} + # handle case of missing targets in malformed flows + if tgt_elem is None: + continue - for seg in segment_map.values(): - for jmp in seg.jumps: - if jmp.target in inbound: - inbound[jmp.target].append(jmp) + if tgt not in visited_labels and tgt_elem not in to_visit: + to_visit.append(tgt_elem) + inbound_jumps[tgt] = [jmp] else: - inbound[jmp.target] = [jmp] + inbound_jumps[tgt].append(jmp) return ControlFlowGraph(start_label=start_label, - inbound=inbound, + inbound=inbound_jumps, segment_map=segment_map) def get_crawl_data(cfg: ControlFlowGraph) -> \ @@ -370,7 +359,6 @@ def get_crawl_data(cfg: ControlFlowGraph) -> \ terminal_steps = [] el_2_cs = dict() # mapping el_name to list of crawl steps step = 0 - max_visit = 100 for (visitor, segment) in generator: @@ -402,13 +390,13 @@ def get_crawl_data(cfg: ControlFlowGraph) -> \ if vals is None: el_2_cs[el_name] = [cs] else: - vals.append(cs) + vals.append(cs) # noqa step += 1 return tuple(crawl_steps), tuple(terminal_steps), el_2_cs -def get_visits_statistics(visit_map: dict[str, Jump | None], cfg: ControlFlowGraph): +def get_visits_statistics(visit_map: dict[str, list[Jump] | None], cfg: ControlFlowGraph): # first check that every label has been visited: missed = [] for label in cfg.segment_map: @@ -487,7 +475,6 @@ def _crawl_iter(cfg: ControlFlowGraph) -> Generator[tuple[BranchVisitor, Segment worklist = [] visitor_counts = {} visited_jumps = [] - first_seen_inbound = {} #: segment_label -> visitor.previous_label while len(worklist) > 0 or visitor is not None: if visitor is None and len(worklist) > 0: @@ -516,7 +503,7 @@ def _crawl_iter(cfg: ControlFlowGraph) -> Generator[tuple[BranchVisitor, Segment else: visitor_counts[curr_label] += 1 if visitor_counts[curr_label] > MAX_VISITS_PER_SEGMENT: - logger.critical(f"Attempting to visit {curr_label} {visitor_counts[curr_label]} " + logger.info(f"Attempting to visit {curr_label} {visitor_counts[curr_label]} " f"times, stopping this visitor.") visitor = None continue @@ -525,37 +512,11 @@ def _crawl_iter(cfg: ControlFlowGraph) -> Generator[tuple[BranchVisitor, Segment yield visitor, segment - # todo: cache this - if segment.label == '*': - is_multiple = False - else: - inbounds = cfg.inbound[segment.label] - if len(inbounds) <= 1: - is_multiple = False - - elif curr_label not in first_seen_inbound: - is_multiple = False - first_seen_inbound[curr_label] = prev_label - - else: - is_multiple = prev_label != first_seen_inbound[curr_label] - - next_visitors = segment.accept(visitor, multiple_inbound=is_multiple) + next_visitors = segment.accept(visitor) if next_visitors is None or len(next_visitors) == 0: visitor = None - """ - # no more visitors means current branch is exhausted - # if the current branch was not visited, then yield it now - history = visitor.history + ((visitor.previous_label, visitor.current_label),) - last_visitor = dataclasses.replace(visitor, - previous_label=label, - current_label=None, - history=history - ) - yield last_visitor, segment - visitor = None - """ + else: # depth-first search so take first branch and assign as current visitor = next_visitors[0] @@ -600,96 +561,6 @@ def _find_segments_with_elem(val: str, segment_map: dict[str, Segment]) -> list[ return to_return - -def _fix_duplicates(segment_map: dict[str, Segment]) -> None: - """segment surgery to merge duplicate paths - - Sometimes we have:: - - segment 1: A->B->C - segment 2: X->A->B->C - - Which should 
be turned into:: - - segment 1: A->B->C - segment 2': X :jump A - - Or if we have:: - - segment 3: X->Y->A - segment 4: W->B->A - - Then this should be merged into: - - segment 3': X->Y jump A - segment 4': W->B jump A - new segment: A - - Args: - segment_map: label -> Segment - - Returns: - None. (Segments updated in place) - """ - crawled = [] - segments = segment_map.values() - for segment in segments: - crawled = crawled + segment.traversed - - dupes = {x for x in crawled if crawled.count(x) > 1} - if len(dupes) == 0: - return - # el: string name of dupe flow element - # val: list (segment, index of traversed in segment) - processed = [] - for val in dupes: - if val in processed: - continue - - dupes = _find_segments_with_elem(val, segment_map) - new_segment = None - - # (segment, index) - for (label, segment, val_index) in dupes: - if val_index == 0: - # the dupe *starts* a segment, so it is the entire segment - new_segment = segment - else: - # the dupe is partway through the segment - subflows = [x for x in segment.subflows if x < val_index] - new_jump = Jump(src_name=segment.traversed[val_index - 1][0], - target=val[0], - is_loop=False, - is_goto=False, - is_no_more_values=False, - is_fault=False - ) - # replace the segment - segment_map[label] = Segment(label=segment.label, - traversed=segment.traversed[:val_index], - subflows=subflows, - jumps=[new_jump], - is_terminal=False) - # now, make the jump target - if new_segment is not None: - # we already have it, no need to add it. - pass - else: - # make it. All dupes of the same value must end in the same way - # so take the first - (seg_index, segment, val_index) = dupes[0] - new_segment = Segment(label=val[0], - traversed=segment.traversed[val_index:], - subflows=[x for x in segment.subflows if x >= val_index], - jumps=segment.jumps, - is_terminal=segment.is_terminal) - - segment_map[val[0]] = new_segment - - # add all the traversed elems to processed - # so we don't make more new segments unnecessarily - processed = processed + new_segment.traversed - class CrawlEncoder(json.JSONEncoder): def default(self, obj): if (isinstance(obj, JSONSerializable) or isinstance(obj, BranchVisitor) @@ -739,16 +610,23 @@ def __init__(self, total_steps: int, cfg: ControlFlowGraph, #: crawl_step -> last seen ancestor self.history_maps: dict[tuple[tuple[str, str], ...], CrawlStep] = history_maps or {} - #: previous crawlers, None if this is the first - #: if we are 3 frames deep, this is descending order: history = [(crawler 2, int 2), (crawler 1, int 1)] - self.crawler_history: list[tuple[Crawler, int]] | None = None + #: previous subflow/action elements that spawned the current crawler. + # None if this is the first. if we are 3 frames deep, this is descending order: + # history = [(parent_subflow, parent_path), (grandparent_subflow, grandparent_path), ...] 
+ self.subflow_parents: list[tuple[El, str]] | None = None #: file path self.flow_path: str | None = flow_path - #: + #: map from element to all crawl_steps in which it has been crawled self.el_2_cs: dict[str, list[CrawlStep]] | None = el_2_cs + #: traversable elem name -> list of other elements that point to it + self.traversable_inbound: dict[str, list[str]] | None = None + + #: all traversable element tuples (names, tags) connected to the start + self.crawlable_elem_tuples: list[tuple[str, str]] | None = None + @classmethod def from_parser(cls, parser: parse.Parser): """Builds a crawl schedule (recommended builder) @@ -774,94 +652,20 @@ def from_parser(cls, parser: parse.Parser): el_2_cs=el_2_cs ) - def get_control_influence_from_source(self, influenced_var: str, - source_var: var_t) ->tuple[var_t, ...] | None: - """Both the influenced and source variables are top level flow elements. - The influenced variable is in the current flow path, the source variable may be in - a different flow path (so we need the tuple (path, varname)). If the source variable control - influences the influenced_variable, then a chain of (path, element name) will be returned starting - at the source and leading to the influenced variable. The chain only contains branches - and subflow chains, not every step, but every step could be reconstructed if desired - by adding in the segment traversals. - - This must be run at every frame load, because a subflow may be loaded multiple times, with a larger - set of control influencers each time it is called. - - For example, in frame A, we have start --> branch 1, branch 2, and each branch may call the same subflow. - So a given element in the subflow will be control influenced by branch 1 the first time it is called, - and by both branch 1 and branch 2 the second time it is entered. So to get a global control - influencing answer, you need to call this function on every subflow load. You do not need to wait - until a given subflow is fully crawled, as the full crawl info is generated by the parser when the - flow is loaded. - - Args: - influenced_var (str): top level (traversable) flow element name in the flow crawled by this current crawler. - source_var (str, str): flow_path, element name in either the current flow or in another flow that may or - may not be an ancestor in the call chain. - - Returns: - None if there is no influence, or a set of crawl steps linking the source to the influenced. - Only a single chain of crawl steps is returned, there may be other control influence chains. - - """ - if source_var is None or influenced_var is None: - return None - - # case 1: (Local Analysis) everything is in the same flow - src_path = source_var[0] - if self.flow_path == src_path: - if influenced_var == source_var[1]: - # trivial case - return (src_path, influenced_var) - - res = self._get_local_control(influenced_var, source_var) - if res is None: - return None - else: - return tuple([(src_path, x) for x in res]) - - # case 2: the source is in a subflow descendent. Because all elements - # are connected to the start, we only care about the chain of start elements/subflow - # elems connecting to the source elem. 
-
-        # set of (call-chain height, (crawler, crawler_step_index))
-        candidates = [(index, x) for (index, x) in enumerate(self.crawler_history) if x[0].flow_path == src_path]
-
-        if len(candidates) == 0:
-            return None
-        else:
-            for index, (crawler, step_index) in candidates:
-                tail = self._get_local_control(crawler.crawl_schedule[step_index].element_name, source_var[1])
-                if tail is not None:
-                    result = [(self.flow_path, influenced_var)]
-                    # we have a chain from source -> subflow that exited the frame.
-                    # Now, fill in until we get to the start element of the current frame.
-                    for i in range(index):
-                        c = self.crawler_history[i][0]
-                        step = self.crawler_history[i][1]
-                        elem = c.crawl_schedule[step].element_name
-                        path = c.flow_path
-                        result.append((path, elem))
-
-                    [result.append((src_path, el)) for el in tail]
-                    return tuple(result)
-
-        return None
-
     def get_crawl_schedule(self)->tuple[CrawlStep, ...]:
         return self.crawl_schedule

     def get_flow_path(self) -> str | None:
         return self.flow_path

-    def get_crawler_history_unsafe(self) -> list[tuple[Crawler, int]]:
+    def get_subflow_parents(self) -> list[tuple[El, str]]:
         """READ ONLY

         Returns:
             list of (subflow element, flow path) pairs for the subflow calls
             that spawned this crawler, innermost caller first.

         """
-        return self.crawler_history
+        return self.subflow_parents

     def get_cfg(self)-> ControlFlowGraph:
         return self.cfg
@@ -917,7 +721,7 @@ def get_last_ancestor(self, crawl_step) -> CrawlStep | None:
         return res

     def get_elem_to_crawl_step(self, elem_name: str) -> list[CrawlStep]:
-        """returns a list of all crawlsteps in which this element has been visited
+        """returns a list of all crawl steps in which this element has been visited
         during the crawl of this flow. If not visited, the empty list is returned.

         Args:
@@ -934,39 +738,71 @@
         else:
             return dict.get(self.el_2_cs, elem_name, list())

-    def _get_local_control(self, influenced_el: str, influencer_el) -> tuple[var_t, ...] | None:
-        sink_crawl_steps = self.el_2_cs.get(influenced_el, [])
-        source_crawl_steps = self.el_2_cs.get(influencer_el, [])
+    def get_crawlable_elem_tuples(self) -> list[tuple[str, str]] | None:
+        """Returns all traversable element (name, tag) tuples that are connected to the start element
+        """
+        if self.crawlable_elem_tuples is None:
+            accum = []
+            for seg in self.cfg.segment_map.values():
+                accum = accum + seg.traversed

-        if len(sink_crawl_steps) == 0 or len(source_crawl_steps) == 0:
+            self.crawlable_elem_tuples = accum
+        return self.crawlable_elem_tuples
+
+    def get_call_chain(self, source_el: El, source_path: str,
+                       sink_el: El, source_parser: FlowParser) -> list[tuple[El, str]] | None:
+        """sink_el must be in the current flow. source_el can be in an ancestor
+        flow. Only returns paths currently crawled, so this must be called
+        every time a specific frame is loaded.
+
+        Returns:
+            A list starting with the source and ending with the sink, in which each
+            element is an ancestor caller of the succeeding element.
+ [(element, element flow path)] + + """ + source_el_tag = parse_utils.get_tag(source_el) + source_el_name = parse_utils.get_name(source_el) + sink_el_name = parse_utils.get_name(sink_el) + + if source_el_tag in parse_utils.START_ELEMS: + local_source_influenced = [x[0] for x in self.get_crawlable_elem_tuples()] + else: + local_source_influenced = source_parser.get_traversable_descendents_of_elem(source_el_name) + + if not local_source_influenced: return None + + if source_path == self.flow_path: + if sink_el_name in local_source_influenced: + if sink_el_name == source_el_name: + return [(source_el, source_path)] + else: + return [(source_el, source_path), (sink_el, source_path)] + else: + return None + else: - for source in source_crawl_steps: # - # Because of how this info is built, the first element is likely - # to have the smallest branch history, which speeds things up. - sink = sink_crawl_steps[0] - - sin_l = len(sink.visitor.history) - src_l = len(source.visitor.history) - if sin_l > src_l and (sink.visitor.history[0:len(source.visitor.history)] == source.visitor.history): - - # we choose jump target arbitrarily - it doesn't matter - # as long as we are consistent since this is for the auditor's own info - return ((source.element_name,) + tuple([x[1] for x in sink.visitor.history[src_l + 1:]]) - + (sink.element_name,)) - - elif sin_l == src_l and sink.visitor.history == source.visitor.history: - # Both the sink and source are already on the same segment, so - # we only need to check if src is dominant. - if sink.local_index > source.local_index: - # sink is downstream of source, so source dominates - return source.element_name, sink.element_name - else: - return None + # sink is in a subflow of the source + if not self.subflow_parents: + return None - return None + reversed_subs = list(reversed(self.subflow_parents)) + subs_start_in_src = [(index, sub_entry) for + (index, sub_entry) in enumerate(reversed_subs) + if (sub_entry[1] == source_path and parse_utils.get_name(sub_entry[0]) in local_source_influenced)] -def dump_cfg(cfg: ControlFlowGraph, fp: TextIO) -> None: + if not subs_start_in_src: + return None + else: + # take last one, which is the shortest path of subflow calls + # from source to current location + index, x = subs_start_in_src[-1] + result_path = reversed_subs[index:] + [(sink_el, self.flow_path)] + return result_path + + +def dump_cfg(cfg: ControlFlowGraph, fp: SupportsWrite[str]) -> None: """Writes to file pointer Args: @@ -990,11 +826,20 @@ def validate_cfg(cfg: ControlFlowGraph, for segment in cfg.segment_map.values(): crawled_elems = crawled_elems + segment.traversed - # ..check there are no missing crawlable elements + # ..check there are elements not in the cfg missing = [x for x in all_elem_tuples if x not in crawled_elems] + # make sure this is not a disconnected flow: + inbound = parser.get_traversable_inbound() + is_disconnected = next((x is not None for x in inbound if + (not inbound[x] and x != '*')), False) + if is_disconnected is True: + not_orphaned = [] + else: + not_orphaned = missing + if missing_only: - return missing + return not_orphaned else: # continue to gather other statistics @@ -1003,21 +848,21 @@ def validate_cfg(cfg: ControlFlowGraph, # ..check there are no duplicates duplicates = [x for x in crawled_elems if counts[x] > 1] - if len(duplicates) != 0: + if len(duplicates) != 0 or len(not_orphaned) != 0: valid = False print("invalid crawl info") for x in duplicates: print(f"duplicate: {x}") + for x in not_orphaned: + # some flows include 
disconnected elements that can't be crawled. + print(f"caution missing element found: {x}") else: valid = True - for x in missing: - # some flows include disconnected elements that can't be crawled. - print(f"caution missing element found: {x}") return valid -def _get_connector_map(elem: ET.Element, - parser: Parser) -> dict[ET.Element, tuple[str, ConnType, bool]]: +def _get_connector_map(elem: El, + parser: Parser) -> dict[El, tuple[str, ConnType, bool]]: """ Wrapper for getting connectors that handles start elements and missing connector targets, which requires a parser. @@ -1038,6 +883,7 @@ def _get_connector_map(elem: ET.Element, def tuple_trace(x: tuple[tuple[str, str], ...]) -> frozenset[tuple[str, str]]: return frozenset([t for t in x]) + def _right_find(my_iter: tuple[str, ConnType], val_to_find) -> int: """ returns -1 if val_to_find is not in the second value of my_iter diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py index 61c907fc..0dcba66b 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/executor.py @@ -12,29 +12,26 @@ import logging import os import traceback -from typing import TYPE_CHECKING, Any, TypeAlias +from datetime import datetime +from typing import Any +from typing import TypeAlias import flow_parser.parse as parse import flow_scanner.control_flow as crawl_spec +import flow_scanner.flows as flows +import public.custom_parser as CP import public.parse_utils +from flow_scanner import wire from flow_scanner.branch_state import BranchState from flow_scanner.control_flow import Crawler, ControlFlowGraph +from flow_scanner.flow_result import ResultsProcessor as Results from flow_scanner.flows import FlowVector from flow_scanner.query_manager import QueryManager, QueryAction from flow_scanner.util import Resolver from public import parse_utils from public.flow_scanner_exceptions import InvalidFlowException -if TYPE_CHECKING: - from public.parse_utils import ET - -from datetime import datetime - -import flow_scanner.flows as flows - -from flow_scanner import wire -from flow_scanner.flow_result import ResultsProcessor as Results - +El: TypeAlias = CP.ET.Element #: Controls whether subflows are followed or not, useful for debugging. 
FOLLOW_SUBFLOWS: bool = True
@@ -162,11 +159,10 @@ def run(self) -> QueryManager:
                 # nothing on the stack, so perform final queries
                 # and exit processing
                 all_states = _consolidate_collected_frames(self.__collected_frames)
-                # empty
-                self.__collected_frames = []
                 self.query_manager.final_query(all_states=all_states)

-                # delete old states
+                # delete old states
+                self.__collected_frames = []
                 return self.query_manager

             else:
@@ -183,6 +179,14 @@
                     self.current_frame.state.filter_input_variables(all_outputs)
                 )

+                # add any new user inputs found in the child frame to the parent's parser
+                tainted = next_frame.state.parser.tainted_inputs
+                if not tainted:
+                    tainted = self.current_frame.parser.get_tainted_inputs()
+                else:
+                    tainted.update(self.current_frame.parser.get_tainted_inputs())
+                next_frame.state.parser.tainted_inputs = tainted
+
                 # now switch execution to new frame
                 self.current_frame = next_frame
@@ -211,11 +215,11 @@ def is_circular_reference(self, next_frame: Frame) -> bool:

         if seen:
             logger.critical(f"found circular reference in {next_frame.flow_path}")
-            self.query_manager.lexical_accept("CyclicSubflow",
-                                              next_flow_path=flow_path,
-                                              current_frame=self.current_frame,
-                                              matching_frame=matching_frame,
-                                              all_frames=self.__frame_stack)
+            self.query_manager.static_accept("CyclicSubflow",
+                                             next_flow_path=flow_path,
+                                             current_frame=self.current_frame,
+                                             matching_frame=matching_frame,
+                                             all_frames=self.__frame_stack)

         return seen
@@ -359,7 +363,7 @@ def __init__(self, current_flow_path: str | None = None, resolver: Resolver = No
         self.crawler: Crawler | None = None

         #: Subflow XML element that launched this flow (can be None)
-        self.parent_subflow: ET.Element | None = None
+        self.parent_subflow: El | None = None

         #: binary semaphore so when we return, we don't spawn the subflow again
         self.child_spawned: bool = False
@@ -403,7 +407,7 @@ def build(cls, current_flow_path: str | None = None, resolver: Resolver = None,
               resolved_subflows: dict[Any, Any] = None,
-              parent_subflow: ET.Element = None,
+              parent_subflow: El = None,
               query_manager: QueryManager = None) -> Frame:
         """Call this whenever program analysis starts or a subflow is reached
@@ -545,7 +549,7 @@ def get_consolidated_output_vars(self) -> dict[tuple[str, str], flows.FlowVector
         return new_map

-    def spawn_child_frame(self, subflow: ET.Element,
+    def spawn_child_frame(self, subflow: El,
                           sub_path: str,
                           input_map: dict[str, str],
                           vector_map: dict[tuple[str, str], flows.FlowVector]
@@ -602,20 +606,20 @@
                                    src2tgt_variable_map=input_map,
                                    transition_elem=subflow)
         # propagate crawl history to child
-        history = self.crawler.get_crawler_history_unsafe()
-        last_index = self.crawler.get_current_step_index()-1  # index always points to *next* step
+        parents = self.crawler.get_subflow_parents()

-        if history is None:
-            new_history = [(self.crawler, last_index)]
+        parent = (subflow, self.flow_path)
+        if parents is None:
+            new_parents = [parent]
         else:
-            new_history = history.insert(0, (self.crawler, last_index))
+            # list.insert returns None, so build a new list instead of
+            # assigning the result of insert
+            new_parents = [parent] + parents

-        new_frame.crawler.crawler_history = new_history
+        new_frame.crawler.subflow_parents = new_parents

         self.child_spawned = True
         return new_frame
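Because list.insert returns None, the parent chain is built by prepending to a fresh list. The intended shape of subflow_parents, sketched with hypothetical values (real entries are (subflow XML element, flow path) pairs)::

    parents = [('subflow_elem_2', 'b.flow'), ('subflow_elem_1', 'a.flow')]  # innermost caller first
    parent = ('subflow_elem_3', 'c.flow')
    new_parents = [parent] + parents  # prepend without mutating the caller's list
    assert new_parents[0] == ('subflow_elem_3', 'c.flow')
    assert len(new_parents) == 3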
Different behavior required if we are returning from the element or entering into it. @@ -759,7 +763,7 @@ def parse_flow(flow_path: str, query_module_path: str = None, query_class_name: str = None, query_preset: str = None, - optional_queries: list[str] | None = None, + queries: list[str] | None = None, query_manager: QueryManager | None = None, crawl_dir: str = None, resolver: Resolver = None, @@ -776,7 +780,7 @@ def parse_flow(flow_path: str, query_module_path: path of module where custom queries are stored query_class_name: name of query class to instantiate query_preset: name of preset to run - optional_queries: list of optional queries to run + queries: list of optional queries to run query_manager: existing instance that invokes queries across entire run. Start with None and one will be created. crawl_dir: directory of where to store crawl specifications @@ -788,9 +792,48 @@ def parse_flow(flow_path: str, or passed to other flows. """ - # build parser + """ + Bootstrap and overall control flow: + + parse_flow scans only a single flow file and its subflows. Scanning of entire + directories is handled in __main__, by repeatedly calling this function. This allows + for (in the future) multi-process/multi-threaded operation as we can scan large + directories in parallel. It also supports chunking. + + One cost is that we pass the (common) results file back and forth. In reality we pass the + query manager (which holds the requested queries) as well as the results object. + + When __main__ has finished scanning all the files, it asks the ResultsProcessor to generate a report. + Now, with chunking, __main__ may force incremental report generation. + + Bootstrap -- first flow to subflow + ---------------------------------- + + 1. parse file to create parser + 2. create QueryManager if it is None, passing in the parser + 3. if QueryManager is not None, update it with new parser for the current file + 4. Pass QueryManager into constructor for Stack + 5. Stack creates a frame for the current file, passing it QueryManager + 6. Frame grabs parser from Query Manager and uses it to generate a CFG and crawl specification (control flow) + 7. Frame creates a new State (dataflow state) for the current file, and gives it the parser. + 8. Now Frame holds a reference to parser and query manager and results. State holds a reference to parser. + 9. On spawning a child frame, the Stack uses the old frame to spawn a child frame. + 10 The child frame creates a new parser from the new flow file, and *updates it* with + the old parser info when necessary. Subflows are not the same as flows. + 11. On return from a function call, the current frame is put into the processed queue + and the previous frame (with the original parser) is popped. The original parser is *updated* + with the data from the child frame, if necessary. + 12. when the flow is finished processing, the query manager is returned to __main__ + + We are going to replace this flow with something a bit more modern in the future, this + work is scheduled for when we replace the crawler. + + """ + + # 1. build parser (only use builder) parser = parse.Parser.from_file(filepath=flow_path) + # Special case handling if user wants a CFG. if crawl_dir is not None: cfg = ControlFlowGraph.from_parser(parser) schedule = crawl_spec.get_crawl_data(cfg) @@ -804,28 +847,35 @@ def parse_flow(flow_path: str, crawl_spec.dump_cfg(cfg, fp) if query_manager is None: - # 1. build result processor - results = Results(requestor=requestor, report_label=report_label, + # 1. 
build query manager. Only use the builder. + query_manager = QueryManager.build(parser=parser, + requested_preset=query_preset, + requested_queries=queries, + external_module_path=query_module_path, + external_class_names=query_class_name, + debug_arg_str=debug_query) + + # 2. Generate a preset from user requested preset and any additional queries + preset = query_manager.generate_effective_preset() + + + # 3. build result processor and pass it the effective preset + results = Results(preset=preset, requestor=requestor, report_label=report_label, result_id=result_id, service_version=service_version, help_url=help_url) results.scan_start = str(datetime.now())[:-7] - # 2. build parser. This will also populate basic data - parser = parse.Parser.from_file(filepath=flow_path) - # 3. build query manager - query_manager = QueryManager.build(results=results, - parser=parser, - requested_preset=query_preset, - additional_queries=optional_queries, - module_path=query_module_path, - class_name=query_class_name, - debug_query=debug_query) + # 4. Assign results processor to query manager + query_manager.results = results + else: - # we are continuing a run, so update parser to work on new file + # we are continuing a run as the query manager has been passed back to us, + # so update parser to work on new flow file + # and keep existing query instances and existing results processor. query_manager.parser = parser - # build stack + # 5. build stack try: stack = Stack(root_flow_path=flow_path, resolver=resolver, @@ -840,7 +890,9 @@ def parse_flow(flow_path: str, # update scan end time query_manager.results.scan_end = str(datetime.now())[:-7] - # return back to __main__, which may scan again with another file + # return back to __main__, which may scan again with another file, + # in which case scan_end will be overwritten. + # TODO: This scan overwriting logic should be moved to __main__. return query_manager @@ -854,7 +906,7 @@ def report(state: BranchState, current_step: int, total_steps: int) -> None: logger.debug(msg) -def get_output_variable_map(subflow_elem: ET.Element, subflow_output_vars: list[var_g]) -> dict[str, str]: +def get_output_variable_map(subflow_elem: El, subflow_output_vars: list[var_g]) -> dict[str, str]: # output_variable_map: child name --> parent name the child influences auto, output_variable_map = public.parse_utils.get_subflow_output_map(subflow_elem) if auto: diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py index 89fcd10e..572f1495 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/flow_result.py @@ -6,6 +6,7 @@ from __future__ import annotations import copy +import dataclasses import json import logging import sys @@ -27,6 +28,8 @@ logger = logging.getLogger(__name__) + + class ResultsProcessor(object): """Class storing all the information necessary for a report. 
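The bootstrap steps above are easiest to see from the caller's side. A hypothetical sketch of the driver loop in `__main__` (the import path, file glob, and minimal keyword usage are assumptions for illustration, not the actual CLI code):

```python
# Hypothetical driver (illustrative only). parse_flow() scans one flow file
# plus its subflows and returns the QueryManager; passing that manager back
# in keeps the same query instances and ResultsProcessor across files.
from pathlib import Path

from flow_scanner.executor import parse_flow  # assumed import path

def scan_directory(flow_dir: str):
    query_manager = None
    for flow_file in sorted(Path(flow_dir).glob("**/*.flow-meta.xml")):
        query_manager = parse_flow(flow_path=str(flow_file),
                                   query_manager=query_manager)
    # once every file is scanned, __main__ asks the results processor to report
    return query_manager.results if query_manager else None
```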
@@ -276,14 +279,14 @@ def gen_result_dict(self) -> dict[str, dict[str, str]]:
             dictionary of the form::

                 query_id -> {flow: tuple of DataInfluenceStatements or None (in case this is a dataflow)
-                    query_name: (human_readable),
-                    counter: (fake similarity id),
-                    elem: source code of element,
-                    elem_name: name of Flow Element,
-                    elem_code: source code of element,
-                    elem_line_no: line number of element,
-                    field: name of influenced variable (if any) within the element,
-                    }
+                             query_name: (human_readable),
+                             counter: (fake similarity id),
+                             elem: source code of element,
+                             elem_name: name of Flow Element,
+                             elem_code: source code of element,
+                             elem_line_no: line number of element,
+                             field: name of influenced variable (if any) within the element,
+                             }

         """
@@ -300,12 +303,16 @@ def gen_result_dict(self) -> dict[str, dict[str, str]]:
             src_code = query_result.elem_code
             src_line = query_result.elem_line_no
             flow_type = query_result.flow_type.name
-
+            file_name = query_result.filename
+            elem_name = query_result.elem_name
             if end_stmt is not None:
-                src_code_end = clean_string(end_stmt.source_text)
-                elem_name_end = end_stmt.element_name
+                src_code = clean_string(end_stmt.source_text)
+                elem_name = end_stmt.element_name
                 field_end = end_stmt.influenced_var
+                src_line = end_stmt.line_no
+                file_name = end_stmt.flow_path
+
             else:
                 src_code_end = None
                 elem_name_end = None
@@ -315,16 +322,16 @@ def gen_result_dict(self) -> dict[str, dict[str, str]]:
             if query_path not in accum:
                 accum[query_path] = []

-            to_append = {"query_name": query_desc.query_name,
+            to_append = {"query_id": query_desc.query_id,
+                         "query_name": query_desc.query_name,
                          "severity": str(query_desc.severity),
                          "description": query_desc.query_description,
                          "counter": self.counter,
-                         "elem": clean_string(query_result.elem_name),
-                         "elem_name": query_result.elem_name or elem_name_end,
-                         "field": query_result.field or query_result.elem_name or field_end,
-                         "elem_code": src_code or src_code_end,
+                         "elem_name": elem_name,
+                         "field": field_end or elem_name,
+                         "elem_code": src_code,
                          "elem_line_no": src_line,
-                         "filename": query_result.filename,
+                         "filename": file_name,
                          "flow_type": flow_type}

             if query_result.paths is None or len(query_result.paths) == 0:
@@ -355,11 +362,16 @@ def gen_result_dict(self) -> dict[str, dict[str, str]]:
                 statements = []

                 for path_ in query_result.paths:
-                    pruned_history = tuple([x for x in path_.history if x.source_text != "[builtin]"])
-                    if path_.history[-1] != end_stmt and end_stmt.source_text != "[builtin]":
-                        statements.append(pruned_history + (end_stmt,))
+                    pruned_history = tuple([fix_names(x) for x in path_.history if x.source_text != "[builtin]"])
+
+                    if end_stmt is not None:
+                        if path_.history[-1] != end_stmt and end_stmt.source_text != "[builtin]":
+                            statements.append(pruned_history + (end_stmt,))
+                        else:
+                            # end_stmt is already the last history entry (or builtin); don't duplicate it
+                            statements.append(pruned_history)
                     else:
-                        statements.append(pruned_history + (end_stmt,))
+                        statements.append(pruned_history)

                 # Now we have our statements normalized and are prepared to render dataflows
                 for path_ in statements:
@@ -439,6 +450,9 @@ def render_normal_dataflow_html(statements: tuple[InfluenceStatement, ...], flow

 def render_html_pathnode(filename: str, flow_type: str, influenced_var: str, line: int,
                          node_id: int, code: str) -> str:
+    if influenced_var == '*':
+        influenced_var = 'start'
+
     result_str = f"{ESAPI.html_encode(filename)}"
     result_str += f"{flow_type}"
     result_str += f"{line}"
@@ -495,3 +509,19 @@ def _validate_qr(qr_list: list[QueryResult]) -> list[QueryResult] | None:

     return to_return
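fix_names below leans on dataclasses.replace, which copies a (possibly frozen) dataclass instance with selected fields swapped out. A minimal standalone sketch, with Stmt standing in for InfluenceStatement:

```python
import dataclasses

@dataclasses.dataclass(frozen=True)
class Stmt:  # stand-in for InfluenceStatement
    influencer_var: str
    influenced_var: str

s1 = Stmt(influencer_var="*", influenced_var="acct.Name")
s2 = dataclasses.replace(s1, influencer_var="start")
# replace() returns a new instance; the frozen original is untouched
assert (s2.influencer_var, s2.influenced_var) == ("start", "acct.Name")
assert s1.influencer_var == "*"
```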
+def fix_names(x: InfluenceStatement) -> InfluenceStatement:
+    """Replace the synthetic '*' marker with 'start' in either variable slot."""
+    new_influenced = None
+    new_influencer = None
+
+    if x.influenced_var == '*':
+        new_influenced = 'start'
+
+    if x.influencer_var == '*':
+        new_influencer = 'start'
+
+    return dataclasses.replace(x,
+                               influencer_var=new_influencer or x.influencer_var,
+                               influenced_var=new_influenced or x.influenced_var)
+
+
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py
index ddd20272..42be36d2 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/query_manager.py
@@ -11,40 +11,85 @@
 import os
 import traceback
 import types
-from importlib import machinery
+from importlib import machinery, reload
 from typing import Any

 import queries.default_query
 import queries.optional_query
-import queries.debug_query
-
 from flow_parser.parse import Parser
 from flow_scanner.util import case_insensitive_match
 from flow_scanner.control_flow import Crawler
 from flow_scanner.flow_result import ResultsProcessor
-from public.contracts import QueryProcessor, State, AbstractQuery, AbstractCrawler, Query, LexicalQuery
-from public.data_obj import Preset, PresetEncoder
+from public.contracts import State, AbstractCrawler, Query, LexicalQuery, FlowParser
+from public.data_obj import Preset, PresetEncoder, QueryDescription
 from public.enums import QueryAction

+from queries import debug_query
+from queries.debug_query import Detect
+
 logger = logging.getLogger(__name__)

+PRESETS = {
+    'default': [(queries.default_query, 'PreventPassingUserDataIntoElementWithoutSharing'),
+                (queries.default_query, 'PreventPassingUserDataIntoElementWithSharing'),
+                (queries.optional_query, 'DbInLoop'),
+                (queries.optional_query, 'HardcodedId'),
+                (queries.optional_query, 'SameRecordUpdate'),
+                (queries.optional_query, 'TriggerEntryCriteria'),
+                (queries.optional_query, 'UnusedResource'),
+                (queries.optional_query, 'UnreachableElement'),
+                (queries.optional_query, 'MissingNextValueConnector'),
+                (queries.optional_query, 'TriggerWaitEvent'),
+                (queries.optional_query, 'TriggerCallout'),
+                ],
+    'org': [(queries.default_query, 'PreventPassingUserDataIntoElementWithoutSharing'),
+            (queries.default_query, 'PreventPassingUserDataIntoElementWithSharing'),
+            (queries.optional_query, 'DbInLoop'),
+            (queries.optional_query, 'CyclicSubflow'),
+            (queries.optional_query, 'MissingFaultHandler'),
+            (queries.optional_query, 'HardcodedId'),
+            (queries.optional_query, 'SameRecordUpdate'),
+            (queries.optional_query, 'TriggerEntryCriteria'),
+            (queries.optional_query, 'UnusedResource'),
+            (queries.optional_query, 'UnreachableElement'),
+            (queries.optional_query, 'MissingNextValueConnector'),
+            (queries.optional_query, 'TriggerWaitEvent'),
+            (queries.optional_query, 'TriggerCallout'),
+            ]
+}
+
 # In the future, if other files are added, need to add here
-ADDITIONAL_QUERY_MODULES = [
+QUERY_MODULES = [
+    (queries.default_query, queries.default_query.QUERIES),
     (queries.optional_query, queries.optional_query.QUERIES),
-    (queries.debug_query, queries.debug_query.QUERIES)
 ]

-
 class QueryManager:
-    # instance that performs queries and produces results
-    query_processor: QueryProcessor = None
+    """
+    Lifecycle: QueryManager is instantiated once per invocation of flow_scanner.
+    That means that if an argument is set to None due to error, it will not be
+ + At the end of a full flow parse (including subflows), the queries are reloaded, e.g. + re-instantiated. But query instances persist across subflows. They persist until reload + is called. + """ + # which built-in queries were requested, combining preset and any optional + requested_query_ids: list[str] | None = None - # stand-alone query_map action -> additional query instance - queries: dict[QueryAction, list[Query | LexicalQuery]] = None + # built-in query_id -> query instance. Includes debug query but not custom queries + queries: dict[str, Query | LexicalQuery] | None = None - # stand-alone query map -> query_id -> query instance - flattened_queries: dict[str, Query | LexicalQuery] | None = None + # custom query_id -> query instance. None if no custom queries were requested. + custom_queries: dict[str, Query | LexicalQuery] | None = None + + # Custom Queries are combined with regular queries and also with the debug query + # action_type -> list of query instances with this type. + action2queries: dict[QueryAction, list[Query | LexicalQuery]] = None + + # used in lexical accept + query_id2module_name: dict[str, str] | None = None # instance that stores results and generates reports results: ResultsProcessor = None @@ -53,122 +98,154 @@ class QueryManager: parser: Parser = None # which preset to request - requested_preset: str = None - - # additional queries to perform - additional_queries: list[str] = None + preset: str = None - # lexical queries only run once per flow visit - visited_flows: list[str] = None + # module loaded from environment + external_query_module: Any = None - query_module: Any = None + # list of class names to run from external module. Must not conflict with + # any requested built-in queries or with the debug query. + # + external_class_names: list[str] | None = None - class_name: str | None = None - - query_id_to_module_name: dict[str, str] | None = None - - debug_msg: str | None = None + # json object that will be passed to the debug query + debug_arg: Any | None = None @classmethod - def build(cls, results: ResultsProcessor, - parser: Parser = None, + def build(cls, + parser: FlowParser, requested_preset: str | None = None, - additional_queries: list[str] | None = None, - module_path: str | None = None, - class_name: str | None = None, - debug_query: str | None = None) -> QueryManager: + requested_queries: list[str] | None = None, + external_module_path: str | None = None, + external_class_names: str | None = None, + debug_arg_str: str | None = None) -> QueryManager: """Only call this once to build Query Manager at scan start """ qm = QueryManager() + qm.parser = parser + if debug_arg_str is not None: + try: + qm.debug_arg = json.loads(debug_arg_str) - if debug_query is not None: - qm.set_debug_query(debug_query) + except: + logger.critical(f"error parsing debug argument\n{traceback.format_exc()}" + f"\n...skipping debug query in this run") + raise - if module_path is not None: + if external_module_path is not None and external_class_names is not None: # try to load requested query - # TODO: add better error handling - query_module = create_module(module_path=module_path) - - qm.query_module = query_module - qm.class_name = class_name - preset, instance = get_instance(query_module_=query_module, - class_name_=class_name, - preset_=requested_preset) - qm.requested_preset = requested_preset + try: + query_module = create_module(module_path=external_module_path) + + qm.external_query_module = query_module + qm.external_class_names = [x.strip() for x in 
external_class_names.split(",") + if x.strip() != ''] + + except: + msg=("error loading external module\n" + f"{traceback.format_exc()}\n" + f"skipping external query in this run.") + logger.critical(msg) + print(msg) + raise + + if requested_queries is not None and requested_preset is not None: + # use both + qm.preset = requested_preset + qm.requested_query_ids = qm.requested_query_ids + [x[1] for x in PRESETS[requested_preset] + if x[1] not in qm.requested_query_ids] + + elif requested_queries is not None: + # requested preset must be None but user listed queries, so just run those + qm.requested_query_ids = requested_queries else: - # use default - instance = queries.default_query.DefaultQueryProcessor() - preset = instance.set_preset(preset_name=requested_preset) - - if preset is None: - raise RuntimeError(f"The loaded query module does not support preset: {preset or 'No preset provided'}") - - # store pointer to query processor - qm.query_processor = instance - res = build_query_map(additional_queries=additional_queries, debug_msg=debug_query) - qm.queries, qm.flattened_queries, qm.query_id_to_module_name = res - if qm.flattened_queries is not None: - qm.additional_queries = list(qm.flattened_queries.keys()) - # assign preset to results - results.preset = get_updated_preset(preset, additional_query_map=qm.queries) - - # store pointer to results - qm.results = results - qm.parser = parser + # requested queries must be None, so we rely on preset + if requested_preset is None: + requested_preset = 'default' + qm.preset = requested_preset + qm.requested_query_ids = [x[1] for x in PRESETS[requested_preset]] + + qm.queries, qm.custom_queries, qm.action2queries, qm.query_id2module_name = build_query_maps( + requested_queries=qm.requested_query_ids, + external_module=qm.external_query_module, + external_classnames=qm.external_class_names, + debug_arg=qm.debug_arg + ) return qm - def reload(self): - """Make a new instance of the queries after completing one flow + def generate_effective_preset(self)-> Preset: + """ + + Returns: The list of query descriptions that will actually be run, combining + the preset field selected by the caller and any additional queries selected + by the caller - Returns: - None """ + q = [] + if self.queries: + q += list(self.queries.values()) + if self.custom_queries: + q += list(self.custom_queries.values()) + + query_desc = [instance.get_query_description() for instance in q] + + if self.debug_arg: + query_desc.append(debug_query.Detect.get_query_description()) + + preset_name = self.preset or "custom" + + return Preset(preset_name=preset_name, preset_owner=None, + queries=set(query_desc)) + - if self.query_module is None or self.class_name is None: - # use default - self.query_processor = queries.default_query.DefaultQueryProcessor() - return - else: - preset, instance = get_instance(self.query_module, - self.class_name, self.requested_preset) - self.query_processor = instance - self.queries, self.flattened_queries, self.query_id_to_module_name = build_query_map( - additional_queries=self.additional_queries, debug_msg=self.debug_msg - ) def lexical_query(self, parser: Parser, crawler: AbstractCrawler=None) -> None: - if self.additional_queries is None: - return None - if QueryAction.lexical not in self.queries: + if self.queries is None or QueryAction.lexical not in self.action2queries: return None - flow_path = parser.flow_path - if self.visited_flows is not None and flow_path in self.visited_flows: - return None - else: - if self.visited_flows is None: - 
self.visited_flows = [flow_path] - else: - self.visited_flows.append(flow_path) - to_run = self.queries[QueryAction.lexical] - for qry in to_run: + to_run = self.action2queries[QueryAction.lexical] + for qry in to_run: + try: res = qry.execute(parser=parser, crawler=crawler) if res is not None: self.results.add_results(res) - return None + except: + logger.critical(f"error executing lexical query in flow " + f"{parser.flow_path} {traceback.format_exc()}") + return None - def lexical_accept(self, query_id, **kwargs) -> None: + def static_accept(self, query_id, **kwargs) -> None: + """Calls the (static) 'accept' method of this query. The query must + override the static 'accept' method of the abstract class. Expert + use only. - if self.additional_queries is not None and query_id in self.additional_queries: - mod_name = self.query_id_to_module_name[query_id] - qry_class = getattr(mod_name,query_id) + The purpose of accept methods is to record issues found in the course + of normal scanning and parsing, and not as a result of running queries. - res = getattr(qry_class, 'accept')(**kwargs) + Because of this, we are accepting issues found and merely reformatting + them into the appropriate query result. But if this query is not requested, + then it will not override the parent accept which is a null op. - if res is not None: - self.results.add_results(res) + Args: + query_id (str): Name of class that has the static accept method + **kwargs (Any): Keyword args to pass + + Returns: + Query Description + + """ + if self.queries is not None and query_id in self.queries: + mod_name = self.query_id2module_name[query_id] + qry_class = getattr(mod_name,query_id) + try: + res = getattr(qry_class, 'accept')(**kwargs) + if res is not None: + self.results.add_results(res) + except: + logger.critical(f"error processing lexical accept " + f"{traceback.format_exc()}") else: logger.info(f"The query id {query_id} is not recognized as a requested lexical query id") @@ -182,67 +259,81 @@ def query(self, action: QueryAction, state: State, crawler: Crawler = None) -> N Returns: None + """ - # TODO: add exception handling and logging as this is third party code # when we first enter a state, there is a start elem which is not assigned and so curr elem is None. # don't look for sinks into these start states. 
- if action is QueryAction.process_elem and state.get_current_elem() is not None: - - res = self.query_processor.handle_crawl_element(state=state, crawler=crawler) - if res is not None: - self.results.add_results(res) - - elif action is QueryAction.flow_enter: - res = self.query_processor.handle_flow_enter(state=state, crawler=crawler) - # TODO: better validation of result - if res is not None: - self.results.add_results(res) - - self._run_additional_queries(action=action, state=state, - crawler=crawler, all_states=None) - + if action is QueryAction.process_elem and state.get_current_elem() is None: + return None + else: + self.run_queries(action=action, state=state, + crawler=crawler, all_states=None) + return None def final_query(self, all_states: tuple[State]=None) -> None: - res = self.query_processor.handle_final(all_states=all_states) - # TODO: better validation of result - if res is not None: - self.results.add_results(res) - self._run_additional_queries(action=QueryAction.scan_exit, - all_states=all_states) - - # delete old query instance and reload for next flow to process + self.run_queries(action=QueryAction.scan_exit, + all_states=all_states) + + # delete old query instances, modules, and reload for next flow to process self.reload() - # delete old states def accept(self, query_id: str, **kwargs) -> None: - if query_id not in self.additional_queries: + if query_id not in self.queries: return None - qry = self.flattened_queries[query_id] + qry = self.queries[query_id] + try: + res = qry.accept(**kwargs) + if res is not None: + self.results.add_results(res) + except: + logger.critical(f"error handling accept query {traceback.format_exc()}") - res = qry.accept(**kwargs) - if res is not None: - self.results.add_results(res) return None def debug_query(self, msg: str): - self.debug_msg = msg + self.debug_arg = msg - def _run_additional_queries(self, action: QueryAction, state: State=None, - crawler: AbstractCrawler=None, all_states: tuple[State]=None) -> None: - if self.additional_queries is None: + def run_queries(self, action: QueryAction, state: State=None, + crawler: AbstractCrawler=None, all_states: tuple[State]=None) -> None: + if self.action2queries is None: return None - if action not in self.queries: + if action not in self.action2queries: return None else: - to_run = self.queries[action] + to_run = self.action2queries[action] for qry in to_run: - res = qry.execute(state=state, crawler=crawler, all_states=all_states) - if res is not None: - self.results.add_results(res) + try: + res = qry.execute(state=state, crawler=crawler, all_states=all_states) + if res is not None: + self.results.add_results(res) + except: + logger.critical(f"error executing query in flow {state.get_parser().get_filename()}" + f"\n {traceback.format_exc()}") + return None + def reload(self): + """Make a new instance of the queries after completing one flow + + Returns: + None + """ + # reload internal modules + for mod_ in QUERY_MODULES: + reload(mod_[0]) + + # reload any external module + if self.external_query_module: + reload(self.external_query_module) + + self.queries, self.custom_queries, self.action2queries, self.query_id2module_name = build_query_maps( + requested_queries=self.requested_query_ids, + external_module=self.external_query_module, + external_classnames=self.external_class_names, + debug_arg=self.debug_arg + ) def create_module(module_path: str) -> Any: """Loads and Instantiates QueryProcessor @@ -284,118 +375,164 @@ def create_module(module_path: str) -> Any: 
logger.critical(f"ERROR: could not load module {filename}: {traceback.format_exc()}") raise e +def build_query_maps( + requested_queries: list[str] | None=None, + external_module: Any | None = None, + external_classnames: list[str] | None = None, + debug_arg: Any | None=None +) -> tuple[ + dict[str, Query | LexicalQuery] | None, + dict[str, Query | LexicalQuery] | None, + dict[QueryAction, list[Query | LexicalQuery]] | None, + dict[str,str] +]: + """Instantiates queries and places them into convenient map structures + + Args: + requested_queries: list of validated built in queries + external_module: (loaded) external module reference + external_classnames: list of classnames in external module + debug_arg: json obj corresponding to argument + + Returns: + queries (id -> instance), + custom_queries (id -> instance), + action2queries (actionType -> List[QueryInstance] + query_id2module_name (str -> str) -def get_instance(query_module_, class_name_, preset_): - if query_module_ is None: - query_instance = queries.default_query.DefaultQueryProcessor() - - else: - try: - query_instance = getattr(query_module_, class_name_)() - - except Exception as e: - logger.critical(f"ERROR: could not instantiate module") - raise e - - try: - accepted_preset = query_instance.set_preset(preset_) - if accepted_preset is None: - raise ValueError("Could not set preset") + """ + built_in_id2instance = {} # only for builtin + custom_id2instance = {} # only for custom + action2queries = {} # for everything, including debug + id2module = {} # for everything except debug + + if requested_queries: + for (my_module, qry_map) in QUERY_MODULES: + for qry_id in requested_queries: + if qry_id in qry_map: + populate_maps_from_instance( + qry_id, + my_module, + id2instance=built_in_id2instance, + id2module=id2module, + action2queries=action2queries + ) + + if external_classnames: + for class_ in external_classnames: + populate_maps_from_instance( + qry_id=class_, + my_module=external_module, + id2instance=custom_id2instance, + id2module=id2module, + action2queries=action2queries + ) + if debug_arg: + instance = Detect(debug_arg) + # add to self.queries, debug classname is 'Detect' + built_in_id2instance['Detect'] = instance + # add to action2queries + populate_action2queries(action2queries, instance) + + return built_in_id2instance, custom_id2instance, action2queries, id2module + +def populate_maps_from_instance(qry_id, my_module, + id2instance, + id2module, + action2queries)-> None: + qry_instance = getattr(my_module, qry_id)() + id2instance[qry_id] = qry_instance + id2module[qry_id] = my_module + populate_action2queries(action2queries, qry_instance) + + +def populate_action2queries(action2queries: dict[QueryAction,list[LexicalQuery | Query]], + instance: Query|LexicalQuery|Detect) -> None: + for action in instance.when_to_run(): + if action not in action2queries: + action2queries[action] = [instance] else: - return accepted_preset, query_instance - - except Exception as e: - logger.critical(f"ERROR: could not set preset: {traceback.format_exc()}") - raise e + action2queries[action].append(instance) -def build_query_map(additional_queries: list[str] | None=None, - debug_msg: str|None = None - ) -> tuple[dict[QueryAction, list[Query | LexicalQuery]], - dict[str, Query | LexicalQuery], dict[str,str]] | tuple[None, None, None]: - if additional_queries is None: - return None, None, None - else: - instance_map = {} - flat_map = {} - qry_to_mod = {} - for q_name in additional_queries: - for (my_module, qry_map) in 
ADDITIONAL_QUERY_MODULES: - match_ = case_insensitive_match(qry_map.keys(), q_name) - if match_ is not None: - qry_to_mod[match_] = my_module - if my_module is not queries.debug_query: - q_instance = getattr(my_module, match_)() - else: - q_instance = getattr(my_module, match_)(debug_msg) - action = q_instance.when_to_run() - if action not in instance_map: - instance_map[action] = [q_instance] - else: - instance_map[action].append(q_instance) - if match_ in flat_map.keys(): - raise ValueError(f"Duplicate query name: {q_name}") - else: - flat_map[match_] = q_instance - # stop looking in other modules for q_name - break - - if len(instance_map) == 0: - return None, None, None - else: - return instance_map, flat_map, qry_to_mod +def get_query_descriptions()-> str: + """ + Returns: All descriptions for builtin queries -def get_updated_preset(preset, additional_query_map: dict[QueryAction,list[AbstractQuery]]=None): - if additional_query_map is None: - return preset - else: - old_queries = preset.queries - for q_list in additional_query_map.values(): - for q in q_list: - if q is not None: - old_queries.add(q.get_query_description()) + """ + descriptions = [] + for (my_module, qry_map) in QUERY_MODULES: + if my_module is not queries.debug_query: + for q_name in qry_map.keys(): + q_instance = getattr(my_module, q_name)() + descriptions.append(q_instance.get_query_description()) + return json.dumps(descriptions, indent=4, cls=PresetEncoder) - return Preset(preset_name=preset.preset_name, - preset_owner=preset.preset_owner, - queries=old_queries) +def validate_qry_list(qry_list: list[str]) -> tuple[bool, list[str] | None, list[str] | None, list[str] | None]: + """Verifies that the passed in list of strings is a case-insensitive match of legal + query names and returns the matching de-duped legal query names along with a boolean + that is False if there are any queries requested that are illegal, or if there are any duplicates -def get_all_optional_descriptions()-> str: - descriptions = [] - for (my_module, qry_map) in ADDITIONAL_QUERY_MODULES: - for q_name in qry_map.keys(): - q_instance = getattr(my_module, q_name)() - descriptions.append(q_instance.get_query_description()) - return (json.dumps(descriptions, indent=4, cls=PresetEncoder) - .replace('\\"', '"').replace('\\n', "\n")) + Args: + qry_list: list of user provided query_ids to run + Returns: + boolean (is valid), found, missed, duplicates -def validate_qry_list(qry_list: list[str]) -> bool | list[str]: - query_keys = [x[1].keys() for x in ADDITIONAL_QUERY_MODULES] + """ + query_keys = [x[1].keys() for x in QUERY_MODULES] found_tkns = [] missed_tkns = [] + duplicates = [] + for tkn in qry_list: + found = False for query_key in query_keys: match_ = case_insensitive_match(query_key, tkn) if match_ is not None: - found_tkns.append(match_) + found = True + if match_ not in found_tkns: + found_tkns.append(match_) + else: + duplicates.append(query_key) break - # tkn not found in any query key - missed_tkns.append(tkn) - valid = len(found_tkns) == len(qry_list) - if valid: - return True - else: - assert len(missed_tkns) != 0 - return missed_tkns -def get_all_optional_queries() -> list[str]: + if not found: + # tkn not found in any query key + missed_tkns.append(tkn) + + valid = len(missed_tkns) == 0 and len(duplicates) == 0 + return valid, found_tkns, missed_tkns, duplicates + +def build_preset_for_name(preset_name: str) -> Preset | None: + """This is used by the CLI to describe an internal preset + + Args: + preset_name (str): + + Returns: + 
Preset corresponding to this name
+    """
+    # local name avoids shadowing the imported `queries` package
+    preset_queries = PRESETS.get(preset_name, [])
+    if not preset_queries:
+        return None
+    accum = []
+    for (mod, query) in preset_queries:
+        class_ = getattr(mod, query)
+        accum.append(class_.get_query_description())
+
+    return Preset(preset_name=preset_name,
+                  preset_owner="Salesforce",
+                  queries=accum)
+
+def get_all_queries() -> list[str]:
     """Does not return debug queries
     """
     accum = []
-    for x in ADDITIONAL_QUERY_MODULES:
+    for x in QUERY_MODULES:
         if x[0] is not queries.debug_query:
             accum = accum + list(x[1].keys())

diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py
index 98fa2005..ded109a9 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/util.py
@@ -13,7 +13,7 @@
 from dataclasses import fields
 from pathlib import Path
 from typing import TYPE_CHECKING
-from typing import Any as Any
+from typing import Any, Sequence

 from public.data_obj import VariableType
 from public.enums import RunMode
@@ -443,3 +443,66 @@ def case_insensitive_match(list_a: list[str], to_match: str) -> str | None:
             return item

     return None
+
+def find_cycles(target, history: tuple) -> tuple[int, tuple | None]:
+    """Look for cycles in the history ending with target and return (# of cycles, cycle)
+
+    The idea is to detect cycles. Say the history is
+    history = [A B X Y Z X Y]
+    and we are thinking of adding the target Z.
+
+    But we don't want to add it if it will
+    create a repeating pattern, as this corresponds to looping needlessly.
+    So we look for the previous occurrence of Z in the history, and then look
+    at history[right_index(Z):] = [Z X Y]
+    Now we want to check whether the portion [X Y] also precedes Z. If so,
+    we found a cycle ending at target, and we don't jump to that target.
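+
+    Worked example (illustrative): with history = ('A', 'B', 'X', 'Y', 'Z', 'X', 'Y')
+    and target = 'Z', the previous 'Z' sits at index 4, so the candidate cycle is
+    ('X', 'Y', 'Z'); that cycle already occurs once ending at index 4, and the
+    function returns (2, ('X', 'Y', 'Z')).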
+
+    """
+    if not history:
+        return 0, None
+
+    hist_len = len(history)
+    if history[-1] == target:
+        cycle = (target,)
+        index = hist_len - 1
+        cycle_len = 1
+    else:
+        prev = next((i for i in range(1, hist_len + 1) if history[hist_len - i] == target), None)
+        if prev is None:
+            return 0, None
+        else:
+            index = hist_len - prev
+            cycle = history[index + 1:] + (target,)
+            cycle_len = prev
+
+    counter = 1
+    position = index
+
+    if cycle_len == 1:
+        # stop at position 0: letting position reach -1 would wrap around to history[-1]
+        while position > 0:
+            position -= 1
+            if history[position] == target:
+                counter += 1
+            else:
+                break
+
+    else:
+        while position >= cycle_len - 1:
+            last_position = position
+            position = position - cycle_len
+
+            if history[position + 1: last_position + 1] == cycle:
+                counter += 1
+            else:
+                break
+
+    return counter, cycle
+
+
diff --git a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py
index 1ba5bd0a..df9459c3 100644
--- a/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py
+++ b/packages/code-analyzer-flow-engine/FlowScanner/flow_scanner/wire.py
@@ -35,9 +35,12 @@
 from flow_scanner.branch_state import BranchState
 from public import parse_utils
 from public.data_obj import InfluenceStatement
-from public.parse_utils import ET
 from public.parse_utils import ns
 from enum import Enum
+import public.custom_parser as CP
+
+from typing import TypeAlias
+El: TypeAlias = CP.ET.Element

 #: module logger
 logger = logging.getLogger(__name__)
@@ -49,7 +52,7 @@ class QueryResult(Enum):
     OutputAssignmentsEls = 40

-def initialize(state: BranchState, elem: ET.Element, elem_name: str) -> dict[QueryResult, bool | str | ET.Element]:
+def initialize(state: BranchState, elem: El, elem_name: str) -> dict[QueryResult, bool | str | El]:
     """Add this element name to influence map if it represents its own output data

     (Element name is passed in so we don't need to keep looking it up)
@@ -84,7 +87,7 @@ def initialize(state: BranchState, elem: El, elem_name: str) -> dict[QueryResult

     return result

-def wire(state: BranchState, elem: ET.Element) -> None:
+def wire(state: BranchState, elem: El) -> None:
     """Wires influence statements and variable initialization.

     When the value of one variable changes based on another.
@@ -168,7 +171,7 @@ def wire(state: BranchState, elem: El) -> None:
     return None

-def wire_waits(state: BranchState, elem: ET.Element, el_name: str, stored):
+def wire_waits(state: BranchState, elem: El, el_name: str, stored):
     """Wait events can fire events on exit which is handled via output ref
     """
     wait_events = parse_utils.get_by_tag(elem, 'waitEvents')
@@ -188,7 +191,7 @@ def wire_waits(state: BranchState, elem: El, el_name: str, stored):


-def wire_assignment(state: BranchState, elem: ET.Element, elem_name: str, stored):
+def wire_assignment(state: BranchState, elem: El, elem_name: str, stored):
     """Wires assignment statements to influence map in `state`

     Args:
@@ -316,7 +319,12 @@ def wire_orchestrated_stages(state, elem, el_name, stored):
         state.get_or_make_vector(name=fixed_name, store=True)

-def wire_loop(state: BranchState, elem: ET.Element, elem_name: str, stored):
+def wire_loop(state: BranchState, elem: El, elem_name: str, stored):
     """Wires collection loop is over to loop variable.
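+
+    For reference, the loop metadata looks roughly like (toy XML, abbreviated):
+        <loops><name>eachItem</name>
+            <collectionReference>itemList</collectionReference></loops>
+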
Args: @@ -328,7 +331,7 @@ def wire_loop(state: BranchState, elem: ET.Element, elem_name: str, stored): None """ - collection_ref_els = parse.get_by_tag(elem, tagname='collectionReference') + collection_ref_els = parse.get_by_tag(elem, tag_name='collectionReference') if len(collection_ref_els) != 1: logger.warning(f"Found Loop without a collection reference in {elem_name}") return @@ -341,7 +344,7 @@ def wire_loop(state: BranchState, elem: ET.Element, elem_name: str, stored): el_name=elem_name, elem=elem,comment='assign to loop variable') -def wire_collection_processor(state: BranchState, elem: ET.Element, elem_name: str, stored): +def wire_collection_processor(state: BranchState, elem: El, elem_name: str, stored): """Wires collection reference in collection processor to collection elem. Args: @@ -354,9 +357,9 @@ def wire_collection_processor(state: BranchState, elem: ET.Element, elem_name: s """ # every collectionProcessor must have a single collection ref - subtype = parse.get_by_tag(elem, tagname='elementSubtype') + subtype = parse.get_by_tag(elem, tag_name='elementSubtype') if len(subtype) == 1 and subtype[0].text == 'FilterCollectionProcessor': - collection_el = parse.get_by_tag(elem, tagname='collectionReference')[0] + collection_el = parse.get_by_tag(elem, tag_name='collectionReference')[0] else: return collection_ref_var = collection_el.text @@ -432,7 +435,7 @@ def wire_screens(state, elem, el_name, stored): def wire_and_store(state: BranchState, influencer:str, influenced: str, - el_name: str, elem: ET.Element, comment: str) -> None: + el_name: str, elem: El, comment: str) -> None: stmt = InfluenceStatement( influenced_var=influenced, influencer_var=influencer, diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py b/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py index 76d324ea..6f997f31 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/contracts.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Optional import public.enums +from public import parse_utils if TYPE_CHECKING: from public.data_obj import InfluencePath, VariableType, CrawlStep, Jump @@ -24,6 +25,7 @@ from typing import TypeAlias var_t: TypeAlias = tuple[str, str] +El: TypeAlias = parse_utils.CP.ET.Element """ To generate custom queries, implement the QueryPresets class and for each query listed in the preset, implement @@ -34,23 +36,6 @@ class AbstractCrawler(ABC): - @abstractmethod - def get_control_influence_from_source(self, influenced_var: str, - source_var: var_t) ->tuple[var_t, ...] | None: - """Get control influence chain from source to influenced var - - Args: - influenced_var (str): top level (traversable) flow element name in the flow crawled by this current crawler. - source_var (str, str): flow_path, element name in either the current flow or in another flow that may or - may not be an ancestor in the call chain. - - Returns: - None if there is no influence, or a set of crawl steps linking the source to the influenced. - Only a single chain of crawl_steps is returned, there may be other control influence chains. - - """ - pass - @abstractmethod def get_crawl_schedule(self) -> tuple[CrawlStep]: pass @@ -60,12 +45,14 @@ def get_flow_path(self) -> str | None: pass @abstractmethod - def get_crawler_history_unsafe(self) -> list[tuple[AbstractCrawler, int]]: + def get_subflow_parents(self) -> list[tuple[ET.Element, str]]: """READ ONLY. 
Do not perform any crawlstep loads with these crawlers!

         Returns:
-            history of crawlers encountered during crawl, together with the current step (int)
-            when they entered a child flow.
+            history of (subflow element, flow_path) pairs for the ancestors of the
+            current crawler, e.g. current_frame <-- [(elem0, path0), (elem1, path1), ...],
+            where the frame that spawned the current frame is at history[0] and the
+            very first frame is at history[-1]
         """
         pass

@@ -111,6 +98,32 @@ def get_elem_to_crawl_step(self, elem_name: str) -> list[CrawlStep]:
         """
         pass

+    @abstractmethod
+    def get_crawlable_elem_tuples(self) -> list[tuple[str, str]] | None:
+        """Returns all traversable element name, tag tuples that are connected to the start element
+        """
+        pass
+
+    @abstractmethod
+    def get_call_chain(self, source_el: ET.Element,
+                       source_path: str,
+                       sink_el: ET.Element,
+                       source_parser: FlowParser) -> list[tuple[ET.Element, str]] | None:
+        """sink_el must be in the current flow. source_el can be in an ancestor
+        flow. Only returns paths currently crawled, so this must be called
+        every time a specific frame is loaded.
+
+        Args:
+            source_parser: the parser for the source flow path
+        Returns:
+            A list starting with the source and ending with the sink, in which each
+            element is an ancestor caller of the succeeding element.
+            [(element, element flow path)]
+
+        """
+        pass
+
+
 class AbstractControlFlowGraph(ABC):
     # where to start
     @property
@@ -163,132 +176,7 @@ def is_terminal(self) -> bool:

     # for tracking whether it has been visited
     @property
     @abstractmethod
- # - # If an incorrect name is supplied or the preset cannot be found - # return None, and the system will exit with - # an error message to the user (usually a misspelling or - # misconfiguration error). No scan will occur. - # - # If preset_name is None, a default preset - # should be run, and this preset returned. - # - @abstractmethod - def set_preset(self, preset_name: str | None) -> Preset | None: - """ - - Args: - preset_name: - - Returns: - Preset that will be used in subsequent processing - """ - pass - - # This method is called by the query_processor on every flow element - # (except and ) - @abstractmethod - def handle_crawl_element(self, - state: State, - crawler: AbstractCrawler, - ) -> list[QueryResult] | None: - """ - - Args: - state: - crawler: cfg and crawl schedule - - Returns: - list of query results - """ - pass - - # Called every time a new flow is loaded (master flow or subflow) - @abstractmethod - def handle_flow_enter(self, - state: State, # the state has the flow_path variable - crawler: AbstractCrawler, - ) -> list[QueryResult] | None: - """Invoked when a flow or subflow is first entered. - - Args: - state: state instance - crawler: crawl schedule and cfg - - Returns: - list of QueryResults - """ - pass - - # Called when crawling is complete - @abstractmethod - def handle_final(self, - all_states: tuple[State], - ) -> list[QueryResult] | None: - """Invoked when crawl is complete for the flow and all subflows. - - Args: - all_states: - - Returns: - - """ + def seen_tokens(self) -> list[tuple[tuple[str], ...]]: pass class AbstractQuery(ABC): @@ -322,12 +210,13 @@ def accept(cls, **kwargs) -> list[QueryResult] | None: """ return None + @classmethod @abstractmethod - def get_query_description(self) -> QueryDescription: + def get_query_description(cls) -> QueryDescription: pass @abstractmethod - def when_to_run(self) -> QueryAction: + def when_to_run(self) -> list[QueryAction]: pass @abstractmethod @@ -337,12 +226,13 @@ def execute(self) -> list[QueryResult] | None: class Query(AbstractQuery, ABC): + @classmethod @abstractmethod - def get_query_description(self) -> QueryDescription: + def get_query_description(cls) -> QueryDescription: pass @abstractmethod - def when_to_run(self) -> QueryAction: + def when_to_run(self) -> list[QueryAction]: pass @abstractmethod @@ -361,7 +251,7 @@ def get_query_description(self) -> QueryDescription: pass @abstractmethod - def when_to_run(self) -> QueryAction: + def when_to_run(self) -> list[QueryAction]: pass @abstractmethod @@ -502,9 +392,16 @@ def get_literal_var(self) -> VariableType: pass @abstractmethod - def get_action_call_map(self) -> dict[str, list[tuple[str, str]]] | None: + def get_traversable_inbound(self) -> dict[str, list[str]]: + """Returns dict from element name to list of all inbound element names + will be empty list if no inbound. 
+ """ + pass + + @abstractmethod + def get_action_call_map(self) -> dict[str, list[tuple[El, str]]] | None: """Gets all actionCalls in the flow element - Returns: actionCall type -> (element name, action name) + Returns: actionCall type -> (element, action name) """ pass @@ -544,3 +441,7 @@ def get_input_field_elems(self) -> set[ET.Element] | None: @abstractmethod def get_by_name(self, name_to_match: str, scope: ET.Element | None = None) -> ET.Element | None: pass + + @abstractmethod + def get_tainted_inputs(self) -> set[tuple[str, str]] | None: + pass \ No newline at end of file diff --git a/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py b/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py index b8084155..4228c184 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/public/custom_parser.py @@ -42,6 +42,8 @@ def clean_string(msg: str) -> str: """ if not isinstance(msg, str): return msg + elif msg == '*': + return "start" else: msg1 = msg.replace(" list[str]: return accum -def get_tag(elem: ET.Element) -> str: - if isinstance(elem, ET.Element): +def get_tag(elem: El) -> str: + if isinstance(elem, El): return elem.tag[NS_LEN:] # elif isinstance(elem, ET._Comment): # return '' else: return '' -def get_text_of_tag(elem: ET.Element, tagname: str) -> str | None: - """look for a single child elem (does not recurse) with the specified tagname and return the text. - returns None if there is not exactly one child elem with the specified tagname or if it has no text.""" - res = get_by_tag(elem, tagname) +def get_text_of_tag(elem: El, tag_name: str) -> str | None: + """look for a single child elem (does not recurse) with the specified tag_name and return the text. + returns None if there is not exactly one child elem with the specified tag_name or if it has no text.""" + res = get_by_tag(elem, tag_name) if len(res) == 1 and res[0] is not None: r = res[0].text if r is None or r == '': @@ -221,13 +222,13 @@ def get_text_of_tag(elem: ET.Element, tagname: str) -> str | None: return None -def is_subflow(elem: ET.Element) -> bool: +def is_subflow(elem: El) -> bool: if elem is None: return False return get_tag(elem) == 'subflows' -def is_loop(elem: ET.Element) -> bool: +def is_loop(elem: El) -> bool: """Is this a Loop Flow Element? Args: @@ -241,7 +242,7 @@ def is_loop(elem: ET.Element) -> bool: return elem.tag.endswith("loops") -def is_goto_connector(elem: ET.Element) -> bool | None: +def is_goto_connector(elem: El) -> bool | None: """Is this element a goto? Args: @@ -259,7 +260,7 @@ def is_goto_connector(elem: ET.Element) -> bool | None: return None -def is_decision(elem: ET.Element) -> bool: +def is_decision(elem: El) -> bool: """True if this is a decision Flow Element Args: @@ -271,7 +272,7 @@ def is_decision(elem: ET.Element) -> bool: return get_tag(elem) == 'decisions' -def get_by_tag(elem: ET.Element, tagname: str) -> list[ET.Element]: +def get_by_tag(elem: El, tag_name: str) -> list[El]: """Get list of all elem with the tag (ignoring ns). Convenience method as manually dealing with namespaces is clumsy. 
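A minimal illustration of the clumsiness being hidden here, using the stdlib ElementTree on a toy document (the scanner itself routes through its custom parser in public.custom_parser):

```python
import xml.etree.ElementTree as ET

ns = '{http://soap.sforce.com/2006/04/metadata}'
root = ET.fromstring(
    '<Flow xmlns="http://soap.sforce.com/2006/04/metadata">'
    '<decisions><name>d1</name></decisions>'
    '</Flow>'
)
# Every raw query must spell out the namespace...
assert len(root.findall(f'./{ns}decisions')) == 1
# ...because a bare tag silently matches nothing:
assert root.findall('./decisions') == []
```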
@@ -284,10 +285,10 @@ def get_by_tag(elem: ET.Element, tagname: str) -> list[ET.Element]: XML Elements else [] if no matches """ - return elem.findall(f'./{ns}{tagname}') + return elem.findall(f'./{ns}{tag_name}') -def get_named_elems(elem: ET.Element) -> list[ET.Element]: +def get_named_elems(elem: El) -> list[El]: """Get all descendents (recursive) of elem that have a ``name`` tag Args: @@ -301,7 +302,7 @@ def get_named_elems(elem: ET.Element) -> list[ET.Element]: to_return = [x for x in named if get_tag(x) != 'processMetadataValues'] return to_return -def get_name(elem: ET.Element | None) -> str | None: +def get_name(elem: El | None) -> str | None: """returns the string name of elem or None if no name or '*'""" if elem is None: return None @@ -314,19 +315,19 @@ def get_name(elem: ET.Element | None) -> str | None: return name.text -def get_elem_string(elem: ET.Element) -> str | None: +def get_elem_string(elem: El) -> str | None: if elem is None: return '' else: return CP.to_string(elem) -def get_line_no(elem: ET.Element) -> int: +def get_line_no(elem: El) -> int: # noinspection PyUnresolvedReferences return elem.sourceline -def get_start_element(root: ET.Element) -> ET.Element | None: +def get_start_element(root: El) -> El | None: start_elements = START_ELEMS start_res = {x: get_by_tag(root, x) for x in start_elements} @@ -347,7 +348,7 @@ def get_subflow_name(subflow): return sub_name_el[0].text -def get_assignment_statement_dicts(elem: ET.Element) -> list[tuple[str, dict[str, str]]] | None: +def get_assignment_statement_dicts(elem: El) -> list[tuple[str, dict[str, str]]] | None: """Returns assignment statement keywords in 'assignments' elems Args: elem: elem to parse, should have a tag of "assignments" @@ -371,7 +372,7 @@ def get_assignment_statement_dicts(elem: ET.Element) -> list[tuple[str, dict[str return None -def get_filters(elem: ET.Element) -> list[ET.Element]: +def get_filters(elem: El) -> list[El]: """Find all filter elements Searches recursively to find all elements that are children @@ -386,7 +387,7 @@ def get_filters(elem: ET.Element) -> list[ET.Element]: """ return elem.findall(f'.//{ns}filters') -def get_transform_influencers(transform_elem: ET.Element) -> list[tuple[TransformType,str|None,tuple[str, ...]]] | None: +def get_transform_influencers(transform_elem: El) -> list[tuple[TransformType,str|None,tuple[str, ...]]] | None: """Converts transform elem to a list of tuples [(transform_type, outputAPI field (or None), tuple(influencer_names)] Args: transform_elem: top level elem to process @@ -500,7 +501,7 @@ def get_transform_influencers(transform_elem: ET.Element) -> list[tuple[Transfor f"{traceback.format_exc()}") return None -def get_vars_from_value(elem: ET.Element, +def get_vars_from_value(elem: El, expr_parser :Callable[[str], list[str]]=parse_expression) -> dict[str, list[str]] | None: """accepts , , or element and returns a list of variables that influence this element. 
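The default expr_parser mentioned in the docstring that follows is regex-based; a toy stand-in that pulls {!...} merge-field references out of a template string (the real parse_expression, in the flow_parser package, is more thorough):

```python
import re

# Toy stand-in for the default expression parser (illustrative only)
MERGE_FIELD = re.compile(r'\{!([$A-Za-z0-9_.]+)\}')

def toy_parse_expression(raw: str) -> list[str]:
    return MERGE_FIELD.findall(raw)

assert toy_parse_expression('Hello {!$User.FirstName}, re: {!caseVar.Id}') == \
    ['$User.FirstName', 'caseVar.Id']
```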
@@ -510,12 +511,12 @@ def get_vars_from_value(elem: ET.Element, Args: expr_parser (callable): method to parse expressions (default regexp is provided) - elem: (ET.Element): element + elem: (El): element Returns: - a dict tagname: list[variable names] - where tagname is the tag of the child element of value holding the reference unless - this is a complexValue, in which case the tagname contains refined information: + a dict tag_name: list[variable names] + where tag_name is the tag of the child element of value holding the reference unless + this is a complexValue, in which case the tag_name contains refined information: 'ComplexValueType.FieldReference': ['var1', 'var2'] 'ComplexValueType.FieldReference': ['var1', 'var2'] 'ComplexValueType.JoinDefinition.leftJoinKeys: ['var1', 'var2'] @@ -549,7 +550,7 @@ def get_vars_from_value(elem: ET.Element, # fall through return None -def _process_val_child(elem: ET.Element, el_tag: str, parent_el: ET.Element, +def _process_val_child(elem: El, el_tag: str, parent_el: El, expr_parser :Callable[[str], list[str]]=parse_expression) -> dict[str, list[str]] | None: raw_data = elem.text @@ -670,7 +671,7 @@ def _process_val_child(elem: ET.Element, el_tag: str, parent_el: ET.Element, return None -def get_input_assignments(elem: ET.Element) -> list[ET.Element]: +def get_input_assignments(elem: El) -> list[El]: """Find all input assignments Searches recursively to find all elements that are children @@ -686,7 +687,7 @@ def get_input_assignments(elem: ET.Element) -> list[ET.Element]: return elem.findall(f'.//{ns}inputAssignments') -def get_sinks_from_field_values(elems: list[ET.Element]) -> list[tuple[str, str | None, str]]: +def get_sinks_from_field_values(elems: list[El]) -> list[tuple[str, str | None, str]]: """Find variables that flow into field/value pairs E.g.if a recordLookup field has a filter:: @@ -749,7 +750,7 @@ def get_sinks_from_field_values(elems: list[ET.Element]) -> list[tuple[str, str return accum -def process_output_assignments(elem: ET.Element) -> list[tuple[str, str]]: +def process_output_assignments(elem: El) -> list[tuple[str, str]]: """Searches elem recursively and pulls out doubles of the form: WorkItemID @@ -778,7 +779,7 @@ def process_output_assignments(elem: ET.Element) -> list[tuple[str, str]]: accum.append((influencer, influenced)) return accum -def get_field_op_values_from_elem(elem: ET.Element, tag: str) -> list[tuple[str, str | None, str]]: +def get_field_op_values_from_elem(elem: El, tag: str) -> list[tuple[str, str | None, str]]: """ Searches elem recursively for tag, and the pull-out triples of the form: @@ -799,7 +800,7 @@ def get_field_op_values_from_elem(elem: ET.Element, tag: str) -> list[tuple[str, elems = elem.findall(f'.//{ns}{tag}') return get_sinks_from_field_values(elems) -def get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnType, bool]] | None: +def get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]] | None: """Get a connector map that also works for all possible start elements Args: @@ -830,10 +831,14 @@ def get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTyp if scheduled_paths is None or len(scheduled_paths) == 0: return standard_connectors else: + seen_scheduled_targets = [] for x in scheduled_paths: try: conn_name = x.find('.//{ns}targetReference').text - standard_connectors[x] = (conn_name, ConnType.Other, False) + if conn_name not in seen_scheduled_targets: + seen_scheduled_targets.append(conn_name) + + standard_connectors[x] = (conn_name, 
ConnType.Other, False) # noinspection PyBroadException except: continue @@ -842,7 +847,7 @@ def get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTyp return _get_conn_target_map(elem) -def _get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnType, bool]]: +def _get_conn_target_map(elem: El) -> dict[El, tuple[str, ConnType, bool]]: """returns map from connectors at elem to where they point Args: @@ -857,6 +862,8 @@ def _get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTy el_tag = get_tag(elem) is_optional = False # start with this and then override missing_connector = False + seen_targets = [] + if el_tag == 'decisions': rules_els = get_by_tag(elem, 'rules') @@ -883,7 +890,7 @@ def _get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTy # or if they are default and a rule is missing a connector is_optional = True - res = get_by_tag(elem=x, tagname='targetReference') + res = get_by_tag(elem=x, tag_name='targetReference') if res is None or len(res) == 0: logger.error(f"ERROR: found a connector without a target reference! " f"{get_elem_string(elem)}") @@ -893,19 +900,21 @@ def _get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTy assert x not in to_return target_name = res[0].text - # classify connector - if is_goto_connector(x): - # this takes priority - to_return[x] = (target_name, ConnType.Goto, is_optional) + if target_name not in seen_targets: + seen_targets.append(target_name) + # classify connector + if is_goto_connector(x): + # this takes priority + to_return[x] = (target_name, ConnType.Goto, is_optional) - elif conn_type == NEXT_VALUE_CONNECTOR: - to_return[x] = (res[0].text, ConnType.Loop, is_optional) + elif conn_type == NEXT_VALUE_CONNECTOR: + to_return[x] = (res[0].text, ConnType.Loop, is_optional) - elif conn_type == FAULT_CONNECTOR or conn_type == TIMEOUT_CONNECTOR: - to_return[x] = (res[0].text, ConnType.Exception, is_optional) + elif conn_type == FAULT_CONNECTOR or conn_type == TIMEOUT_CONNECTOR: + to_return[x] = (res[0].text, ConnType.Exception, is_optional) - else: - to_return[x] = (res[0].text, ConnType.Other, is_optional) + else: + to_return[x] = (res[0].text, ConnType.Other, is_optional) return to_return @@ -915,14 +924,14 @@ def _get_conn_target_map(elem: ET.Element) -> dict[ET.Element, tuple[str, ConnTy # Utilities for parsing variables # -def is_assign_null(elem: ET.Element) -> bool | None: +def is_assign_null(elem: El) -> bool | None: res = elem.find(f'{ns}assignNullValuesIfNoRecordsFound') if res is None: return None return res.text == 'true' -def is_auto_store(elem: ET.Element) -> bool | None: +def is_auto_store(elem: El) -> bool | None: # None if the field is missing or can't be parsed # otherwise true or false res = elem.find(f'{ns}storeOutputAutomatically') @@ -931,7 +940,7 @@ def is_auto_store(elem: ET.Element) -> bool | None: return res.text == 'true' -def is_collection(elem: ET.Element) -> bool | None: +def is_collection(elem: El) -> bool | None: # None if the field is missing or can't be parsed # otherwise true or false res = elem.find(f'{ns}isCollection') @@ -940,7 +949,7 @@ def is_collection(elem: ET.Element) -> bool | None: return res.text == 'true' -def get_input_fields(elem: ET.Element) -> set[ET.Element] | None: +def get_input_fields(elem: El) -> set[El] | None: accum = set() elems = elem.findall(f'.//{ns}fields') for el in elems: @@ -954,21 +963,21 @@ def get_input_fields(elem: ET.Element) -> set[ET.Element] | None: return accum -def 
get_obj_name(elem: ET.Element) -> str | None: +def get_obj_name(elem: El) -> str | None: object_name = elem.find(f'{ns}object') if object_name is None: return None return object_name.text -def get_output_reference(elem: ET.Element) -> str | None: +def get_output_reference(elem: El) -> str | None: object_name = elem.find(f'{ns}outputReference') if object_name is None: return None return object_name.text -def get_datatype(elem: ET.Element) -> DataType | None: +def get_datatype(elem: El) -> DataType | None: obj_ = elem.find(f'{ns}dataType') if obj_ is None: return None @@ -986,19 +995,19 @@ def get_datatype(elem: ET.Element) -> DataType | None: return DataType.Literal -def is_get_first_record_only(elem: ET.Element) -> bool | None: +def is_get_first_record_only(elem: El) -> bool | None: res = elem.find(f'{ns}getFirstRecordOnly') if res is None: return None return res.text == 'true' -def is_input(elem: ET.Element) -> bool: +def is_input(elem: El) -> bool: res = get_by_tag(elem, 'isInput') return len(res) > 0 and res[0].text == 'true' -def is_output(elem: ET.Element) -> bool: +def is_output(elem: El) -> bool: res = get_by_tag(elem, 'isOutput') return len(res) > 0 and res[0].text == 'true' @@ -1010,7 +1019,7 @@ def is_output(elem: ET.Element) -> bool: """ -def _process_assignment_item(elem: CP.ET.Element) -> tuple[str, dict[str, str]] | None: +def _process_assignment_item(elem: El) -> tuple[str, dict[str, str]] | None: """Returns assignment item dict from assignment element Args: @@ -1056,7 +1065,7 @@ def _process_assignment_item(elem: CP.ET.Element) -> tuple[str, dict[str, str]] return None -def _get_value(el: ET.Element) -> str | None: +def _get_value(el: El) -> str | None: for child in el: if get_tag(child) == 'elementReference': return child.text @@ -1065,7 +1074,7 @@ def _get_value(el: ET.Element) -> str | None: return None -def get_subflow_output_map(subflow: ET.Element) -> tuple[bool, dict[str,str]]: +def get_subflow_output_map(subflow: El) -> tuple[bool, dict[str,str]]: """returns a tuple (bool:, map: child name --> parent name) where the first return value is true if outputs are automatically assigned in which case they are flow_name.flow_var @@ -1088,7 +1097,7 @@ def get_subflow_output_map(subflow: ET.Element) -> tuple[bool, dict[str,str]]: return auto, mappings -def get_subflow_input_map(subflow: ET.Element) -> dict[str, str]: +def get_subflow_input_map(subflow: El) -> dict[str, str]: """Returns a map from caller variable to variable in called flow E.g. 
in this example:: @@ -1126,7 +1135,7 @@ def get_subflow_input_map(subflow: ET.Element) -> dict[str, str]: accum[key] = val return accum -def _get_tags(root: ET.Element, tags: list[str]) -> list[str]: +def _get_tags(root: El, tags: list[str]) -> list[str]: accum = [] for tag in tags: res = root.findall(f'.//{ns}{tag}') @@ -1135,7 +1144,7 @@ def _get_tags(root: ET.Element, tags: list[str]) -> list[str]: accum.append(res.text.strip()) return accum -def get_all_flow_refs(root: ET.Element) -> list[str]: +def get_all_flow_refs(root: El) -> list[str]: accum = _get_tags(root, tags=DIRECT_REF_HOLDERS) expressions = _get_tags(root, tags=EXPRESSION_REF_HOLDERS) for expr in expressions: @@ -1197,9 +1206,11 @@ def quick_validate(flow_path: str) -> bool: return has_start and not has_banned + except FileNotFoundError: + logger.critical(f"Could not find file {flow_path}") except: - logger.critical(f"exception when attempting to quick_validate flow {flow_path}" - f"{traceback.format_exc()}") + logger.critical(f"could not quick_validate flow {flow_path}" + f"\n{traceback.format_exc()}") return False def validate_flow(flow_path: str) -> bool: diff --git a/packages/code-analyzer-flow-engine/FlowScanner/queries/debug_query.py b/packages/code-analyzer-flow-engine/FlowScanner/queries/debug_query.py index a887e7e9..2be1279a 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/queries/debug_query.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/queries/debug_query.py @@ -5,18 +5,12 @@ from __future__ import annotations import logging -import re from typing import TypeAlias -import json -import public -from flow_scanner import control_flow -from flow_scanner.control_flow import Crawler from public import parse_utils -from public.contracts import (AbstractQuery, QueryAction, QueryDescription, - QueryResult, State, AbstractCrawler, FlowParser, LexicalQuery, Query) -from public.data_obj import CrawlStep, InfluenceStatement, InfluencePath -from public.enums import Severity, ConnType, TriggerType, FlowType +from public.contracts import (Query, QueryAction, QueryDescription, + QueryResult) +from public.enums import Severity El: TypeAlias = parse_utils.CP.ET.Element @@ -38,30 +32,27 @@ } -class Detect(AbstractQuery): +class Detect(Query): + query_id = 'Detect' + query_name = QUERIES[query_id] - def __init__(self, msg: str|None = None): - try: - conf = json.loads(msg) - self.conf = msg - except: - self.conf = None - self.query_id = 'Detect' - self.query_name = QUERIES[self.query_id] + def __init__(self, arg_obj: str | None = None): + self.conf = arg_obj - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, - query_description="Flow detected from one named element to another", + query_id=cls.query_id, + query_name=cls.query_name, + query_description="Debug query", severity=Severity.Flow_Low_Severity, is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.process_elem + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.process_elem] def execute(self) -> list[QueryResult] | None: if self.conf is None: diff --git a/packages/code-analyzer-flow-engine/FlowScanner/queries/default_query.py b/packages/code-analyzer-flow-engine/FlowScanner/queries/default_query.py index 0cc638cd..5e01c3e9 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/queries/default_query.py +++ 
b/packages/code-analyzer-flow-engine/FlowScanner/queries/default_query.py @@ -5,347 +5,210 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING import logging +from typing import TypeAlias +import public.custom_parser as CP from flow_scanner.flow_result import DEFAULT_HELP_URL - -if TYPE_CHECKING: - import xml.etree.ElementTree as ET - from public import parse_utils +from public.contracts import FlowParser, State, Query from public.data_obj import InfluenceStatement, QueryResult +from public.data_obj import QueryDescription +from public.enums import Severity, QueryAction, RunMode -from public.data_obj import QueryDescription, Preset -from public.enums import Severity, FlowType -from public.contracts import QueryProcessor, FlowParser, State, AbstractCrawler - +El: TypeAlias = CP.ET.Element logger = logging.getLogger(__name__) -DEFAULT_HELP_URL = ("https://developer.salesforce.com/docs/atlas.en-us.secure_coding_guide.meta" - "/secure_coding_guide/secure_coding_considerations_flow_design.htm") - -DEFAULT_PRESET = 'pentest' - -presets = {'pentest': {'name': 'Penetration Testing', - 'owner': 'rsussland@salesforce.com', - 'queries': """FlowSecurity.SystemModeWithoutSharing.recordUpdates.data -FlowSecurity.SystemModeWithoutSharing.recordCreates.data -FlowSecurity.SystemModeWithoutSharing.recordUpdates.selector -FlowSecurity.SystemModeWithoutSharing.recordDeletes.selector -FlowSecurity.SystemModeWithoutSharing.recordLookups.selector -FlowSecurity.SystemModeWithSharing.recordCreates.data -FlowSecurity.SystemModeWithSharing.recordUpdates.data -FlowSecurity.SystemModeWithSharing.recordUpdates.selector -FlowSecurity.SystemModeWithSharing.recordDeletes.selector -FlowSecurity.SystemModeWithSharing.recordLookups.selector"""}, - 'all': {'name': 'All', - 'owner': 'rsussland@salesforce.com', - 'queries': """FlowSecurity.SystemModeWithoutSharing.recordUpdates.data -FlowSecurity.SystemModeWithoutSharing.recordCreates.data -FlowSecurity.SystemModeWithoutSharing.recordUpdates.selector -FlowSecurity.SystemModeWithoutSharing.recordDeletes.selector -FlowSecurity.SystemModeWithoutSharing.recordLookups.selector -FlowSecurity.SystemModeWithSharing.recordUpdates.data -FlowSecurity.SystemModeWithSharing.recordCreates.data -FlowSecurity.SystemModeWithSharing.recordUpdates.selector -FlowSecurity.SystemModeWithSharing.recordDeletes.selector -FlowSecurity.SystemModeWithSharing.recordLookups.selector -FlowSecurity.DefaultMode.recordUpdates.data -FlowSecurity.DefaultMode.recordCreates.data -FlowSecurity.DefaultMode.recordUpdates.selector -FlowSecurity.DefaultMode.recordDeletes.selector -FlowSecurity.DefaultMode.recordLookups.selector""" - } - } - -QUERY_IDS = [] - - - -def build_preset(preset_name: str = DEFAULT_PRESET): - if preset_name is None: - preset_name = DEFAULT_PRESET - - if preset_name not in presets.keys(): - return None - preset = presets[preset_name] - pr_name = preset['name'] - pr_owner = preset['owner'] - query_ids = preset['queries'].split("\n") - query_id_list = [x.strip() for x in query_ids] - if len(QUERY_IDS) == 0: - [QUERY_IDS.append(x.strip()) for x in query_ids if len(x) > 0] - queries = {build_query_desc_from_id(x) for x in QUERY_IDS} - return Preset(preset_name=pr_name, - preset_owner=pr_owner, - queries=queries) +QUERIES = { + 'PreventPassingUserDataIntoElementWithoutSharing': 'User Data DML in System Mode Without Sharing', + 'PreventPassingUserDataIntoElementWithSharing': 'User Data DML in System Mode With Sharing' +} +class
PreventPassingUserDataIntoElementWithoutSharing(Query): -class DefaultQueryProcessor(QueryProcessor): - """Default queries to run if user does not load a query file + query_id = 'PreventPassingUserDataIntoElementWithoutSharing' + query_name = 'User Data DML in System Mode Without Sharing' - """ + @classmethod + def get_query_description(cls) -> QueryDescription: + return QueryDescription( + query_id=cls.query_id, + query_name=cls.query_name, + query_description=( + "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) " + "in System context without sharing. This can result in privilege escalation if the user does " + "not have permission to access the underlying record."), + query_version="1.0", + severity=Severity.Flow_High_Severity, + help_url=DEFAULT_HELP_URL, + is_security=True + ) - def __init__(self) -> None: + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.process_elem] - #: flow (xml) root - self.root: ET.Element + def execute(self, state=None, crawler=None, all_states=None) -> list[QueryResult] | None: + if state is None: + return None + if state.get_parser().get_effective_run_mode() != RunMode.SystemModeWithoutSharing: + return None + return process_element(self.query_id, state.get_current_elem(), state) - #: preset selected by user - self.preset: Preset | None = None - #: taint sources are populated on flow enter - self.sources: set[tuple[str, str]] = set() - #: flow parser - self.parser: FlowParser | None = None +class PreventPassingUserDataIntoElementWithSharing(Query): - #: path of flow - self.flow_paths: list[str] | None = None + query_id = 'PreventPassingUserDataIntoElementWithSharing' + query_name = 'User Data DML in System Mode With Sharing' + @classmethod + def get_query_description(cls) -> QueryDescription: + return QueryDescription( + query_id = cls.query_id, + query_name= cls.query_name, + query_description=("User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) " + "in System context with sharing. 
This can result in privilege escalation if the user does " + "not have permission to access the underlying record."), + query_version="1.0", + severity=Severity.Flow_Moderate_Severity, + help_url=DEFAULT_HELP_URL, + is_security=True + ) - def set_preset(self, preset_name: str | None) -> Preset | None: - self.preset = build_preset(preset_name) - return self.preset + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.process_elem] - def handle_crawl_element(self, state: State, crawler: AbstractCrawler=None) -> list[QueryResult] | None: - return self.process_element(state.get_current_elem(), state) + def execute(self, state=None, crawler=None, all_states=None) -> list[QueryResult] | None: + if state is None: + return None + if state.get_parser().get_effective_run_mode() != RunMode.SystemModeWithSharing: + return None + return process_element(self.query_id, state.get_current_elem(), state) - def handle_flow_enter(self, state: State, crawler: AbstractCrawler=None) -> list[QueryResult] | None: - # set current parser - parser = state.get_parser() - flow_path = parser.get_filename() - if self.flow_paths is None: - self.flow_paths = [flow_path] - start = True - else: - start = False - # always add to the list, so we collect sources in subflows and remember them - parser_sources = get_sources(parser, start=start) - self.sources.update(parser_sources) +def process_element(query_id, elem: El, state: State) -> list[QueryResult] | None: + """Looks for CRUD influencers from sources (input fields or input variables) - # can also do lexical queries here with parser - # in which case we may want to return a result - return None + Searches the xml element looking for tainted variables that are selector or data influencers. - def handle_final(self, all_states: tuple[State]) -> list[QueryResult] | None: - """Entry point for running queries after all scans are complete + If sources of taint are found, calls + the `process_influencers` method that queries the state for vulnerable flows and generates reports. Args: - all_states: tuple of states, one for all flows processed + query_id (str): id of the query being run + state: BranchState + elem: element being searched Returns: - list of query results - """ - # This would be appropriate for more advanced analysis that requires the full - # dataflow graph of the entire fully executed program - return None - - def process_element(self, elem: ET.Element, state: State) -> list[QueryResult] | None: - """Looks for CRUD influencers from sources (input fields or input variables) - - Searches the xml element looking for tainted variables that are selector or data influencers. - - If sources of taint are found, calls - the `process_influencers` method that queries the state for vulnerable flows and generates reports.
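+ + E.g. a sink element might look like this (a hypothetical sketch; the element, field, and variable names here are illustrative only, not taken from a real flow):: + + <recordUpdates> + <name>Update_Account</name> + <inputAssignments> + <field>Description</field> + <value><elementReference>userInput</elementReference></value> + </inputAssignments> + </recordUpdates> + + and would be reported when ``userInput`` is reachable from a tainted input.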
- - Args: - state: BranchState - elem: element being searched - - Returns: - None - """ - elem_type = parse_utils.get_tag(elem) - filter_influencers = [] - input_influencers = [] - parser = state.get_parser() - - # sinks are define here - if elem_type in ["recordUpdates", "recordLookups", "recordCreates", "recordDeletes"]: - - # Look for filter selection criteria (influences *which records* are returned) - filter_influencers = parse_utils.get_field_op_values_from_elem(elem, 'filters') - - # Look for input assignment which influences *what values* are updated or created - input_influencers = parse_utils.get_field_op_values_from_elem(elem, 'inputAssignments') - - # Look for bulk operators: - bulk_ref = parse_utils.get_by_tag(elem, 'inputReference') - - if len(bulk_ref) == 1: - bulk_el = bulk_ref[0] - bulk_var = bulk_el.text - - # for bulk operations, we say the influenced elem is the Flow Element itself. - elem_name = state.get_current_elem_name() + list of QueryResults, or None if no sink is influenced + """ + elem_type = parse_utils.get_tag(elem) + parser = state.get_parser() + sources = parser.get_tainted_inputs() - if elem_type in ['recordLookups', 'recordDeletes']: - filter_influencers.append((elem_name, None, bulk_var)) - else: + # sinks are defined here + if elem_type in ["recordUpdates", "recordLookups", "recordCreates", "recordDeletes"]: - res = self.process_influencers(state, elem, filter_influencers, - input_influencers, elem_type, parser) - if res is None: - return None - # validate - for x in res: - assert x.paths is not None - return res + # Look for filter selection criteria (influences *which records* are returned) + filter_influencers = parse_utils.get_field_op_values_from_elem(elem, 'filters') - # fall through - return None + # Look for input assignment which influences *what values* are updated or created + input_influencers = parse_utils.get_field_op_values_from_elem(elem, 'inputAssignments') - def process_influencers(self, state: State, current_elem: ET.Element, - filter_influencers: list[tuple[str, str | None, str]], - input_influencers: list[tuple[str, str | None, str]], - elem_type: str, - parser: FlowParser) -> list[QueryResult] | None: - """Given a list of variables that flow into sinks, search if these are tainted, - and if so, add the tainted flow to the result object. + # Look for bulk operators: + bulk_ref = parse_utils.get_by_tag(elem, 'inputReference') - Before adding the tainted - flow, an additional statement is appended for readability, show how the tainted - value affects the specific Flow element. + if len(bulk_ref) == 1: + bulk_el = bulk_ref[0] + bulk_var = bulk_el.text - Args: - state: current state field - current_elem: xml element being processed - filter_influencers: influencers for record selection/filter - input_influencers: influencers that modify record data - elem_type: Whether this is an update/delete/create/lookup - parser: Parser instance provided by runtime + # for bulk operations, we say the influenced elem is the Flow Element itself.
+ elem_name = state.get_current_elem_name() - Returns: - None - - """ - to_return = [] - flow_path = parser.get_filename() - run_mode = parser.get_effective_run_mode() - flow_type = parser.get_flow_type() - - for x in filter_influencers + input_influencers: - if x in filter_influencers: - check_label = "selector" + if elem_type in ['recordLookups', 'recordDeletes']: + filter_influencers.append((elem_name, None, bulk_var)) else: - check_label = "data" - - query_id = build_id(elem_type=elem_type, - check_labels_val=check_label, - run_mode=run_mode.name) - if query_id is None: - continue - - a_field, op, influencer_var = x - # surgery that deals with string or dataInfluencePaths happens in get_tainted_flows() - tainted_flows = state.get_flows_from_sources(influenced_var=influencer_var, - source_vars=self.sources) - if tainted_flows is not None and len(tainted_flows) > 0: - """ - query_id: id - - influence_statement: DataInfluenceStatement - - paths: set[DataInfluencePath] - """ - curr_name = parse_utils.get_name(current_elem) - - # SystemModeWithoutSharing User Influenced Record Update - sink_stmt = InfluenceStatement(a_field, influencer_var, curr_name, - comment=f"flow into {elem_type} via influence over {a_field}" - f" in run mode {run_mode.name}", - line_no=current_elem.sourceline, - source_text=parse_utils.get_elem_string(current_elem), - flow_path=flow_path, - source_path=flow_path - ) - to_return.append(QueryResult(query_id=query_id, - flow_type=flow_type, - influence_statement=sink_stmt, - paths=frozenset(tainted_flows))) - - msg = ("***Security Finding**" - f"in Flow Element {curr_name} of type {elem_type}" - f"User input can influence {a_field} via control over {check_label} fields" - f"Through the tainted flows:" - f"\ttn".join([str(x) for x in tainted_flows]) + - "*********************") - logger.info(msg) - - if len(to_return) > 0: - return to_return - else: - return None + input_influencers.append((elem_name, None, bulk_var)) + res = process_influencers(query_id, sources, state, elem, filter_influencers, + input_influencers, elem_type, parser) + if res is None: + return None + # validate + for x in res: + assert x.paths is not None + return res + + # fall through, as we are not in a sink + return None + +def process_influencers(query_id: str, + sources: set[tuple[str, str]], + state: State, current_elem: El, + filter_influencers: list[tuple[str, str | None, str]], + input_influencers: list[tuple[str, str | None, str]], + elem_type: str, + parser: FlowParser) -> list[QueryResult] | None: + """Given a list of variables that flow into sinks, check whether they are tainted, + and if so, add the tainted flow to the result object. + + Before adding the tainted + flow, an additional statement is appended for readability, showing how the tainted + value affects the specific Flow element.
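+ + Each influencer is a ``(field, operator, variable)`` triple as produced by + ``parse_utils.get_field_op_values_from_elem``, e.g. ``('AccountId', 'EqualTo', 'userSuppliedId')`` + (hypothetical values); the variable is checked against the taint sources + via ``state.get_flows_from_sources``.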
-def get_sources(parser: FlowParser, start=True) -> ((str, str),): - """Looks for sources Args: - parser: parser instance for flow - start: whether this is the first flow being scanned + query_id (str): name of the query + sources (set[tuple[str, str]]): taint sources as (flow_path, var_name) tuples + state: current state field + current_elem: xml element being processed + filter_influencers: influencers for record selection/filter + input_influencers: influencers that modify record data + elem_type: the sink type (update/delete/create/lookup) + parser: Parser instance provided by runtime Returns: - ((path, varname), ) corresponding to sources of taint + list of QueryResults """ - + to_return = [] flow_path = parser.get_filename() - input_fields = list(parser.get_input_field_elems() or []) - input_vars = list(parser.get_input_variables() or []) - - if input_fields is not None: - input_field_tuples = [(flow_path, parse_utils.get_name(x)) for x in input_fields] - else: - input_field_tuples = [] - - input_vars = input_vars or [] - - if start is True: - return input_field_tuples + input_vars - else: - return input_field_tuples - - -def build_query_desc_from_id(query_id: str) -> QueryDescription: - [run_mode, elem_type, check_val] = query_id.split(".")[1:] - - return build_query_description(elem_type=elem_type, check_labels_val=check_val, run_mode=run_mode) - - -def build_id(elem_type, check_labels_val, run_mode) -> str | None: - str_ = f"FlowSecurity.{run_mode}.{elem_type}.{check_labels_val}" - if str_ in QUERY_IDS: - return str_ + run_mode = parser.get_effective_run_mode() + flow_type = parser.get_flow_type() + + for x in filter_influencers + input_influencers: + a_field, op, influencer_var = x + # surgery that deals with string or dataInfluencePaths happens in get_tainted_flows() + tainted_flows = state.get_flows_from_sources(influenced_var=influencer_var, + source_vars=sources) + if tainted_flows is not None and len(tainted_flows) > 0: + """ + query_id: id + + influence_statement: DataInfluenceStatement + + paths: set[DataInfluencePath] + """ + curr_name = parse_utils.get_name(current_elem) + + # SystemModeWithoutSharing User Influenced Record Update + sink_stmt = InfluenceStatement(a_field, influencer_var, curr_name, + comment=f"flow into {elem_type} via influence over {a_field}" + f" in run mode {run_mode.name}", + line_no=current_elem.sourceline, # noqa + source_text=parse_utils.get_elem_string(current_elem), + flow_path=flow_path, + source_path=flow_path + ) + to_return.append(QueryResult(query_id=query_id, + flow_type=flow_type, + influence_statement=sink_stmt, + paths=frozenset(tainted_flows))) + + if len(to_return) > 0: + return to_return else: return None - - -def build_query_description(elem_type, check_labels_val, run_mode): - query_description = (f"User controlled data flows into {elem_type} element {check_labels_val} in " - f"run mode: {run_mode}") - query_name = f"Flow: {run_mode} {elem_type} {check_labels_val}" - query_id = build_id(elem_type, check_labels_val, run_mode) - - if (run_mode == "SystemModeWithoutSharing" - and elem_type in ["recordDeletes", "recordUpdates"]): - severity = Severity.Flow_High_Severity - - elif run_mode == "SystemModeWithoutSharing": - severity = Severity.Flow_Moderate_Severity - - elif run_mode == "SystemModeWithSharing": - severity = Severity.Flow_Low_Severity - - else: - severity = Severity.Flow_Informational - - return QueryDescription(query_id=query_id, query_name=query_name, severity=severity, - query_description=query_description, help_url=DEFAULT_HELP_URL) diff --git
a/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py b/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py index 29e33852..4a9d723c 100644 --- a/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py +++ b/packages/code-analyzer-flow-engine/FlowScanner/queries/optional_query.py @@ -11,8 +11,6 @@ from typing import TypeAlias import public -from flow_scanner import control_flow -from flow_scanner.control_flow import Crawler from public import parse_utils from public.contracts import (AbstractQuery, QueryAction, QueryDescription, QueryResult, State, AbstractCrawler, FlowParser, LexicalQuery, Query) @@ -69,19 +67,22 @@ class DbInLoop(AbstractQuery): query_id = "DbInLoop" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, - query_description="This rule detects when there are CRUD flow elements within a loop (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes). This rule does not trigger if the CRUD element is in a fault handler. These DB operations should be bulkified by using collections and the IN condition. This rule does not follow subflows.", + query_id = cls.query_id, + query_name= cls.query_name, + query_description=("A Database operation (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) " + "is being performed within a loop. To avoid excessive Database calls, the operation " + "should be bulkified by using collection variables and the 'IN' operator."), query_version="1.0", severity=public.enums.Severity.Flow_Moderate_Severity, help_url=DEFAULT_HELP_URL, is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.flow_enter + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.flow_enter] def convert_results(self, results: list[tuple[CrawlStep, str, str, int]], parser: FlowParser) -> list[QueryResult]: q_results = [] @@ -95,7 +96,17 @@ def convert_results(self, results: list[tuple[CrawlStep, str, str, int]], parser step_line_no = parse_utils.get_line_no(step_elem) step_code = parse_utils.get_elem_string(step_elem) - stmt = InfluenceStatement( + stmt_src = InfluenceStatement( + influenced_var=loop_name, + influencer_var=loop_name, + element_name=loop_name, + comment=f"Loop", + flow_path=flow_path, + line_no=loop_line_no, + source_text=loop_code, + source_path=flow_path, + ) + stmt_sink = InfluenceStatement( influenced_var=step.element_name, influencer_var=loop_name, element_name=step.element_name, @@ -106,16 +117,25 @@ def convert_results(self, results: list[tuple[CrawlStep, str, str, int]], parser source_text=step_code, source_path=flow_path, ) - + path = InfluencePath( + history=(stmt_src,), + influenced_name=step.element_name, + influencer_name=loop_name, + influencer_property=None, + influenced_property=None, + influenced_filepath=flow_path, + influencer_filepath=flow_path, + influenced_type_info=None + ) qr = QueryResult( query_id=self.query_id, flow_type=parser.get_flow_type(), - influence_statement=stmt, + influence_statement=stmt_sink, elem_code=loop_code, elem_line_no=loop_line_no, elem_name=loop_name, filename=flow_path, - paths=None # only print from source to sink + paths=frozenset([path]) # only print from source to sink ) q_results.append(qr) return q_results @@ -148,19 +168,21 @@ class HardcodedId(LexicalQuery): query_id = "HardcodedId" query_name = QUERIES[query_id] - def 
get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, - query_description="This rule detects hardcoded IDs within a flow. Hardcoded Ids are a bad practice, and such flows are not appropriate for distribution.", + query_id=cls.query_id, + query_name=cls.query_name, + query_description=("The flow has a hardcoded Id. Hardcoded Ids are a bad practice, and flows with " + "hardcoded Ids are not appropriate for distribution."), query_version="1.0", severity=public.enums.Severity.Flow_Low_Severity, help_url=DEFAULT_HELP_URL, is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: if parser is None: @@ -214,17 +236,20 @@ class MissingFaultHandler(LexicalQuery): def __init__(self): self.root = None - def get_query_description(self) -> QueryDescription: - return QueryDescription(query_id=self.query_id, - query_name=self.query_name, + @classmethod + def get_query_description(cls) -> QueryDescription: + return QueryDescription(query_id=cls.query_id, + query_name=cls.query_name, severity=public.enums.Severity.Flow_Low_Severity, help_url=DEFAULT_HELP_URL, is_security=False, - query_description=("This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.") + query_description=("An element that can fire fault events is missing " + "fault handlers. Add fault handlers to all Create Records, " + "Update Records, Delete Records, Action Calls, and Subflows.") ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: accum = [] @@ -273,18 +298,22 @@ def populate_trig_object(self, parser: FlowParser) -> None: if self.trig_object is None: self.should_scan = False - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=public.enums.Severity.Flow_Moderate_Severity, help_url=DEFAULT_HELP_URL, is_security=False, - query_description=("This rule detects when an AfterSave record trigger modifies the same record. Record modifications should be done in BeforeSave triggers, not AfterSave triggers. This rule follows subflows, so it will detect if the RecordId is passed to a child flow which then modifies a record with that id.") + query_description=("An AfterSave record trigger is modifying the same record. " + "Record modifications should be done in BeforeSave triggers, " + "not AfterSave triggers. 
The trigger definition may be in a parent flow that calls " + "the current flow as a subflow, passing in the recordId of the trigger record.") ) - def when_to_run(self) -> QueryAction: - return QueryAction.process_elem + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.process_elem] def execute(self, state: State = None, @@ -383,33 +412,36 @@ class TriggerEntryCriteria(LexicalQuery): query_id = "TriggerEntryCriteria" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Moderate_Severity, - query_description="This rule detects when record trigger flows are missing entry criteria. All record trigger flows should have entry criteria specified in the flow trigger definition rather than solely in the flow's own business logic.", + query_description=("The record trigger flow has no entry criteria. " + "All record trigger flows should have entry criteria specified in the flow " + "trigger definition rather than solely in the flow's own business logic."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: root = parser.get_root() - starts = parse_utils.get_by_tag(root, tagname='start') + starts = parse_utils.get_by_tag(root, tag_name='start') if len(starts) != 1: # The flow could have a startElementReference logger.debug(f"could not find start element in flow {parser.get_filename()}") return None start = starts[0] - trigger_type_els = parse_utils.get_by_tag(start, tagname='recordTriggerType') + trigger_type_els = parse_utils.get_by_tag(start, tag_name='recordTriggerType') if len(trigger_type_els) != 1: return None else: - filter_formula = parse_utils.get_by_tag(elem=start, tagname='filterFormula') - filters = parse_utils.get_by_tag(elem=start, tagname='filters') + filter_formula = parse_utils.get_by_tag(elem=start, tag_name='filterFormula') + filters = parse_utils.get_by_tag(elem=start, tag_name='filters') if len(filters) == 0 and len(filter_formula) == 0: @@ -430,17 +462,20 @@ class DefaultCopy(LexicalQuery): query_id = "DefaultCopy" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Low_Severity, - query_description=("This rule detects default names and labels that were auto assigned to elements pasted elements in the flow builder UI. These labels and names should be changed to make the flow comprehensible to maintainers."), + query_description=("An element has the auto-assigned copy name and/or label. 
" + "These names and labels should be changed to make the flow comprehensible " + "to maintainers."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: els = parser.get_all_named_elems() @@ -454,7 +489,7 @@ def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | No accum.append((name, name, el)) continue - labels = parse_utils.get_by_tag(el, tagname='label') + labels = parse_utils.get_by_tag(el, tag_name='label') if len(labels) > 0: for label in labels: label_text = label.text @@ -482,17 +517,19 @@ class UnusedResource(LexicalQuery): query_id = "UnusedResource" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Low_Severity, - query_description="This rule detects redundant variables that are not used in the flow. This can be a sign of developer error.", + query_description=("A resource is not used elsewhere in the flow. Check that you did not " + "intend to use the resource and then delete it."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: root = parser.get_root() @@ -547,23 +584,27 @@ class MissingNextValueConnector(LexicalQuery): query_id = "MissingNextValueConnector" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Moderate_Severity, - query_description=("This rule detects Loops without nextValue connectors. Loops should always have nextValue connectors, and lack of one usually signifies developer error when connecting the loop element to other elements."), + query_description=("A Loop is missing a nextValue connector. " + "Loops should always have nextValue connectors, " + "and lack of one usually signifies developer error when " + "connecting the loop element to other elements."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: accum = [] root = parser.get_root() flow_type = parser.get_flow_type() - loops = parse_utils.get_by_tag(root, tagname='loops') + loops = parse_utils.get_by_tag(root, tag_name='loops') filename = parser.get_filename() for loop in loops: @@ -618,17 +659,19 @@ def accept(cls, **kwargs) -> list[QueryResult] | None: ) ] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Moderate_Severity, - query_description="This rule detects when a subflow calls a parent flow, creating a cyclic flow. 
The rule will detect cycles of any depth.", + query_description=("A subflow calls a parent flow, creating a cycle. Ensure that subflows do not call back " + "into a parent."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: pass @@ -638,22 +681,33 @@ class UnreachableElement(LexicalQuery): query_id = "UnreachableElement" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Moderate_Severity, - query_description=("This rule identifies elements that have not been connected to the start element of the flow. Unreachable elements are usually due to incomplete flows or developer error.") + query_description=("An element is not connected to the start element of the flow. " + "Unreachable elements are usually due to incomplete flows or developer error. " + "Connect this element to the start element or remove it from the flow.") ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: crawler = kwargs["crawler"] cfg = crawler.get_cfg() # noinspection PyTypeChecker - missing = control_flow.validate_cfg(cfg=cfg, parser=parser, missing_only=True) + all_elems = parser.get_all_traversable_flow_elements() + all_elem_tuples = [(parse_utils.get_name(x), parse_utils.get_tag(x)) for x in all_elems] + + crawled_elems = [] + for segment in cfg.segment_map.values(): + crawled_elems = crawled_elems + segment.traversed + + # check that no traversable elements are missing from the cfg + missing = [x for x in all_elem_tuples if x not in crawled_elems] if len(missing) == 0: return None @@ -685,20 +739,23 @@ def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | No return results class MissingDescription(LexicalQuery): + query_id = "MissingDescription" query_name = QUERIES[query_id] - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Low_Severity, - query_description=("This rule detects elements that contain labels but are missing descriptions. All elements with labels should have accompanying descriptions to make the flow comprehensible to future maintainers."), + query_description=("An element has a label but is missing a description.
Document all elements " + "with labels to make the flow comprehensible to future maintainers."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: all_named = list(parser.get_all_named_elems()) @@ -771,17 +828,21 @@ def __init__(self): self.should_check: bool = True self.start_elems: list[El] | None = None - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_High_Severity, - query_description=("This rule detects when a wait event is reached during trigger execution. Triggers must be performant and cannot contain wait events. For async processing, use scheduled paths within your trigger and async callouts, not wait events. This rule follows subflows."), + query_description=("A wait event is reached during trigger execution. The trigger may be in a parent flow " + "that calls the current flow as a subflow. Triggers must be performant and cannot " + "contain wait events. For async processing, use scheduled paths within your trigger " + "and async callouts, not wait events."), is_security=False ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: if self.should_check: @@ -824,156 +885,200 @@ def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | No return accum class TriggerCallout(LexicalQuery): + query_id = "TriggerCallout" query_name = QUERIES[query_id] def __init__(self): + self.should_scan: bool = False self.should_check: bool = True + self.has_scheduled_path: bool = False self.top_flow_path: str | None = None - self.called_names: list[str] | None = None - + self.start_el: El | None = None #: element name corresponding to direct path from start - self.conn_target_name: str| None = None - + self.conn_target_el: El | None = None + self.top_parser: FlowParser | None = None - def get_query_description(self) -> QueryDescription: + @classmethod + def get_query_description(cls) -> QueryDescription: return QueryDescription( - query_id=self.query_id, - query_name=self.query_name, + query_id=cls.query_id, + query_name=cls.query_name, severity=Severity.Flow_Moderate_Severity, - query_description=("This rule detects when a trigger performs a callout on the synchronous path. Triggers must be performant and may only contain callouts on async scheduled paths. This rule follows subflows.") + query_description=("A callout is performed on the synchronous path of a trigger. The trigger may be in a " + "parent flow. Triggers must be performant and may only contain callouts on async " + "scheduled paths. 
It is recommended that you create an async path and place " + "the callout there.") ) - def when_to_run(self) -> QueryAction: - return QueryAction.lexical + def when_to_run(self) -> list[QueryAction]: + return [QueryAction.lexical] def execute(self, parser: FlowParser = None, **kwargs) -> list[QueryResult] | None: if self.should_check: - if parser.get_flow_type() is FlowType.Trigger: - try: - start_el = parser.get_start_elem() - conn = parse_utils.get_by_tag(start_el, 'connector')[0] - conn_target = parse_utils.get_text_of_tag(conn, 'targetReference') - except: - logger.debug(f"exception thrown when searching for start connector target in {self.top_flow_path}" - f"\n {traceback.format_exc()}") - self.should_scan = False + try: + if parser.get_flow_type() is not FlowType.Trigger: self.should_check = False - return None - - if conn_target is None: self.should_scan = False - self.should_check = False return None else: - self.should_scan = True self.should_check = False + self.should_scan = True self.top_flow_path = parser.get_filename() - self.conn_target_name = conn_target - self.called_names = parser.get_traversable_descendents_of_elem(conn_target) - - else: + self.start_el = parser.get_start_elem() + self.top_parser = parser + scheduled_paths = self.start_el.findall(f'.//{ns}scheduledPaths/{ns}connector') + if not scheduled_paths: + self.has_scheduled_path = False + # we look for anything connected to start as everything is on the + # synchronous path + self.conn_target_el = self.start_el + else: + self.has_scheduled_path = True + conn = parse_utils.get_by_tag(self.start_el, 'connector')[0] + conn_target = parse_utils.get_text_of_tag(conn, 'targetReference') + if conn is None or conn_target is None or len(conn_target) == 0: + self.should_scan = False + self.should_check = False + return None + else: + self.conn_target_el = parser.get_by_name(conn_target) + if self.conn_target_el is None: + logger.error(f"start element pointing to a non-existing connector " + f"in flow {self.top_flow_path}") + self.should_scan = False + self.should_check = False + return None + + except: + logger.debug(f"exception thrown when searching for start connector target in {self.top_flow_path}" + f"\n {traceback.format_exc()}") self.should_scan = False self.should_check = False return None + + elif not self.should_scan: return None # fall through + + # a map from actionType -> list of tuples (action_element, action_name) action_calls = parser.get_action_call_map() - if action_calls is None: + if not action_calls: return None callouts = dict.get(action_calls, 'externalService', None) if not callouts: return None - callout_names = [x[0] for x in callouts] + accum = [] crawler = kwargs.get("crawler") - results = search_for_sync_jumps( - parser=parser, - called_names=self.called_names, - current_crawler=crawler, - prev_crawlers=crawler.get_crawler_history_unsafe(), - target_el_names=callout_names, - conn_target_name=self.conn_target_name, - current_filename=parser.get_filename(), - top_filename=self.top_flow_path) - - if len(results) == 0: - return None - else: - accum = [] - for result in results: - elem = parser.get_by_name(name_to_match=result) + for callout_el, callout_name in callouts: - accum.append(QueryResult( - query_id=self.query_id, - flow_type=FlowType.Trigger, - elem_code=parse_utils.get_elem_string(elem), - elem_name=result, - elem_line_no=parse_utils.get_line_no(elem), - field=result, - filename=parser.get_filename() - ) - ) + res = crawler.get_call_chain(source_el=self.conn_target_el, + 
source_path=self.top_flow_path, + sink_el=callout_el, + source_parser=self.top_parser) + if not res: + continue + res.insert(0, (self.start_el, self.top_flow_path)) - accum.append(QueryResult( - query_id=self.query_id, - flow_type=FlowType.Trigger, - elem_code=parse_utils.get_elem_string(elem), - elem_name=result, - elem_line_no=parse_utils.get_line_no(elem), - field=result, - filename=parser.get_filename() - ) - ) + qr = generate_query_result_from_call_chain( + chain=res, + flow_type=FlowType.Trigger, + query_id=self.query_id, + ) + accum.append(qr) - return accum + if accum: + return accum + else: + return None -def search_for_sync_jumps( - parser: FlowParser, - called_names: list[str], - current_crawler: Crawler, - prev_crawlers: list[Crawler] | None, - target_el_names: list[str], - conn_target_name: str, - current_filename: str, - top_filename: str) -> list[str]: - """ - target_el_names = names of http callouts that should not be called from - the conn_target_name +def generate_query_result_from_call_chain(chain: list[tuple[El, str]], + flow_type: FlowType, + query_id: str, + ) -> QueryResult | None: - Returns: - list of target_el_names that are running in the synchronous path - """ - results = [] + influence_path = generate_path_from_call_chain(chain) + assert influence_path is not None - if top_filename == current_filename: + qr = QueryResult( + query_id=query_id, + flow_type=flow_type, + paths=frozenset([influence_path]) + ) - for tgt_name in target_el_names: - if tgt_name in called_names: - results.append(tgt_name) + return qr - else: - # we are in a subflow so we need to find the first top level crawler - # that is connected to this frame. - if prev_crawlers is None: - # This means the executor didn't set the parent crawler - logger.critical(f"could not link back to {top_filename} from {current_filename}") - return results +def generate_path_from_call_chain(chain: list[tuple[El, str]]) -> InfluencePath: + """ + Args: + chain is the call chain (element, flow_path of element). It must start at the source + and end at the sink, and it must contain at least the source and the sink.
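+ + E.g. a hypothetical chain ``[(start_el, 'Parent.flow'), (subflow_el, 'Parent.flow'), (callout_el, 'Child.flow')]`` + yields an InfluencePath whose history walks start -> subflow -> callout, + with one InfluenceStatement per hop.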
+ """ + assert chain is not None and len(chain) > 0 + accum = [] + for index, (el, filename) in enumerate(chain): + comment = None + if index == 0: + if el is not chain[1][0]: + influencer_var = parse_utils.get_name(chain[0][0]) + comment = f"start of call chain" + else: + continue else: - crawler_to_check, index = next((c for c in prev_crawlers if c[0].get_flow_path() == top_filename), None) - # The crawler was set, but incorrectly - if crawler_to_check is None: - logger.critical(f"could not link back to {top_filename} from {current_filename}") - return results - - step_to_check = crawler_to_check.get_current_step_index() - 1 - - subflow_name = crawler_to_check.get_crawl_schedule()[step_to_check].element_name - - if subflow_name in called_names: - # all the actions in this subflow are running in the direct path - results = results + target_el_names - - return results - + influencer_var = parse_utils.get_name(chain[index-1][0]) + + influenced_var = parse_utils.get_name(el) + assert influenced_var is not None + + if influencer_var == '*': + influencer_var = 'start' + if influenced_var == '*': + influenced_var = 'start' + + if not comment: + comment = f"{influenced_var} is in the call chain of {influencer_var}" + + code = parse_utils.get_elem_string(el) + line_no = parse_utils.get_line_no(el) + source_path = flow_path = filename + element_name = influenced_var + + accum.append(InfluenceStatement( + influenced_var=influenced_var, + influencer_var=influencer_var, + element_name=element_name, + comment=comment, + flow_path=flow_path, + source_path=source_path, + line_no=line_no, + source_text=code + ) + ) + influenced_name = accum[-1].influenced_var + influencer_name = accum[0].influencer_var + influenced_filepath = accum[-1].flow_path + influencer_filepath = accum[0].flow_path + + influence_path = InfluencePath( + influencer_name=influencer_name, + influenced_name=influenced_name, + influenced_filepath=influenced_filepath, + influencer_filepath=influencer_filepath, + influenced_type_info=None, + influenced_property=None, + influencer_property=None, + history=tuple(accum) + ) + return influence_path def check_in_templates_or_formulas(name_to_check: str, formula_elems: list[El], diff --git a/packages/code-analyzer-flow-engine/package.json b/packages/code-analyzer-flow-engine/package.json index 9d67d713..ab749191 100644 --- a/packages/code-analyzer-flow-engine/package.json +++ b/packages/code-analyzer-flow-engine/package.json @@ -1,7 +1,7 @@ { "name": "@salesforce/code-analyzer-flow-engine", "description": "Plugin package that adds 'Flow Scanner' as an engine into Salesforce Code Analyzer", - "version": "0.30.0", + "version": "0.31.0-SNAPSHOT", "author": "The Salesforce Code Analyzer Team", "license": "BSD-3-Clause", "homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview", diff --git a/packages/code-analyzer-flow-engine/src/engine.ts b/packages/code-analyzer-flow-engine/src/engine.ts index b81ae51c..cf44ab50 100644 --- a/packages/code-analyzer-flow-engine/src/engine.ts +++ b/packages/code-analyzer-flow-engine/src/engine.ts @@ -13,7 +13,7 @@ import { import {Clock, RealClock} from '@salesforce/code-analyzer-engine-api/utils'; import {getMessage} from './messages'; import {FlowNodeDescriptor, FlowScannerCommandWrapper, FlowScannerExecutionResult, FlowScannerRuleResult} from "./python/FlowScannerCommandWrapper"; -import {getDescriptionForRule, getRuleNameFromQueryName, getAllRuleNames, getOptionalQueryIdsForRule} from "./hardcoded-catalog"; +import 
{getDescriptionForRule, getRuleNameFromQueryId, getAllRuleNames, getQueryIdsForRule} from "./hardcoded-catalog"; /** * An arbitrarily chosen value for how close the engine is to completion before the underlying Flow tool is invoked, @@ -80,17 +80,17 @@ export class FlowScannerEngine extends Engine { this.emitRunRulesProgressEvent(normalizeRelativeCompletionPercentage(percentage)); } - const optionalQueryIds: string[] = ruleNames.flatMap(getOptionalQueryIdsForRule); + const queryIds: string[] = ruleNames.flatMap(getQueryIdsForRule); const executionResults: FlowScannerExecutionResult = await this.commandWrapper.runFlowScannerRules( runOptions.workingFolder, workspaceFlows, targetedFlows, logFile, - optionalQueryIds, + queryIds, percentageUpdateHandler ); - const convertedResults: EngineRunResults = toEngineRunResults(executionResults, ruleNames); + const convertedResults: EngineRunResults = toEngineRunResults(executionResults); this.emitRunRulesProgressEvent(100); return convertedResults; } @@ -134,22 +134,15 @@ function normalizeRelativeCompletionPercentage(flowPercentage: number): number { return PRE_INVOCATION_RUN_PERCENT + ((flowPercentage * percentageSpread) / 100); } -function toEngineRunResults(flowScannerExecutionResult: FlowScannerExecutionResult, requestedRules: string[]): EngineRunResults { - const requestedRulesSet: Set = new Set(requestedRules); +function toEngineRunResults(flowScannerExecutionResult: FlowScannerExecutionResult): EngineRunResults { const results: EngineRunResults = { violations: [] }; - for (const queryName of Object.keys(flowScannerExecutionResult.results)) { - const flowScannerRuleResults: FlowScannerRuleResult[] = flowScannerExecutionResult.results[queryName]; + for (const queryId of Object.keys(flowScannerExecutionResult.results)) { + const flowScannerRuleResults: FlowScannerRuleResult[] = flowScannerExecutionResult.results[queryId]; for (const flowScannerRuleResult of flowScannerRuleResults) { - const ruleName = getRuleNameFromQueryName(flowScannerRuleResult.query_name); - // Since the non-optional queries (designated by the default preset) always run, we need filter any of their - // results out if their corresponding rule was not selected. - if (!requestedRulesSet.has(ruleName)) { - continue; - } - + const ruleName = getRuleNameFromQueryId(flowScannerRuleResult.query_id); const flowNodes: FlowNodeDescriptor[] | undefined = flowScannerRuleResult.flow; if (flowNodes) { // If flow based violation results.violations.push({ diff --git a/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts b/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts index ecda68f1..40be7779 100644 --- a/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts +++ b/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts @@ -2,6 +2,8 @@ import {COMMON_TAGS, RuleDescription, SeverityLevel} from '@salesforce/code-anal import {getMessage} from './messages'; // Code Analyzer rule names +// Good news: The python flow scanner query ids now happen to be the exact same names as our code analyzer rule names +// so we no longer need to keep a map between the two. enum RuleName { CyclicSubflow = 'CyclicSubflow', DbInLoop = 'DbInLoop', @@ -130,186 +132,28 @@ const RULE_DESCRIPTIONS: RuleDescription[] = [ const RULE_DESCRIPTIONS_BY_NAME: Map = new Map(RULE_DESCRIPTIONS.map(rd => [rd.name, rd])); -type FlowScannerQueryAssociation = { - // The id of the flow scanner query. This is used when selecting which query to run (if it is an optional query). 
diff --git a/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts b/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts
index ecda68f1..40be7779 100644
--- a/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts
+++ b/packages/code-analyzer-flow-engine/src/hardcoded-catalog.ts
@@ -2,6 +2,8 @@ import {COMMON_TAGS, RuleDescription, SeverityLevel} from '@salesforce/code-anal
 import {getMessage} from './messages';
 
 // Code Analyzer rule names
+// Good news: the Python flow scanner query ids are now identical to our Code Analyzer rule names,
+// so we no longer need to keep a map between the two.
 enum RuleName {
     CyclicSubflow = 'CyclicSubflow',
     DbInLoop = 'DbInLoop',
@@ -130,186 +132,28 @@ const RULE_DESCRIPTIONS: RuleDescription[] = [
 
 const RULE_DESCRIPTIONS_BY_NAME: Map<string, RuleDescription> = new Map(RULE_DESCRIPTIONS.map(rd => [rd.name, rd]));
 
-type FlowScannerQueryAssociation = {
-    // The id of the flow scanner query. This is used when selecting which query to run (if it is an optional query).
-    queryId: string,
-
-    // The name of the flow scanner query. Unfortunately this is what shows up in the results instead of the id.
-    queryName: string,
-
-    // Should be true if the rule is queried by the --optional_queries flag and false if it is in the default preset
-    isOptional: boolean
-
-    // The name of the Code Analyzer rule that the query is associated with. Note that multiple queries can be under the same rule.
-    ruleName: RuleName
-}
-
-const QUERY_ASSOCIATIONS: FlowScannerQueryAssociation[] = [
-    // ==== QUERIES FROM THE DEFAULT PRESET (which we can't turn off when running flow scanner) ====
-    {
-        queryId: "FlowSecurity.SystemModeWithSharing.recordCreates.data",
-        queryName: "Flow: SystemModeWithSharing recordCreates data",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithSharing.recordDeletes.selector",
-        queryName: "Flow: SystemModeWithSharing recordDeletes selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithSharing.recordLookups.selector",
-        queryName: "Flow: SystemModeWithSharing recordLookups selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithSharing.recordUpdates.data",
-        queryName: "Flow: SystemModeWithSharing recordUpdates data",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithSharing.recordUpdates.selector",
-        queryName: "Flow: SystemModeWithSharing recordUpdates selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithoutSharing.recordCreates.data",
-        queryName: "Flow: SystemModeWithoutSharing recordCreates data",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithoutSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithoutSharing.recordDeletes.selector",
-        queryName: "Flow: SystemModeWithoutSharing recordDeletes selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithoutSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithoutSharing.recordLookups.selector",
-        queryName: "Flow: SystemModeWithoutSharing recordLookups selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithoutSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithoutSharing.recordUpdates.data",
-        queryName: "Flow: SystemModeWithoutSharing recordUpdates data",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithoutSharing
-    },
-    {
-        queryId: "FlowSecurity.SystemModeWithoutSharing.recordUpdates.selector",
-        queryName: "Flow: SystemModeWithoutSharing recordUpdates selector",
-        isOptional: false,
-        ruleName: RuleName.PreventPassingUserDataIntoElementWithoutSharing
-    },
-
-    // ==== OPTIONAL QUERIES (which we can choose to run) ====
-    {
-        queryId: "CyclicSubflow",
-        queryName: "Chain of subflow calls forms a cycle",
-        isOptional: true,
-        ruleName: RuleName.CyclicSubflow
-    },
-    {
-        queryId: "DbInLoop",
-        queryName: "Database Operation In Loop",
-        isOptional: true,
-        ruleName: RuleName.DbInLoop
-    },
-    {
-        queryId: "DefaultCopy",
-        queryName: "Default Copy Label",
-        isOptional: true,
-        ruleName: RuleName.DefaultCopy
-    },
-    {
-        queryId: "HardcodedId",
-        queryName: "Hardcoded Id",
-        isOptional: true,
-        ruleName: RuleName.HardcodedId
-    },
-    {
-        queryId: "MissingDescription",
-        queryName: "Missing Description",
-        isOptional: true,
-        ruleName: RuleName.MissingDescription
-    },
-    {
-        queryId: "MissingFaultHandler",
-        queryName: "Missing Fault Handler",
-        isOptional: true,
-        ruleName: RuleName.MissingFaultHandler
-    },
-    {
-        queryId: "MissingNextValueConnector",
-        queryName: "Loop Element Without nextValueConnector",
-        isOptional: true,
-        ruleName: RuleName.MissingNextValueConnector
-    },
-    {
-        queryId: "SameRecordUpdate",
-        queryName: "Same Record Update In Trigger",
-        isOptional: true,
-        ruleName: RuleName.SameRecordUpdate
-    },
-    {
-        queryId: "TriggerCallout",
-        queryName: "Trigger Flow Callout in Synchronous Path",
-        isOptional: true,
-        ruleName: RuleName.TriggerCallout
-    },
-    {
-        queryId: "TriggerEntryCriteria",
-        queryName: "Record Trigger With No Entry Criteria",
-        isOptional: true,
-        ruleName: RuleName.TriggerEntryCriteria
-    },
-    {
-        queryId: "TriggerWaitEvent",
-        queryName: "Wait Event in Trigger",
-        isOptional: true,
-        ruleName: RuleName.TriggerWaitEvent
-    },
-    {
-        queryId: "UnreachableElement",
-        queryName: "Element is Unreachable",
-        isOptional: true,
-        ruleName: RuleName.UnreachableElement
-    },
-    {
-        queryId: "UnusedResource",
-        queryName: "Unused Resource",
-        isOptional: true,
-        ruleName: RuleName.UnusedResource
-    }
-]
-
-const QUERY_ASSOCIATIONS_BY_NAME: Map<string, FlowScannerQueryAssociation> = new Map(QUERY_ASSOCIATIONS.map(qa => [qa.queryName, qa]));
-
 export function getAllRuleNames(): string[] {
     return Object.values(RuleName);
 }
 
-export function getRuleNameFromQueryName(queryName: string): string {
+export function getRuleNameFromQueryId(queryId: string): string {
+    // Good news: the Python flow scanner query ids are now identical to our Code Analyzer rule names,
+    // so we no longer need a map between the two. We keep this helper in case the ids and names
+    // ever diverge again.
     // istanbul ignore else
-    if (QUERY_ASSOCIATIONS_BY_NAME.has(queryName)) {
-        return QUERY_ASSOCIATIONS_BY_NAME.get(queryName)!.ruleName;
+    if (Object.values(RuleName).includes(queryId as RuleName)) {
+        return queryId;
     } else {
-        throw new Error(`Developer error: invalid query name ${queryName}`);
+        throw new Error(`Developer error: invalid query id ${queryId}`);
     }
 }
 
-export function getOptionalQueryIdsForRule(ruleName: string): string[] {
-    const queryIds: string[] = [];
-    for (const queryAssociation of QUERY_ASSOCIATIONS) {
-        if (queryAssociation.isOptional && queryAssociation.ruleName === ruleName) {
-            queryIds.push(queryAssociation.queryId);
-        }
-    }
+export function getQueryIdsForRule(ruleName: string): string[] {
+    // A single Code Analyzer rule used to map to multiple flow scanner query ids. They are now
+    // mapped 1-to-1 and share the exact same names, but we keep the string-array return type
+    // in case that changes in the future.
+    const queryIds: string[] = [ruleName];
     return queryIds;
 }
diff --git a/packages/code-analyzer-flow-engine/src/python/FlowScannerCommandWrapper.ts b/packages/code-analyzer-flow-engine/src/python/FlowScannerCommandWrapper.ts
index 241b6ff3..257de506 100644
--- a/packages/code-analyzer-flow-engine/src/python/FlowScannerCommandWrapper.ts
+++ b/packages/code-analyzer-flow-engine/src/python/FlowScannerCommandWrapper.ts
@@ -19,7 +19,7 @@ export type FlowScannerExecutionResult = {
 }
 
 export type FlowScannerRuleResult = {
-    query_name: string;
+    query_id: string;
     severity: string;
     counter?: number;
     description: string;
@@ -59,7 +59,7 @@ export class RunTimeFlowScannerCommandWrapp
         workspaceFlowFiles: string[],
         targetedFlowFiles: string[],
         absLogFilePath: string,
-        optionalQueryIds: string[],
+        queryIds: string[],
         completionPercentageHandler: (percentage: number) => void
     ): Promise<FlowScannerExecutionResult> {
         const workspaceFlowsFile: string = path.join(workingFolder, 'workspaceFiles.txt');
@@ -80,8 +80,8 @@ export class RunTimeFlowScannerCommandWrapp
             workspaceFlowsFile,
             '--target',
             targetedFlowsFile,
-            '--optional_queries',
-            optionalQueryIds.join(','),
+            '--queries',
+            queryIds.join(','),
             '--json',
             flowScannerResultsFile
         ];
@@ -143,7 +143,7 @@ export class RunTimeFlowScannerCommandWrapp
     private ruleResultIsValid(ruleResult: object): ruleResult is FlowScannerRuleResult {
         // Only require the fields that we actually use
-        if (!('query_name' in ruleResult) || typeof ruleResult.query_name !== 'string') {
+        if (!('query_id' in ruleResult) || typeof ruleResult.query_id !== 'string') {
             return false;
         }
         if (!('severity' in ruleResult) || typeof ruleResult.severity !== 'string') {
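
With the flag renamed, the wrapper passes one flat, comma-joined list of query ids instead of a separate optional-queries list. A sketch of the resulting argument vector, assuming the flag and file names shown in the hunk above (the temp-folder paths and the targeted-files name are made up):

    // Hypothetical args for the Python scanner after this change:
    const queryIds: string[] = ['PreventPassingUserDataIntoElementWithoutSharing', 'MissingFaultHandler'];
    const args: string[] = [
        '--workspace', '/tmp/work/workspaceFiles.txt',  // file listing all workspace flows
        '--target', '/tmp/work/targetedFiles.txt',      // file listing flows to scan (name assumed)
        '--queries', queryIds.join(','),                // one flag selects every query to run
        '--json', '/tmp/work/results.json'              // where the scanner writes its results
    ];
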
diff --git a/packages/code-analyzer-flow-engine/test/engine.test.ts b/packages/code-analyzer-flow-engine/test/engine.test.ts
index 21ae60bb..250d661f 100644
--- a/packages/code-analyzer-flow-engine/test/engine.test.ts
+++ b/packages/code-analyzer-flow-engine/test/engine.test.ts
@@ -86,14 +86,14 @@ describe('Tests for the FlowScannerEngine', () => {
         }, {});
         expect(countsPerRule).toEqual({
             MissingDescription: 56,
-            MissingFaultHandler: 15,
+            MissingFaultHandler: 9,
             PreventPassingUserDataIntoElementWithoutSharing: 5,
             PreventPassingUserDataIntoElementWithSharing: 2
         });
 
         // Next, spot check a few violations to confirm formatting:
         expect(results.violations).toContainEqual({
             ruleName: 'MissingDescription',
-            message: 'This rule detects elements that contain labels but are missing descriptions. All elements with labels should have accompanying descriptions to make the flow comprehensible to future maintainers.',
+            message: 'An element contains a label that is missing a description. Document all elements with labels to make the flow comprehensible to future maintainers.',
             codeLocations: [{
                 file: PATH_TO_EXAMPLE2,
                 startLine: 225,
                 startColumn: 1
             }],
         });
         expect(results.violations).toContainEqual({
             ruleName: 'MissingFaultHandler',
-            message: 'This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.',
+            message: 'An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.',
             codeLocations: [{
                 file: PATH_TO_EXAMPLE4_SUBFLOW,
-                startLine: 69,
+                startLine: 46,
                 startColumn: 1
             }],
             primaryLocationIndex: 0,
         });
@@ -183,7 +183,7 @@ describe('Tests for the FlowScannerEngine', () => {
     const expectedExample1Violation1: Violation = {
         ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-        message: "User controlled data flows into recordUpdates element data in run mode: SystemModeWithoutSharing",
+        message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
         codeLocations: [
             {
                 comment: "change_subject_of_case.change_subject_of_case: Initialization",
@@ -210,7 +210,7 @@ describe('Tests for the FlowScannerEngine', () => {
     const expectedExample1Violation2: Violation = {
         ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-        message: "User controlled data flows into recordDeletes element selector in run mode: SystemModeWithoutSharing",
+        message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
         codeLocations: [
             {
                 comment: "change_subject_of_case.change_subject_of_case: Initialization",
@@ -237,7 +237,7 @@ describe('Tests for the FlowScannerEngine', () => {
     const expectedExample2Violation1: Violation = {
         ruleName: "PreventPassingUserDataIntoElementWithSharing",
-        message: "User controlled data flows into recordUpdates element data in run mode: SystemModeWithSharing",
+        message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context with sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
         codeLocations: [
             {
                 comment: "change_subject_of_case.change_subject_of_case: Initialization",
@@ -264,7 +264,7 @@ describe('Tests for the FlowScannerEngine', () => {
     const expectedExample2Violation2: Violation = {
         ruleName: "PreventPassingUserDataIntoElementWithSharing",
-        message: "User controlled data flows into recordDeletes element selector in run mode: SystemModeWithSharing",
+        message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context with sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
         codeLocations: [
             {
                 comment: "change_subject_of_case.change_subject_of_case: Initialization",
@@ -292,7 +292,7 @@ describe('Tests for the FlowScannerEngine', () => {
     function createSharedExample4Violation(inputAssignmentField: string): Violation {
         return {
             ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-            message: "User controlled data flows into recordCreates element data in run mode: SystemModeWithoutSharing",
+            message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
             codeLocations: [
                 {
                     file: PATH_TO_EXAMPLE4_PARENTFLOW,
@@ -330,7 +330,7 @@ describe('Tests for the FlowScannerEngine', () => {
     const expectedExample4Violation3: Violation = {
         ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-        message: "User controlled data flows into recordLookups element selector in run mode: SystemModeWithoutSharing",
+        message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
         codeLocations: [
             {
                 file: PATH_TO_EXAMPLE4_SUBFLOW,
@@ -487,7 +487,7 @@ describe('Tests for the FlowScannerEngine', () => {
         const childFlowFile: string = path.join(PARENT_WITH_SOURCE_CALLS_SUB_WITH_SINK_WORKSPACE, 'child_with_sink.flow-meta.xml');
         const expectedViolation: Violation = {
             ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-            message: "User controlled data flows into recordCreates element data in run mode: SystemModeWithoutSharing",
+            message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
             codeLocations: [
                 {
                     file: parentFlowFile,
@@ -586,7 +586,7 @@ describe('Tests for the FlowScannerEngine', () => {
         const childFlowFile: string = path.join(PARENT_WITH_SINK_CALLS_SUB_WITH_SOURCE_WORKSPACE, 'child_with_source.flow-meta.xml');
         const expectedViolation: Violation = {
             ruleName: "PreventPassingUserDataIntoElementWithoutSharing",
-            message: "User controlled data flows into recordLookups element selector in run mode: SystemModeWithoutSharing",
+            message: "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
             codeLocations: [
                 {
                     file: childFlowFile,
diff --git a/packages/code-analyzer-flow-engine/test/python/FlowScannerCommandWrapper.test.ts b/packages/code-analyzer-flow-engine/test/python/FlowScannerCommandWrapper.test.ts
index 4a07fa75..ed2bc2e9 100644
--- a/packages/code-analyzer-flow-engine/test/python/FlowScannerCommandWrapper.test.ts
+++ b/packages/code-analyzer-flow-engine/test/python/FlowScannerCommandWrapper.test.ts
@@ -38,7 +38,7 @@ describe('FlowScannerCommandWrapper implementations', () => {
             [PATH_TO_EXAMPLE1, PATH_TO_EXAMPLE2],
             [PATH_TO_EXAMPLE1, PATH_TO_EXAMPLE2],
             tempLogFile,
-            ['MissingFaultHandler'], // adding in one optional query as well
+            ['PreventPassingUserDataIntoElementWithoutSharing', 'PreventPassingUserDataIntoElementWithSharing', 'MissingFaultHandler'],
             statusProcessorFunction);
 
         // The `counter` property is irrelevant to us, and causes problems across platforms. So delete it.
         for (const queryName of Object.keys(results.results)) {
diff --git a/packages/code-analyzer-flow-engine/test/test-data/goldfiles/FlowScannerCommandWrapper.test.ts/results.goldfile.json b/packages/code-analyzer-flow-engine/test/test-data/goldfiles/FlowScannerCommandWrapper.test.ts/results.goldfile.json
index 78c8a76b..7f78c242 100644
--- a/packages/code-analyzer-flow-engine/test/test-data/goldfiles/FlowScannerCommandWrapper.test.ts/results.goldfile.json
+++ b/packages/code-analyzer-flow-engine/test/test-data/goldfiles/FlowScannerCommandWrapper.test.ts/results.goldfile.json
@@ -1,89 +1,46 @@
 {
-    "preset": "Penetration Testing",
+    "preset": "custom",
     "help_url": null,
-    "result_id": "6b042660",
+    "result_id": "8067e8f5",
     "service_version": "0.9.9",
     "flow_scanner_version": "0.9.9",
     "report_label": "scan of code-analyzer-core",
     "email": null,
-    "scan_start": "2025-10-24 10:55:33",
-    "scan_end": "2025-10-24 10:55:33",
+    "scan_start": "2025-11-26 10:42:22",
+    "scan_end": "2025-11-26 10:42:22",
     "results": {
-        "FlowSecurity.SystemModeWithSharing.recordDeletes.selector": [
+        "MissingFaultHandler": [
             {
-                "query_name": "Flow: SystemModeWithSharing recordDeletes selector",
+                "query_id": "MissingFaultHandler",
+                "query_name": "Missing Fault Handler",
                 "severity": "Flow_Low_Severity",
-                "description": "User controlled data flows into recordDeletes element selector in run mode: SystemModeWithSharing",
-                "elem": null,
+                "description": "An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
                 "elem_name": "delete_created_case",
                 "field": "delete_created_case",
                 "elem_code": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
-                "elem_line_no": null,
-                "filename": null,
-                "flow_type": "Screen",
-                "flow": [
-                    {
-                        "influenced_var": "change_subject_of_case",
-                        "influencer_var": "change_subject_of_case",
-                        "element_name": "change_subject_of_case",
-                        "comment": "Initialization",
-                        "line_no": 124,
-                        "source_text": "\n change_subject_of_case\n String\n \n another_case_holder.Subject\n \n change subject of case\n InputField\n true\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
-                    },
-                    {
-                        "influenced_var": "another_case_holder.Subject",
-                        "influencer_var": "change_subject_of_case",
-                        "element_name": "change_subj_assignment",
-                        "comment": "Variable Assignment",
-                        "line_no": 26,
-                        "source_text": "\n another_case_holder.Subject\n Assign\n \n change_subject_of_case\n \n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
-                    },
-                    {
-                        "influenced_var": "delete_created_case",
-                        "influencer_var": "another_case_holder",
-                        "element_name": "delete_created_case",
-                        "comment": "flow into recordDeletes via influence over delete_created_case in run mode SystemModeWithSharing",
-                        "line_no": 69,
-                        "source_text": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
-                    }
-                ]
-            }
-        ],
-        "MissingFaultHandler": [
-            {
-                "query_name": "Missing Fault Handler",
-                "severity": "Flow_Low_Severity",
-                "description": "This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
-                "elem": "create_case",
-                "elem_name": "create_case",
-                "field": "create_case",
-                "elem_code": "\n create_from_record\n create_case\n \n 1045\n 224\n \n press_next\n \n case_holder\n ",
-                "elem_line_no": 58,
+                "elem_line_no": 69,
                 "filename": "__PATH_TO_EXAMPLE1__",
                 "flow_type": "Screen",
                 "flow": null
             },
             {
+                "query_id": "MissingFaultHandler",
                 "query_name": "Missing Fault Handler",
                 "severity": "Flow_Low_Severity",
-                "description": "This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
-                "elem": "update_to_new_subject",
+                "description": "An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
                 "elem_name": "update_to_new_subject",
                 "field": "update_to_new_subject",
                 "elem_code": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
                 "elem_line_no": 102,
-                "filename": "__PATH_TO_EXAMPLE1__",
+                "filename": "__PATH_TO_EXAMPLE2__",
                 "flow_type": "Screen",
                 "flow": null
             },
             {
+                "query_id": "MissingFaultHandler",
                 "query_name": "Missing Fault Handler",
                 "severity": "Flow_Low_Severity",
-                "description": "This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
-                "elem": "create_case",
+                "description": "An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
                 "elem_name": "create_case",
                 "field": "create_case",
                 "elem_code": "\n create_from_record\n create_case\n \n 1045\n 224\n \n press_next\n \n case_holder\n ",
@@ -93,56 +50,97 @@
                 "flow": null
            },
            {
+                "query_id": "MissingFaultHandler",
+                "query_name": "Missing Fault Handler",
+                "severity": "Flow_Low_Severity",
+                "description": "An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
                 "elem_name": "delete_created_case",
                 "field": "delete_created_case",
                 "elem_code": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
                 "elem_line_no": 69,
-                "filename": "__PATH_TO_EXAMPLE1__",
+                "filename": "__PATH_TO_EXAMPLE2__",
                 "flow_type": "Screen",
                 "flow": null
             },
             {
+                "query_id": "MissingFaultHandler",
                 "query_name": "Missing Fault Handler",
                 "severity": "Flow_Low_Severity",
-                "description": "This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
-                "elem": "update_to_new_subject",
+                "description": "An element that can fire fault events is missing fault handlers. Add fault handlers to all Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
                 "elem_name": "update_to_new_subject",
                 "field": "update_to_new_subject",
                 "elem_code": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
                 "elem_line_no": 102,
-                "filename": "__PATH_TO_EXAMPLE2__",
+                "filename": "__PATH_TO_EXAMPLE1__",
                 "flow_type": "Screen",
                 "flow": null
-            },
+            }
+        ],
+        "PreventPassingUserDataIntoElementWithSharing": [
             {
-                "query_name": "Missing Fault Handler",
-                "severity": "Flow_Low_Severity",
-                "description": "This rule detects when elements that can fire fault events are missing fault handlers. The rule currently detects Create Records, Update Records, Delete Records, Action Calls, and Subflows.",
-                "elem": "delete_created_case",
+                "query_id": "PreventPassingUserDataIntoElementWithSharing",
+                "query_name": "User Data DML in System Mode With Sharing",
+                "severity": "Flow_Moderate_Severity",
+                "description": "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context with sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
                 "elem_name": "delete_created_case",
                 "field": "delete_created_case",
                 "elem_code": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
                 "elem_line_no": 69,
                 "filename": "__PATH_TO_EXAMPLE2__",
                 "flow_type": "Screen",
-                "flow": null
-            }
-        ],
-        "FlowSecurity.SystemModeWithoutSharing.recordUpdates.data": [
+                "flow": [
+                    {
+                        "influenced_var": "change_subject_of_case",
+                        "influencer_var": "change_subject_of_case",
+                        "element_name": "change_subject_of_case",
+                        "comment": "Initialization",
+                        "line_no": 124,
+                        "source_text": "\n change_subject_of_case\n String\n \n another_case_holder.Subject\n \n change subject of case\n InputField\n true\n ",
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                    },
+                    {
+                        "influenced_var": "another_case_holder.Subject",
+                        "influencer_var": "change_subject_of_case",
+                        "element_name": "change_subj_assignment",
+                        "comment": "Variable Assignment",
+                        "line_no": 26,
+                        "source_text": "\n another_case_holder.Subject\n Assign\n \n change_subject_of_case\n \n ",
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                    },
+                    {
+                        "influenced_var": "delete_created_case",
+                        "influencer_var": "another_case_holder",
+                        "element_name": "delete_created_case",
+                        "comment": "flow into recordDeletes via influence over delete_created_case in run mode SystemModeWithSharing",
+                        "line_no": 69,
+                        "source_text": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                    }
+                ]
+            },
             {
-                "query_name": "Flow: SystemModeWithoutSharing recordUpdates data",
-                "severity": "Flow_High_Severity",
-                "description": "User controlled data flows into recordUpdates element data in run mode: SystemModeWithoutSharing",
-                "elem": null,
+                "query_id": "PreventPassingUserDataIntoElementWithSharing",
+                "query_name": "User Data DML in System Mode With Sharing",
+                "severity": "Flow_Moderate_Severity",
+                "description": "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context with sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
                 "elem_name": "update_to_new_subject",
                 "field": "update_to_new_subject",
                 "elem_code": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
-                "elem_line_no": null,
-                "filename": null,
+                "elem_line_no": 102,
+                "filename": "__PATH_TO_EXAMPLE2__",
                 "flow_type": "Screen",
                 "flow": [
                     {
@@ -152,7 +150,7 @@
                         "comment": "Initialization",
                         "line_no": 124,
                         "source_text": "\n change_subject_of_case\n String\n \n another_case_holder.Subject\n \n change subject of case\n InputField\n true\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE1__"
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
                     },
                     {
                         "influenced_var": "another_case_holder.Subject",
@@ -161,31 +159,31 @@
                         "comment": "Variable Assignment",
                         "line_no": 26,
                         "source_text": "\n another_case_holder.Subject\n Assign\n \n change_subject_of_case\n \n ",
-                        "flow_path": "__PATH_TO_EXAMPLE1__"
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
                    },
                     {
                         "influenced_var": "update_to_new_subject",
                         "influencer_var": "another_case_holder",
                         "element_name": "update_to_new_subject",
-                        "comment": "flow into recordUpdates via influence over update_to_new_subject in run mode SystemModeWithoutSharing",
+                        "comment": "flow into recordUpdates via influence over update_to_new_subject in run mode SystemModeWithSharing",
                         "line_no": 102,
                         "source_text": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE1__"
+                        "flow_path": "__PATH_TO_EXAMPLE2__"
                     }
                 ]
             }
         ],
-        "FlowSecurity.SystemModeWithoutSharing.recordDeletes.selector": [
+        "PreventPassingUserDataIntoElementWithoutSharing": [
             {
-                "query_name": "Flow: SystemModeWithoutSharing recordDeletes selector",
+                "query_id": "PreventPassingUserDataIntoElementWithoutSharing",
+                "query_name": "User Data DML in System Mode Without Sharing",
                 "severity": "Flow_High_Severity",
-                "description": "User controlled data flows into recordDeletes element selector in run mode: SystemModeWithoutSharing",
-                "elem": null,
+                "description": "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
                 "elem_name": "delete_created_case",
                 "field": "delete_created_case",
                 "elem_code": "\n delete_created_case\n \n 247\n 201\n \n exit_screen\n \n another_case_holder\n ",
-                "elem_line_no": null,
-                "filename": null,
+                "elem_line_no": 69,
+                "filename": "__PATH_TO_EXAMPLE1__",
                 "flow_type": "Screen",
                 "flow": [
                     {
@@ -216,19 +214,17 @@
                         "flow_path": "__PATH_TO_EXAMPLE1__"
                     }
                 ]
-            }
-        ],
-        "FlowSecurity.SystemModeWithSharing.recordUpdates.data": [
+            },
             {
-                "query_name": "Flow: SystemModeWithSharing recordUpdates data",
-                "severity": "Flow_Low_Severity",
-                "description": "User controlled data flows into recordUpdates element data in run mode: SystemModeWithSharing",
-                "elem": null,
+                "query_id": "PreventPassingUserDataIntoElementWithoutSharing",
+                "query_name": "User Data DML in System Mode Without Sharing",
+                "severity": "Flow_High_Severity",
+                "description": "User controlled data is sent to a DB Element (RecordLookups, RecordCreates, RecordUpdates, RecordDeletes) in System context without sharing. This can result in privilege escalation if the user does not have permission to access the underlying record.",
                 "elem_name": "update_to_new_subject",
                 "field": "update_to_new_subject",
                 "elem_code": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
-                "elem_line_no": null,
-                "filename": null,
+                "elem_line_no": 102,
+                "filename": "__PATH_TO_EXAMPLE1__",
                 "flow_type": "Screen",
                 "flow": [
                     {
@@ -238,7 +234,7 @@
                         "comment": "Initialization",
                         "line_no": 124,
                         "source_text": "\n change_subject_of_case\n String\n \n another_case_holder.Subject\n \n change subject of case\n InputField\n true\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                        "flow_path": "__PATH_TO_EXAMPLE1__"
                     },
                     {
                         "influenced_var": "another_case_holder.Subject",
@@ -247,16 +243,16 @@
                         "comment": "Variable Assignment",
                         "line_no": 26,
                         "source_text": "\n another_case_holder.Subject\n Assign\n \n change_subject_of_case\n \n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                        "flow_path": "__PATH_TO_EXAMPLE1__"
                     },
                     {
                         "influenced_var": "update_to_new_subject",
                         "influencer_var": "another_case_holder",
                         "element_name": "update_to_new_subject",
-                        "comment": "flow into recordUpdates via influence over update_to_new_subject in run mode SystemModeWithSharing",
+                        "comment": "flow into recordUpdates via influence over update_to_new_subject in run mode SystemModeWithoutSharing",
                         "line_no": 102,
                         "source_text": "\n update_to_new_subject\n \n 50\n 355\n \n confirm_delete\n \n another_case_holder\n ",
-                        "flow_path": "__PATH_TO_EXAMPLE2__"
+                        "flow_path": "__PATH_TO_EXAMPLE1__"
                     }
                 ]
             }
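
The reshuffled goldfile reflects the new result schema: entries are grouped under their query_id, and query_name survives only as a human-readable label. A minimal sketch of the shape check the wrapper applies to each entry, mirroring the fields ruleResultIsValid actually requires (only query_id and severity are shown here):

    // Hedged sketch, not the wrapper's full validation logic:
    function looksLikeRuleResult(r: object): boolean {
        return 'query_id' in r && typeof (r as {query_id: unknown}).query_id === 'string'
            && 'severity' in r && typeof (r as {severity: unknown}).severity === 'string';
    }
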