Skip to content

Commit

Permalink
[inferpython][pulse] avoid dereference after field access
Browse files Browse the repository at this point in the history
Summary: currently all frontends use a C-like memory encoding inside Pulse. We experiment a new one where each field points directly to its content instead of systematiccaly using an extra dereference. This is possible because, contrary to C, a langage like Python never exposes the **address** of a field.

Reviewed By: ngorogiannis

Differential Revision:
D67077638

Privacy Context Container: L1208441

fbshipit-source-id: 1f672299b4720e1dbabfcadc656aa8ef307a604c
  • Loading branch information
davidpichardie authored and facebook-github-bot committed Dec 12, 2024
1 parent 8c67e38 commit 13b983b
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 24 deletions.
22 changes: 19 additions & 3 deletions infer/src/pulse/Pulse.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,15 @@ module PulseTransferFunctions = struct
([astate], path, astate_n)
| ContinueProgram astate -> (
match instr with
| Load {id= lhs_id; e= Lfield _ as rhs_exp; loc} when Language.curr_language_is Python ->
(* note: Python frontend only uses load during closure calls, for captured arguments *)
let astates =
(let++ astate, rhs_addr = PulseOperations.eval path Read loc rhs_exp astate in
PulseOperations.write_load_id lhs_id (ValueOrigin.unknown rhs_addr) astate )
|> SatUnsat.to_list
|> PulseReport.report_results analysis_data loc
in
(astates, path, astate_n)
| Load {id= lhs_id; e= rhs_exp; loc; typ} ->
(* [lhs_id := *rhs_exp] *)
let model_opt = PulseLoadInstrModels.dispatch ~load:rhs_exp in
Expand All @@ -1327,7 +1336,7 @@ module PulseTransferFunctions = struct
in
let rhs_addr = ValueOrigin.value rhs_vo in
and_is_int_if_integer_type typ rhs_addr astate
>>|| PulseOperations.hack_python_propagates_type_on_load tenv path loc rhs_exp rhs_addr
>>|| PulseOperations.hack_propagates_type_on_load tenv path loc rhs_exp rhs_addr
>>|| PulseOperations.add_static_type_objc_class tenv typ rhs_addr loc
>>|| PulseOperations.write_load_id lhs_id rhs_vo )
|> SatUnsat.to_list
Expand Down Expand Up @@ -1404,8 +1413,15 @@ module PulseTransferFunctions = struct
then AbductiveDomain.apply_unknown_effect hist lhs_addr astate
else astate
in
let=+ astate =
PulseOperations.write_deref path loc ~ref:lhs_addr_hist ~obj:(rhs_addr, hist) astate
let+* astate =
match lhs_exp with
| Lfield ({exp}, fieldname, _) when Language.curr_language_is Python ->
let+* astate, ref = PulseOperations.eval path Read loc exp astate in
PulseOperations.write_field path loc ~ref fieldname ~obj:(rhs_addr, hist) astate
| _ ->
Sat
(PulseOperations.write_deref path loc ~ref:lhs_addr_hist ~obj:(rhs_addr, hist)
astate )
in
let* astate =
PulseRetainCycleChecker.check_retain_cycles_store tenv loc (rhs_addr, hist) astate
Expand Down
17 changes: 10 additions & 7 deletions infer/src/pulse/PulseModelsDSL.ml
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,13 @@ module Syntax = struct
PulseOperations.eval_deref path location exp |> exec_partial_operation


let load_access ?(no_access = false) aval access : aval model_monad =
let load_access ?(no_access = false) ?(deref = true) aval access : aval model_monad =
let* {path; location} = get_data in
let mode = if no_access then NoAccess else Read in
PulseOperations.eval_deref_access path mode location aval access
>> sat |> exec_partial_operation
let eval_access =
if deref then PulseOperations.eval_deref_access else PulseOperations.eval_access
in
eval_access path mode location aval access >> sat |> exec_partial_operation


let load x = access Read x Dereference
Expand Down Expand Up @@ -573,9 +575,10 @@ module Syntax = struct
PulseOperations.write_field path location ~ref ~obj field >> sat |> exec_partial_command


let store_field ~ref field obj : unit model_monad =
let store_field ?(deref = true) ~ref field obj : unit model_monad =
let* {path; location} = get_data in
PulseOperations.write_deref_field path location ~ref ~obj field >> sat |> exec_partial_command
let write = if deref then PulseOperations.write_deref_field else PulseOperations.write_field in
write path location ~ref ~obj field >> sat |> exec_partial_command


let store ~ref obj : unit model_monad =
Expand Down Expand Up @@ -655,7 +658,7 @@ module Syntax = struct
unreachable


let constructor type_name fields : aval model_monad =
let constructor ?(deref = true) type_name fields : aval model_monad =
let exp =
Exp.Sizeof
{ typ= Typ.mk_struct type_name
Expand All @@ -668,7 +671,7 @@ module Syntax = struct
let* () =
list_iter fields ~f:(fun (fieldname, obj) ->
let field = Fieldname.make type_name fieldname in
store_field ~ref:new_obj field obj )
store_field ~deref ~ref:new_obj field obj )
in
ret new_obj

Expand Down
6 changes: 3 additions & 3 deletions infer/src/pulse/PulseModelsDSL.mli
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ module Syntax : sig

val access : access_mode -> aval -> Access.t -> aval model_monad

val load_access : ?no_access:bool -> aval -> Access.t -> aval model_monad
val load_access : ?no_access:bool -> ?deref:bool -> aval -> Access.t -> aval model_monad

val load : aval -> aval model_monad
(** read the Dereference access from the value *)
Expand All @@ -164,7 +164,7 @@ module Syntax : sig

val new_ : Exp.t -> aval model_monad

val constructor : Typ.Name.t -> (string * aval) list -> aval model_monad
val constructor : ?deref:bool -> Typ.Name.t -> (string * aval) list -> aval model_monad
(** [constructor_dsl typ_name fields] builds a fresh object of type [typ_name] and initializes its
fields using list [fields] *)

Expand All @@ -177,7 +177,7 @@ module Syntax : sig

val write_field : ref:aval -> Fieldname.t -> aval -> unit model_monad

val store_field : ref:aval -> Fieldname.t -> aval -> unit model_monad
val store_field : ?deref:bool -> ref:aval -> Fieldname.t -> aval -> unit model_monad

val store : ref:aval -> aval -> unit model_monad

Expand Down
16 changes: 8 additions & 8 deletions infer/src/pulse/PulseModelsPython.ml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ module Dict = struct
if not (Int.equal (List.length args) (List.length keys)) then
L.die InternalError "Dict.make expects two list of same length@\n" ;
let bindings = List.zip_exn keys args in
let* dict = constructor dict_tname bindings in
let* dict = constructor ~deref:false dict_tname bindings in
ret dict


Expand All @@ -64,15 +64,15 @@ module Dict = struct
let get dict key : DSL.aval DSL.model_monad =
let open DSL.Syntax in
let* field = sil_fieldname_from_string_value_exn dict_tname key in
let* load_res = load_access dict (FieldAccess field) in
let* load_res = load_access ~deref:false dict (FieldAccess field) in
let* () = propagate_static_type_on_load dict key load_res in
ret load_res


let set dict key value : unit DSL.model_monad =
let open DSL.Syntax in
let* field = sil_fieldname_from_string_value_exn dict_tname key in
let* () = store_field ~ref:dict field value in
let* () = store_field ~deref:false ~ref:dict field value in
ret ()
end

Expand All @@ -86,7 +86,7 @@ module Tuple = struct
let field = str_field_of_int i in
(field, aval) )
in
let* dict = constructor tuple_tname bindings in
let* dict = constructor ~deref:false tuple_tname bindings in
ret dict


Expand All @@ -98,13 +98,13 @@ module Tuple = struct
fresh ()
| Some i ->
let field = Fieldname.make tuple_tname (str_field_of_int i) in
load_access tuple (FieldAccess field)
load_access ~deref:false tuple (FieldAccess field)
end

module PyModule = struct
let make name : DSL.aval DSL.model_monad =
let open DSL.Syntax in
constructor (module_tname name) []
constructor ~deref:false (module_tname name) []
end

let build_tuple args : model =
Expand Down Expand Up @@ -299,7 +299,7 @@ let make_int arg : model =
let* res =
match opt_int with
| None ->
constructor int_tname []
constructor ~deref:false int_tname []
| Some i ->
let* res = int i in
let* () = and_dynamic_type_is res (Typ.mk_struct int_tname) in
Expand All @@ -312,7 +312,7 @@ let make_none : model =
let open DSL.Syntax in
start_model
@@ fun () ->
let* none = constructor none_tname [] in
let* none = constructor ~deref:false none_tname [] in
assign_ret none


Expand Down
2 changes: 1 addition & 1 deletion infer/src/pulse/PulseOperations.ml
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ let havoc_deref_field path location addr_trace field trace_obj astate =
astate


let hack_python_propagates_type_on_load tenv path loc rhs_exp addr astate =
let hack_propagates_type_on_load tenv path loc rhs_exp addr astate =
( if Language.curr_language_is Hack then
(* The Hack frontend do not propagate types from declarations to usage,
so we redo part of the work ourself *)
Expand Down
2 changes: 1 addition & 1 deletion infer/src/pulse/PulseOperations.mli
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ val eval_deref_access :
val eval_proc_name :
PathContext.t -> Location.t -> Exp.t -> t -> (t * Procname.t option) AccessResult.t SatUnsat.t

val hack_python_propagates_type_on_load :
val hack_propagates_type_on_load :
Tenv.t -> PathContext.t -> Location.t -> Exp.t -> AbstractValue.t -> t -> t

val add_static_type_objc_class : Tenv.t -> Typ.t -> AbstractValue.t -> Location.t -> t -> t
Expand Down
2 changes: 1 addition & 1 deletion infer/tests/codetoanalyze/python/pulse/issues.exp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
async_global.py, async_global.__module_body__, 24, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,parameter `globals` of async_global.__module_body__,allocated by call to `new` (modelled),assigned,allocated by call to `new` (modelled),assigned,in call to `closure:async_global:0.call`,parameter `__this` of closure:async_global:0.call,in call to `async_global.set_unwaited_global`,parameter `globals` of async_global.set_unwaited_global,return from call to `async_global.set_unwaited_global`,return from call to `closure:async_global:0.call`,in call to `closure:async_global:1.call`,parameter `__this` of closure:async_global:1.call,in call to `async_global.await_global`,parameter `globals` of async_global.await_global,return from call to `async_global.await_global`,return from call to `closure:async_global:1.call`,when calling `closure:async_global:0.call` here,parameter `__this` of closure:async_global:0.call,when calling `async_global.set_unwaited_global` here,allocated by async call here,awaitable becomes unreachable here]
async_global.py, async_global.__module_body__, 24, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,parameter `globals` of async_global.__module_body__,assigned,when calling `closure:async_global:0.call` here,parameter `__this` of closure:async_global:0.call,when calling `async_global.set_unwaited_global` here,allocated by async call here,awaitable becomes unreachable here]
async_import_simple.py, async_import_simple.with_import_bad, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here]
async_import_simple.py, async_import_simple.with_from_import_bad, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here]
async_import_with_package.py, async_import_with_package.bad1, 1, PULSE_UNAWAITED_AWAITABLE, no_bucket, ERROR, [allocation part of the trace starts here,allocated by async call here,awaitable becomes unreachable here]
Expand Down

0 comments on commit 13b983b

Please sign in to comment.