Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[hlopt] cache based on opcode array #11797

Open
wants to merge 7 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/generators/genhl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type context = {
cfunctions : fundecl DynArray.t;
cconstants : (constval, (global * int array)) lookup;
optimize : bool;
opt_cache : bool;
w_null_compare : bool;
overrides : (string * path, bool) Hashtbl.t;
defined_funs : (int,unit) Hashtbl.t;
Expand Down Expand Up @@ -3434,7 +3435,7 @@ and make_fun ?gen_content ctx name fidx f cthis cparent =
Hashtbl.add ctx.defined_funs fidx ();
let f = if ctx.optimize && (gen_content = None || name <> ("","")) then begin
let t = Timer.timer ["generate";"hl";"opt"] in
let f = Hlopt.optimize ctx.dump_out (DynArray.get ctx.cstrings.arr) hlf f in
let f = Hlopt.optimize ctx.dump_out ctx.opt_cache (DynArray.get ctx.cstrings.arr) hlf f in
t();
f
end else
Expand Down Expand Up @@ -4127,6 +4128,7 @@ let create_context com dump =
let ctx = {
com = com;
optimize = not (Common.raw_defined com "hl_no_opt");
opt_cache = not (Common.raw_defined com "hl_no_opt_cache");
w_null_compare = Common.raw_defined com "hl_w_null_compare";
dump_out = if dump then Some (IO.output_channel (open_out_bin "dump/hlopt.txt")) else None;
m = method_context 0 HVoid null_capture false;
Expand Down
50 changes: 42 additions & 8 deletions src/generators/hlopt.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,30 @@ let _optimize (f:fundecl) =
r_reg_moved = reg_moved;
}

(*
	Compares two opcodes while ignoring one operand per listed constructor
	(the position matched by [_] in each case). For those opcodes only the
	remaining operands are compared; every other pair of opcodes falls back
	to structural equality.
	NOTE(review): the ignored operand is presumably an index into a
	per-compilation table (e.g. the global int table for OInt) that can
	differ between runs for the same value - confirm for each constructor.
*)
let same_op_except_index op1 op2 =
match op1, op2 with
| OInt (r1,_), OInt (r2, _) -> r1 = r2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this and other lines here, I don't understand why OInt(1, 1) and OInt(1, 2) should be considered the same op.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's OInt reg * int index; the only thing that matters in optimized code is the used register / control flow, not the index in the global int table (e.g. in two different runs, the same int 55 can have index 10 or 35). These indexes are "fixed" by the code related to c_remap_indexes.

I should double-check whether the replacement is always correct. I'm also trying to remap field indexes, but there are some errors x(

Copy link
Contributor Author

@yuxiaomao yuxiaomao Oct 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same_op is confusing. I should probably rename the function but I don't have a good idea. Maybe same_op_except_index

| OFloat (r1,_), OFloat (r2,_) -> r1 = r2
| OBytes (r1,_), OBytes (r2,_) -> r1 = r2
| OString (r1,_), OString (r2,_) -> r1 = r2
| OCall0 (r1,_), OCall0 (r2,_) -> r1 = r2
| OCall1 (r1,_,a1), OCall1 (r2,_,a2) -> r1 = r2 && a1 = a2
| OCall2 (r1,_,a1,b1), OCall2 (r2,_,a2,b2) -> r1 = r2 && a1 = a2 && b1 = b2
| OCall3 (r1,_,a1,b1,c1), OCall3 (r2,_,a2,b2,c2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2
| OCall4 (r1,_,a1,b1,c1,d1), OCall4 (r2,_,a2,b2,c2,d2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2 && d1 = d2
| OCallN (r1,_,rl1), OCallN (r2,_,rl2) -> r1 = r2 && rl1 = rl2
| OStaticClosure (r1,_), OStaticClosure (r2,_) -> r1 = r2
| OInstanceClosure (r1,_,v1), OInstanceClosure (r2,_,v2) -> r1 = r2 && v1 = v2
| OGetGlobal (r1,_), OGetGlobal (r2,_) -> r1 = r2
| OSetGlobal (_,r1), OSetGlobal (_,r2) -> r1 = r2
| ODynGet (r1,o1,_), ODynGet (r2,o2,_) -> r1 = r2 && o1 = o2
| ODynSet (o1,_,v1), ODynSet (o2,_,v2) -> o1 = o2 && v1 = v2
| OType (r1,_), OType (r2,_) -> r1 = r2
| _ -> op1 = op2

type cache_elt = {
c_old_code : opcode array;
c_old_fnargs : int;
c_code : opcode array;
c_rctx : rctx;
c_remap_indexes : int array;
Expand All @@ -1059,13 +1082,21 @@ type cache_elt = {
let opt_cache = ref PMap.empty
let used_mark = ref 0

let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) =
let old_code = match dump with None -> f.code | Some _ -> Array.copy f.code in
let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) =
let nargs f = (match f.ftype with HFun (args,_) -> List.length args | _ -> Globals.die "" __LOC__) in
let sign = if f.fpath <> ("","") then fundecl_name f else (Printf.sprintf "%s:%d" hxf.tf_expr.epos.pfile hxf.tf_expr.epos.pmin) in
try
let c = PMap.find hxf (!opt_cache) in
if not usecache then raise Not_found;
let c = PMap.find sign (!opt_cache) in
if Array.length f.code <> Array.length c.c_code then raise Not_found;
if Array.length f.regs <> Array.length c.c_rctx.r_reg_map then raise Not_found;
if nargs f <> c.c_old_fnargs then raise Not_found;
Array.iteri (fun i op1 ->
let op2 = Array.unsafe_get f.code i in
if not (same_op_except_index op1 op2) then raise Not_found;
) c.c_old_code;
let code = if c.c_last_used = !used_mark then Array.copy c.c_code else c.c_code in
c.c_last_used <- !used_mark;
if Array.length f.code <> Array.length c.c_code then Globals.die "" __LOC__;
let code = c.c_code in
Array.iter (fun i ->
let op = (match Array.unsafe_get code i, Array.unsafe_get f.code i with
| OInt (r,_), OInt (_,idx) -> OInt (r,idx)
Expand All @@ -1088,8 +1119,9 @@ let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) =
| _ -> Globals.die "" __LOC__) in
Array.unsafe_set code i op
) c.c_remap_indexes;
remap_fun c.c_rctx { f with code = code } dump get_str old_code
remap_fun c.c_rctx { f with code = code } dump get_str f.code
with Not_found ->
let old_code = match dump, usecache with None, true | Some _, _ -> Array.copy f.code | _ -> f.code in
let rctx = _optimize f in
let old_ops = f.code in
let fopt = remap_fun rctx f dump get_str old_code in
Expand All @@ -1109,12 +1141,14 @@ let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) =
DynArray.add idxs i
| _ -> ()
) old_ops;
(*opt_cache := PMap.add hxf {
if usecache then opt_cache := PMap.add sign {
c_old_code = old_code;
c_old_fnargs = nargs f;
c_code = old_ops;
c_rctx = rctx;
c_last_used = !used_mark;
c_remap_indexes = DynArray.to_array idxs;
} (!opt_cache);*)
} (!opt_cache);
fopt

let clean_cache() =
Expand Down
Loading