Skip to content

Commit

Permalink
Support for rigid type variables (#560)
Browse files Browse the repository at this point in the history
Until this commit, we had no notion of rigid type variables
in Gradualizer. All type variables were treated as flexible
and thus errors in polymorphic functions were not discovered.
Even obvious errors like

    -spec id(A) -> A.
    id(_X) -> banana.

were left unreported.

This commit adds a new syntactic form of `type()`:
`{rigid_var, anno(), atom()}`. All type variables mentioned
in a spec are convert to rigid_vars during typechecking
of the particular function. Rigid type variables have no
subtypes or supertypes (except for itself, top, none(),
and any()), and therefore we treat them as types we know
nothing about (as they can be anything; their type
determines the caller, not the callee).

I've also added some missing cases to `subst_ty/2`.

* Move the poly_2 test functions to should_fail

All these function coming from the paper "Bidirectional Typing
for Erlang" (N. Rajendrakumar, A. Bieniusa, 2021) require
higher-rank (at least rank-2) polymorphism. To be able to
have higher-ranked polymorphism, we would have to be able to
express inner quantifiers. We don't have that in the typespec
syntax, so the traditional way of interpreting type variables
in specs is to treat them all as quantified at the top-level.

Authors of the paper took a non-standard approach, to quote it:

    Type specifications in Erlang consider all type variables
    as universal and thus restrict the types to prefix or rank-1
    polymorphism.

    For allowing higher-rank polymorphism, we are interpreting
    the type specification differently, namely we are adding
    the quantifier to the type variable with the smallest scope.

    -spec poly_2(fun((A) -> A)) -> {boolean(), integer()}.

    For example, the type spec for function poly_2 above is
    interpreted as
        (∀a.(a) → a) → {boolean(), integer()}
    instead of
        ∀a.((a → a) → {boolean(), integer()})

I argue that this interpretation of type variables would
be confusing for users. Additionally, I think that there
is not much value in having higher-rank polymorphism in
a dynamically/gradually typed language. In Haskell, it
is useful for sure (even though one does not encounter
it very often) but this is because there is a strict
typechecker that doesn't accept anything you cannot type
well. Here, in Erlang, if you ever come to a situation
where you need higher-rank polymorphism (and I think it
is quite unlikely), you can always fallback to any().
  • Loading branch information
xxdavid authored Apr 18, 2024
1 parent 81385f6 commit ac139bc
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 40 deletions.
7 changes: 3 additions & 4 deletions src/absform.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,14 @@ normalize_record_field({typed_record_field,
_Type} = Complete) ->
Complete.

-type bounded_fun() :: gradualizer_type:af_constrained_function_type().

%% @doc Turns all function types into bounded function types. Add default empty
%% constraints if missing.
-spec normalize_function_type_list(FunTypeList) -> FunTypeList when
FunTypeList :: gradualizer_type:af_function_type_list().
-spec normalize_function_type_list(gradualizer_type:af_function_type_list()) -> [bounded_fun()].
normalize_function_type_list(FunTypeList) ->
?assert_type(lists:map(fun normalize_function_type/1, FunTypeList), nonempty_list()).

-type bounded_fun() :: gradualizer_type:af_constrained_function_type().

-spec normalize_function_type(bounded_fun()) -> bounded_fun();
(gradualizer_type:af_fun_type()) -> bounded_fun().
normalize_function_type({type, _, 'bounded_fun', [_FunType, _FunConst]} = BoundedFun) ->
Expand Down
1 change: 1 addition & 0 deletions src/gradualizer_int.erl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ is_int_subtype(Ty1, Ty2) ->
upper_bound_more_or_eq(R2, R1).

%% Greatest lower bound of two integer types.
-spec int_type_glb(type(), type()) -> type().
int_type_glb(Ty1, Ty2) ->
{Lo1, Hi1} = int_type_to_range(Ty1),
{Lo2, Hi2} = int_type_to_range(Ty2),
Expand Down
15 changes: 15 additions & 0 deletions src/gradualizer_type.erl
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@
af_binary_op/1,
af_constrained_function_type/0,
af_constraint/0,
af_clause/0,
af_guard_seq/0,
af_guard/0,
af_field_name/0,
af_fun_type/0,
af_function_type_list/0,
af_record_field/1,
af_record_field_type/0,
gr_any_type/0,
af_singleton_integer_type/0,
af_string/0,
af_unary_op/1,
Expand Down Expand Up @@ -244,6 +248,7 @@
| af_tuple_type()
| af_type_union()
| af_type_variable()
| gr_rigid_type_variable()
| af_user_defined_type().

-type af_annotated_type() ::
Expand Down Expand Up @@ -308,6 +313,14 @@
-type gr_type_var() :: atom() | string().
-type af_type_variable() :: {'var', anno(), gr_type_var()}.

%% Gradualizer: `rigid_var' is used for type variables (instead of plain `var')
%% originating from specs of the currently checked function. They are rigid
%% in the sense that they are fixed but completely unknown from the perspective
%% of the function definition. We want to be able to differentiate between
%% flexible (`var') and rigid type variables, and therefore we add this new
%% syntactic form although they are syntactically the same.
-type gr_rigid_type_variable() :: {'rigid_var', anno(), atom()}.

-type af_user_defined_type() ::
{'user_type', anno(), type_name(), [abstract_type()]}.

Expand Down Expand Up @@ -338,6 +351,8 @@
[af_lit_atom('is_subtype') |
[af_type_variable() | abstract_type()]]}. % [IsSubtype, [V, T]]

-type gr_any_type() :: {'type', anno(), 'any', []}.

-type af_singleton_integer_type() :: af_integer()
| af_character()
| af_unary_op(af_singleton_integer_type())
Expand Down
58 changes: 50 additions & 8 deletions src/typechecker.erl
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ compat_record_fields(_, _, _, _) ->

%% Returns a successful matching of two types. Convenience function for when
%% there were no type variables involved.
-spec ret(Seen) -> acc(Seen) when
Seen :: map() | type().
-spec ret(map()) -> acc(map());
(type()) -> acc(type()).
ret(Seen) ->
{Seen, constraints:empty()}.

Expand Down Expand Up @@ -1729,14 +1729,21 @@ free_vars({type, _, _, Args}, Vars) ->
free_vars(_, Vars) -> Vars.

-spec subst_ty(#{atom() | string() := type()}, [type()]) -> [type()];
(#{atom() | string() := type()}, [fun_ty()]) -> [fun_ty()];
(#{atom() | string() := type()}, type()) -> type().
subst_ty(Sub, Tys) when is_list(Tys) ->
[ subst_ty(Sub, Ty) || Ty <- Tys ];
subst_ty(Sub, Ty = {var, _, X}) ->
maps:get(X, Sub, Ty);
subst_ty(Sub, Ty = {rigid_var, _, X}) ->
maps:get(X, Sub, Ty);
subst_ty(Sub, {type, P, Name, Args}) ->
{type, P, Name, subst_ty(Sub, Args)};
subst_ty(Sub, {user_type, P, Name, Args}) ->
{user_type, P, Name, subst_ty(Sub, Args)};
subst_ty(Sub, {remote_type, P, [Mod, Fun, Args]}) ->
{remote_type, P, [Mod, Fun, subst_ty(Sub, Args)]};
subst_ty(Sub, {ann_type, P, [AnnoVar, Type]}) ->
{ann_type, P, [AnnoVar, subst_ty(Sub, Type)]};
subst_ty(_, Ty) -> Ty.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down Expand Up @@ -3745,6 +3752,8 @@ instantiate({var, _, TyVar}, Map) ->
Ty ->
{Ty, maps:new(), Map}
end;
instantiate(T = {rigid_var, _, _}, Map) ->
{T, maps:new(), Map};
instantiate(T = ?type(_), Map) ->
{T, maps:new(), Map};
instantiate({type, P, Ty, Args}, Map) ->
Expand Down Expand Up @@ -3791,6 +3800,37 @@ instantiate_inner([Ty | Tys], Map) ->
{[NewTy|NewTys], maps:merge(Vars, MoreVars), EvenNewerMap}.


%% Turns all type variables in a given type into rigid type variables.
%%
%% We want to use rigid type variables when typechecking a polymorphic
%% function because type variables used in its spec can be instantiated
%% to anything by the caller, so the polymorphic function cannot make
%% any assumptions about their type or value.
%%
%% Variables inside function constraints (e.g., `List' in the type fragment
%% `when List :: [term()]') are kept as they are, i.e., `var'.
-spec make_rigid_type_vars(type()) -> type();
([type()]) -> [type()].
make_rigid_type_vars(Tys) when is_list(Tys) ->
[ make_rigid_type_vars(Ty) || Ty <- Tys ];
make_rigid_type_vars({var, _, '_'} = T) ->
T;
make_rigid_type_vars({var, P, Var}) when is_atom(Var) ->
{rigid_var, P, Var};
make_rigid_type_vars({type, _, constraint, _Args} = T) ->
% do no descent into function constraints (bounds)
T;
make_rigid_type_vars({type, P, Tag, Args}) ->
{type, P, Tag, make_rigid_type_vars(Args)};
make_rigid_type_vars({user_type, P, Tag, Args}) ->
{user_type, P, Tag, make_rigid_type_vars(Args)};
make_rigid_type_vars({remote_type, P, [Mod, Fun, Args]}) ->
{remote_type, P, [Mod, Fun, make_rigid_type_vars(Args)]};
make_rigid_type_vars({ann_type, P, [AnnoVar, Type]}) ->
{ann_type, P, [AnnoVar, make_rigid_type_vars(Type)]};
make_rigid_type_vars(T) ->
T.

%% Infers (or at least propagates types from) fun/receive/try/case/if clauses.
-spec infer_clauses(env(), [gradualizer_type:abstract_clause()]) -> R when
R :: {type(), VarBinds :: env(), constraints:t()}.
Expand Down Expand Up @@ -3979,8 +4019,9 @@ check_clauses_intersection_throw_if_seen(ArgsTys, RefinedArgsTy, Clause, Seen, E
{type_error, ClauseError}
end.

-spec check_reachable_clauses(type(), Clauses, _Caps, [env()], Css, [type()], env()) -> R when
-spec check_reachable_clauses(type(), Clauses, Caps, [env()], Css, [type()], env()) -> R when
Clauses :: [gradualizer_type:abstract_clause()],
Caps :: capture_vars | bind_vars,
Css :: [constraints:t()],
R :: {[env()],
[constraints:t()],
Expand Down Expand Up @@ -4100,7 +4141,7 @@ check_clause(Env, ArgsTy, ResTy, C = {clause, P, Args, Guards, Block}, Caps) ->

%% Refine types by matching clause. MatchedTys are the types exhausted by
%% each pattern in the previous clause.
-spec refine_clause_arg_tys([type()], [type()], _Guards, env()) -> [type()].
-spec refine_clause_arg_tys([type()], [type()], gradualizer_type:af_guard_seq(), env()) -> [type()].
refine_clause_arg_tys(Tys, MatchedTys, [], Env) ->
Ty = type(tuple, Tys),
MatchedTy = type(tuple, MatchedTys),
Expand Down Expand Up @@ -4134,7 +4175,7 @@ refine_mismatch_using_guards(PatTys,
PatTys
end.

-spec do_refine_mismatch_using_guards(_Guards, [type()], _, _, env()) -> [type()].
-spec do_refine_mismatch_using_guards(gradualizer_type:af_guard() | [], [type()], _, _, env()) -> [type()].
do_refine_mismatch_using_guards([], PatTys, _, _, _) -> PatTys;
do_refine_mismatch_using_guards([{call, _, {atom, _, Fun}, Args = [{var, _, Var}]} | Tail],
PatTys, Pats, VEnv, Env) ->
Expand Down Expand Up @@ -4609,7 +4650,7 @@ no_guards({clause, _, _, Guards, _}) ->

%% Refines the types of bound variables using the assumption that a clause has
%% mismatched.
-spec refine_vars_by_mismatching_clause(_Clause, VEnv, env()) -> VEnv.
-spec refine_vars_by_mismatching_clause(gradualizer_type:af_clause(), venv(), env()) -> venv().
refine_vars_by_mismatching_clause({clause, _, Pats, Guards, _Block}, VEnv, Env) ->
PatternCantFail = are_patterns_matching_all_input(Pats, VEnv),
case Guards of
Expand Down Expand Up @@ -4771,7 +4812,8 @@ type_check_function(Env, {function, Anno, Name, NArgs, Clauses}) ->
case maps:find({Name, NArgs}, Env#env.fenv) of
{ok, FunTy} ->
NewEnv = Env#env{current_spec = FunTy},
FunTyNoPos = [ typelib:remove_pos(?assert_type(Ty, type())) || Ty <- FunTy ],
FunTyRigid = make_rigid_type_vars(FunTy),
FunTyNoPos = [ typelib:remove_pos(?assert_type(Ty, type())) || Ty <- FunTyRigid ],
Arity = clause_arity(hd(Clauses)),
case expect_fun_type(NewEnv, FunTyNoPos, Arity) of
{type_error, NotFunTy} ->
Expand Down
5 changes: 4 additions & 1 deletion src/typechecker.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

-record(clauses_controls, {exhaust}).

-record(env, {fenv = #{} :: map(),
-record(env, {fenv = #{} :: #{{atom(), arity()} =>
[gradualizer_type:af_constrained_function_type()]
| [gradualizer_type:gr_any_type()]
},
imported = #{} :: #{{atom(), arity()} => module()},
venv = #{} :: typechecker:venv(),
tenv :: gradualizer_lib:tenv(),
Expand Down
38 changes: 31 additions & 7 deletions src/typelib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,14 @@
'fun',
[{type, erl_anno:anno(), product, [type()]} |
type()]}.
-type printable_type() :: type() |
{type, erl_anno:anno(), bounded_fun,
-type bounded_fun() :: {type, erl_anno:anno(), bounded_fun,
[function_type() | [constraint()]]}.
-type printable_type() :: type() | bounded_fun().

%% @doc
%% Pretty-print a type represented as an Erlang abstract form.
%% @end
-spec pp_type(printable_type()) -> string();
([printable_type()]) -> string().
-spec pp_type(printable_type() | [printable_type()]) -> io_lib:chars().
pp_type(Types = [_|_]) ->
%% TODO: This is a workaround for the fact that a list is sometimes used in
%% place of a type. It typically represents a function type with multiple clauses.
Expand All @@ -53,7 +52,8 @@ pp_type({type, _, bounded_fun, [FunType, []]}) ->
pp_type(Type = {type, _, bounded_fun, _}) ->
%% erl_pp can't handle bounded_fun in type definitions
%% We invent our own syntax here, e.g. "fun((A) -> ok when A :: atom())"
Form = {attribute, erl_anno:new(0), spec, {{foo, 0}, [Type]}},
TypePp = type_for_erl_pp(Type),
Form = {attribute, erl_anno:new(0), spec, {{foo, 0}, [TypePp]}},
TypeDef = erl_pp:form(Form),
{match, [S]} = re:run(TypeDef, <<"-spec foo\\s*(.*)\\.\\n*$">>,
[{capture, all_but_first, list}, dotall]),
Expand All @@ -66,10 +66,13 @@ pp_type({var, _, TyVar}) ->
is_atom(TyVar) -> atom_to_list(TyVar);
is_list(TyVar) -> TyVar
end;
pp_type([]) ->
error({badarg, []});
pp_type(Type) ->
%% erl_pp can handle type definitions, so wrap Type in a type definition
%% and then take the type from that.
Form = {attribute, erl_anno:new(0), type, {t, Type, []}},
TypePp = type_for_erl_pp(Type),
Form = {attribute, erl_anno:new(0), type, {t, TypePp, []}},
TypeDef = erl_pp:form(Form),
{match, [S]} = re:run(TypeDef, <<"::\\s*(.*)\\.\\n*">>,
[{capture, all_but_first, list}, dotall]),
Expand All @@ -84,6 +87,21 @@ pp_type(Type) ->
% File -> S ++ " in " ++ File
%end.


%% Transform our `type()' into the standard `erl_parse:abstract_type()' that erl_pp accepts.
-spec type_for_erl_pp(type()) -> type();
(bounded_fun()) -> bounded_fun().
type_for_erl_pp({rigid_var, P, Var}) ->
{var, P, Var};
type_for_erl_pp({type, P, Tag, Args}) when is_list(Args) ->
{type, P, Tag, [type_for_erl_pp(Arg) || Arg <- Args]};
type_for_erl_pp({user_type, P, Tag, Args}) ->
{user_type, P, Tag, [type_for_erl_pp(Arg) || Arg <- Args]};
type_for_erl_pp({remote_type, P, [Mod, Fun, Args]}) ->
{remote_type, P, [Mod, Fun, [type_for_erl_pp(Arg) || Arg <- Args]]};
type_for_erl_pp(Type) ->
Type.

%% Looks up and prints the type M:N(P1, ..., Pn).
debug_type(M, N, P) ->
case gradualizer_db:get_type(M, N, P) of
Expand Down Expand Up @@ -114,7 +132,7 @@ remove_pos({type, _, constraint, [{atom, _, is_subtype}, Args]}) ->
L = erl_anno:new(0),
{type, L, constraint, [{atom, L, is_subtype}, lists:map(fun remove_pos/1, Args)]};
remove_pos({Type, _, Value})
when Type == atom; Type == integer; Type == char; Type == var ->
when Type == atom; Type == integer; Type == char; Type == var; Type == rigid_var ->
{Type, erl_anno:new(0), Value};
remove_pos({user_type, Anno, Name, Params}) when is_list(Params) ->
{user_type, anno_keep_only_filename(Anno), Name,
Expand Down Expand Up @@ -246,6 +264,11 @@ substitute_type_vars({var, L, Var}, TVars) ->
#{Var := Type} -> Type;
_ -> {var, L, Var}
end;
substitute_type_vars({rigid_var, L, Var}, TVars) when is_atom(Var) ->
case TVars of
#{Var := Type} -> Type;
_ -> {var, L, Var}
end;
substitute_type_vars(Other = {type, _, T, any}, _)
when T == tuple; T == map ->
Other;
Expand Down Expand Up @@ -317,6 +340,7 @@ reduce(Fun, _, Acc, {'type', _Anno, any} = Ty) -> Fun(Ty, Acc);
reduce(Fun, _, Acc, pos_inf = Ty) -> Fun(Ty, Acc);
reduce(Fun, _, Acc, neg_inf = Ty) -> Fun(Ty, Acc);
reduce(Fun, _, Acc, {var, _, _} = Ty) -> Fun(Ty, Acc);
reduce(Fun, _, Acc, {rigid_var, _, _} = Ty) -> Fun(Ty, Acc);
reduce(Fun, apply, Acc, Ty) ->
{NewTy, Acc1} = Fun(Ty, Acc),
reduce(Fun, recurse, Acc1, NewTy);
Expand Down
6 changes: 0 additions & 6 deletions test/known_problems/should_fail/rigid_type_variables_fail.erl

This file was deleted.

16 changes: 15 additions & 1 deletion test/should_fail/opaque_fail.erl
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
-module(opaque_fail).

-export([use_external/2]).
-export([use_external/2, update_without_opaque/0, add_to_opaque/0, return_opaque/0]).

-spec use_external(user_types:my_opaque(), integer() | undefined) -> integer().
use_external(I, undefined) -> I;
use_external(_, I) -> I.

-spec update_without_opaque() -> ok.
update_without_opaque() ->
_Val = user_types:update_opaque(3),
ok.

-spec add_to_opaque() -> ok.
add_to_opaque() ->
Val = user_types:new_opaque(),
Val + 1,
ok.

-spec return_opaque() -> user_types:my_opaque().
return_opaque() -> 3.
10 changes: 9 additions & 1 deletion test/should_fail/poly_fail.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
%% See "Bidirectional Typing for Erlang", N. Rajendrakumar, A. Bieniusa, 2021, Section 2. Examples.
-export([find1/0,
poly_2/1,
poly_fail/3]).
poly_2b/1,
poly_fail/3,
poly_fail_2/3]).

%% These examples don't come from the above paper.
-export([f/1,
Expand All @@ -25,9 +27,15 @@ find1() ->
-spec poly_2(fun((A) -> A)) -> {integer(), boolean()}.
poly_2(F) -> {F(42), F(false)}.

-spec poly_2b(fun((A) -> A)) -> {integer(), integer()}.
poly_2b(F) -> {F(42), F(84)}.

-spec poly_fail(fun((A) -> A), boolean(), integer()) -> {boolean(), integer()}.
poly_fail(F, B, I) -> {F(I), F(B)}.

-spec poly_fail_2(fun((A) -> A), boolean(), boolean()) -> {boolean(), boolean()}.
poly_fail_2(F, B1, B2) -> {F(B2), F(B1)}.

-spec f([integer(), ...]) -> atom().
f(L) ->
hd(L).
Expand Down
Loading

0 comments on commit ac139bc

Please sign in to comment.