Skip to content

Commit

Permalink
[mypyc] Track definedness of native int attributes using a bitmap (#1…
Browse files Browse the repository at this point in the history
…3532)

Since native ints can't support a reserved value to mark an undefined
attribute, use a separate bitmap attribute (or attributes) to store
information about defined/undefined attributes with native int types.

The bitmap is only defined if we can't infer that an attribute is
always defined, and it's only needed for native int attributes.

We only access the bitmap if the runtime value of an attribute is
equal to the (overlapping) error value. This way the performance cost
of the bitmap is pretty low on average.

I'll add support for traits in a follow-up PR to keep this PR simple.

Work on mypyc/mypyc#837.
  • Loading branch information
JukkaL committed Aug 31, 2022
1 parent 2857736 commit 9393fa1
Show file tree
Hide file tree
Showing 10 changed files with 503 additions and 17 deletions.
21 changes: 20 additions & 1 deletion mypyc/analysis/attrdefined.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def foo(self) -> int:
SetMem,
Unreachable,
)
from mypyc.ir.rtypes import RInstance
from mypyc.ir.rtypes import RInstance, is_fixed_width_rtype

# If True, print out all always-defined attributes of native classes (to aid
# debugging and testing)
Expand Down Expand Up @@ -120,6 +120,11 @@ def analyze_always_defined_attrs(class_irs: list[ClassIR]) -> None:
for cl in class_irs:
update_always_defined_attrs_using_subclasses(cl, seen)

# Final pass: detect attributes that need to use a bitmap to track definedness
seen = set()
for cl in class_irs:
detect_undefined_bitmap(cl, seen)


def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> None:
if cl in seen:
Expand Down Expand Up @@ -407,3 +412,17 @@ def update_always_defined_attrs_using_subclasses(cl: ClassIR, seen: set[ClassIR]
removed.add(attr)
cl._always_initialized_attrs -= removed
seen.add(cl)


def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None:
    """Populate cl.bitmap_attrs with the attributes tracked in the definedness bitmap.

    Base classes are processed first so that the bitmap layout of the
    immediate base (cl.base_mro[1]) is complete before it is copied as a
    prefix of this class's list. Attributes declared in cl itself are then
    appended if they have a fixed-width type and are not always defined.

    Args:
        cl: Class to analyze; its bitmap_attrs list is extended in place.
        seen: Classes that have already been processed. Updated in place so
            that each class is handled at most once.
    """
    if cl in seen:
        return
    seen.add(cl)

    # Recurse into each base class (not into cl itself, which is already in
    # 'seen' and would return immediately without doing any work).
    for base in cl.base_mro[1:]:
        detect_undefined_bitmap(base, seen)

    # Inherit the bit positions of the immediate base so that indexes stay
    # the same in the subclass.
    if len(cl.base_mro) > 1:
        cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs)

    cl.bitmap_attrs.extend(
        name
        for name, rtype in cl.attributes.items()
        if is_fixed_width_rtype(rtype) and not cl.is_always_defined(name)
    )
71 changes: 66 additions & 5 deletions mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from mypyc.codegen.literals import Literals
from mypyc.common import (
ATTR_BITMAP_BITS,
ATTR_PREFIX,
FAST_ISINSTANCE_MAX_SUBCLASSES,
NATIVE_PREFIX,
Expand Down Expand Up @@ -329,21 +330,81 @@ def tuple_c_declaration(self, rtuple: RTuple) -> list[str]:

return result

def bitmap_field(self, index: int) -> str:
    """Return the name of the C struct field holding bitmap bit 'index'.

    Each field stores ATTR_BITMAP_BITS bits. The first field is called
    "bitmap"; later ones are "bitmap2", "bitmap3", and so on.
    """
    word = index // ATTR_BITMAP_BITS
    return "bitmap" if word == 0 else f"bitmap{word + 1}"

def attr_bitmap_expr(self, obj: str, cl: ClassIR, index: int) -> str:
    """Build a C expression that refers to the bitmap field for bit 'index'.

    'obj' is a C expression for the object; it is cast to a pointer to the
    struct of 'cl' before the field is accessed.
    """
    struct_name = cl.struct_name(self.names)
    field = self.bitmap_field(index)
    return f"(({struct_name} *){obj})->{field}"

def emit_attr_bitmap_set(
    self, value: str, obj: str, rtype: RType, cl: ClassIR, attr: str
) -> None:
    """Emit C code that flags an attribute as defined in the attribute bitmap.

    The attribute must be one that is tracked in the bitmap (not every
    attribute is). The generated code only touches the bitmap when 'value'
    equals the error value of 'rtype'; otherwise it does nothing.
    """
    self._emit_attr_bitmap_update(
        value=value, obj=obj, rtype=rtype, cl=cl, attr=attr, clear=False
    )

def emit_attr_bitmap_clear(self, obj: str, rtype: RType, cl: ClassIR, attr: str) -> None:
    """Emit C code that flags an attribute as undefined in the attribute bitmap.

    In contrast to emit_attr_bitmap_set, the bit is cleared unconditionally:
    an empty value is passed on, so no guarding 'if' is generated.
    """
    self._emit_attr_bitmap_update(
        value="", obj=obj, rtype=rtype, cl=cl, attr=attr, clear=True
    )

def _emit_attr_bitmap_update(
    self, value: str, obj: str, rtype: RType, cl: ClassIR, attr: str, clear: bool
) -> None:
    """Emit a C statement that sets or clears the bitmap bit of an attribute.

    If 'value' is a non-empty C expression, the statement is wrapped in an
    'if' that only runs when the expression equals the error value of
    'rtype'. With an empty 'value' the statement is emitted unguarded.
    'clear' selects between clearing (True) and setting (False) the bit.
    """
    guarded = bool(value)
    if guarded:
        error_value = self.c_undefined_value(rtype)
        self.emit_line(f"if (unlikely({value} == {error_value})) {{")

    # The position in bitmap_attrs is the bit number; the low bits of the
    # position select the bit within a single bitmap field.
    position = cl.bitmap_attrs.index(attr)
    bit = 1 << (position & (ATTR_BITMAP_BITS - 1))
    target = self.attr_bitmap_expr(obj, cl, position)

    operation = f"&= ~{bit}" if clear else f"|= {bit}"
    self.emit_line(f"{target} {operation};")

    if guarded:
        self.emit_line("}")

def use_vectorcall(self) -> bool:
    """Return what the module-level use_vectorcall() reports for this emitter's capi_version."""
    supported = use_vectorcall(self.capi_version)
    return supported

def emit_undefined_attr_check(
    self,
    rtype: RType,
    attr_expr: str,
    compare: str,
    obj: str,
    attr: str,
    cl: ClassIR,
    *,
    unlikely: bool = False,
) -> None:
    """Emit the opening line of a C 'if' that compares an attribute with its error value.

    Only the 'if (...) {' line is emitted; the caller is responsible for the
    body and the closing brace.

    Args:
        rtype: Type of the attribute.
        attr_expr: C expression that evaluates to the attribute value.
        compare: C comparison operator placed between the value and the
            error value (the visible call sites pass "==" or "!=").
        obj: C expression for the object that owns the attribute.
        attr: Attribute name; looked up in cl.bitmap_attrs when 'rtype' is a
            fixed-width type.
        cl: Class that defines the struct layout and the bitmap attributes.
        unlikely: If True, wrap the value comparison in unlikely().
    """
    if isinstance(rtype, RTuple):
        check = self.tuple_undefined_check_cond(
            rtype, attr_expr, self.c_undefined_value, compare
        )
    else:
        undefined = self.c_undefined_value(rtype)
        check = f"{attr_expr} {compare} {undefined}"
    if unlikely:
        check = f"unlikely({check})"
    if is_fixed_width_rtype(rtype):
        # The error value overlaps a valid value for fixed-width types, so
        # additionally require that the definedness bit is not set.
        index = cl.bitmap_attrs.index(attr)
        bit = 1 << (index & (ATTR_BITMAP_BITS - 1))
        bitmap = self.attr_bitmap_expr(obj, cl, index)
        check = f"{check} && !({bitmap} & {bit})"
    self.emit_line(f"if ({check}) {{")

def tuple_undefined_check_cond(
self,
Expand Down
32 changes: 28 additions & 4 deletions mypyc/codegen/emitclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@
generate_richcompare_wrapper,
generate_set_del_item_wrapper,
)
from mypyc.common import NATIVE_PREFIX, PREFIX, REG_PREFIX, use_fastcall
from mypyc.common import (
ATTR_BITMAP_BITS,
ATTR_BITMAP_TYPE,
NATIVE_PREFIX,
PREFIX,
REG_PREFIX,
use_fastcall,
)
from mypyc.ir.class_ir import ClassIR, VTableEntries
from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD, FuncDecl, FuncIR
from mypyc.ir.rtypes import RTuple, RType, object_rprimitive
from mypyc.ir.rtypes import RTuple, RType, is_fixed_width_rtype, object_rprimitive
from mypyc.namegen import NameGenerator
from mypyc.sametype import is_same_type

Expand Down Expand Up @@ -367,8 +374,17 @@ def generate_object_struct(cl: ClassIR, emitter: Emitter) -> None:
lines += ["typedef struct {", "PyObject_HEAD", "CPyVTableItem *vtable;"]
if cl.has_method("__call__") and emitter.use_vectorcall():
lines.append("vectorcallfunc vectorcall;")
bitmap_attrs = []
for base in reversed(cl.base_mro):
if not base.is_trait:
if base.bitmap_attrs:
# Do we need another attribute bitmap field?
if emitter.bitmap_field(len(base.bitmap_attrs) - 1) not in bitmap_attrs:
for i in range(0, len(base.bitmap_attrs), ATTR_BITMAP_BITS):
attr = emitter.bitmap_field(i)
if attr not in bitmap_attrs:
lines.append(f"{ATTR_BITMAP_TYPE} {attr};")
bitmap_attrs.append(attr)
for attr, rtype in base.attributes.items():
if (attr, rtype) not in seen_attrs:
lines.append(f"{emitter.ctype_spaced(rtype)}{emitter.attr(attr)};")
Expand Down Expand Up @@ -546,6 +562,9 @@ def generate_setup_for_class(
emitter.emit_line("}")
else:
emitter.emit_line(f"self->vtable = {vtable_name};")
for i in range(0, len(cl.bitmap_attrs), ATTR_BITMAP_BITS):
field = emitter.bitmap_field(i)
emitter.emit_line(f"self->{field} = 0;")

if cl.has_method("__call__") and emitter.use_vectorcall():
name = cl.method_decl("__call__").cname(emitter.names)
Expand Down Expand Up @@ -887,7 +906,7 @@ def generate_getter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
always_defined = cl.is_always_defined(attr) and not rtype.is_refcounted

if not always_defined:
emitter.emit_undefined_attr_check(rtype, attr_expr, "==", unlikely=True)
emitter.emit_undefined_attr_check(rtype, attr_expr, "==", "self", attr, cl, unlikely=True)
emitter.emit_line("PyErr_SetString(PyExc_AttributeError,")
emitter.emit_line(f' "attribute {repr(attr)} of {repr(cl.name)} undefined");')
emitter.emit_line("return NULL;")
Expand Down Expand Up @@ -926,7 +945,7 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
if rtype.is_refcounted:
attr_expr = f"self->{attr_field}"
if not always_defined:
emitter.emit_undefined_attr_check(rtype, attr_expr, "!=")
emitter.emit_undefined_attr_check(rtype, attr_expr, "!=", "self", attr, cl)
emitter.emit_dec_ref(f"self->{attr_field}", rtype)
if not always_defined:
emitter.emit_line("}")
Expand All @@ -943,9 +962,14 @@ def generate_setter(cl: ClassIR, attr: str, rtype: RType, emitter: Emitter) -> N
emitter.emit_lines("if (!tmp)", " return -1;")
emitter.emit_inc_ref("tmp", rtype)
emitter.emit_line(f"self->{attr_field} = tmp;")
if is_fixed_width_rtype(rtype) and not always_defined:
emitter.emit_attr_bitmap_set("tmp", "self", rtype, cl, attr)

if deletable:
emitter.emit_line("} else")
emitter.emit_line(f" self->{attr_field} = {emitter.c_undefined_value(rtype)};")
if is_fixed_width_rtype(rtype):
emitter.emit_attr_bitmap_clear("self", rtype, cl, attr)
emitter.emit_line("return 0;")
emitter.emit_line("}")

Expand Down
14 changes: 12 additions & 2 deletions mypyc/codegen/emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
RStruct,
RTuple,
RType,
is_fixed_width_rtype,
is_int32_rprimitive,
is_int64_rprimitive,
is_int_rprimitive,
Expand Down Expand Up @@ -353,7 +354,9 @@ def visit_get_attr(self, op: GetAttr) -> None:
always_defined = cl.is_always_defined(op.attr)
merged_branch = None
if not always_defined:
self.emitter.emit_undefined_attr_check(attr_rtype, dest, "==", unlikely=True)
self.emitter.emit_undefined_attr_check(
attr_rtype, dest, "==", obj, op.attr, cl, unlikely=True
)
branch = self.next_branch()
if branch is not None:
if (
Expand Down Expand Up @@ -433,10 +436,17 @@ def visit_set_attr(self, op: SetAttr) -> None:
# previously undefined), so decref the old value.
always_defined = cl.is_always_defined(op.attr)
if not always_defined:
self.emitter.emit_undefined_attr_check(attr_rtype, attr_expr, "!=")
self.emitter.emit_undefined_attr_check(
attr_rtype, attr_expr, "!=", obj, op.attr, cl
)
self.emitter.emit_dec_ref(attr_expr, attr_rtype)
if not always_defined:
self.emitter.emit_line("}")
elif is_fixed_width_rtype(attr_rtype) and not cl.is_always_defined(op.attr):
# If there is overlap with the error value, update bitmap to mark
# attribute as defined.
self.emitter.emit_attr_bitmap_set(src, obj, attr_rtype, cl, op.attr)

# This steals the reference to src, so we don't need to increment the arg
self.emitter.emit_line(f"{attr_expr} = {src};")
if op.error_kind == ERR_FALSE:
Expand Down
5 changes: 5 additions & 0 deletions mypyc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
MAX_LITERAL_SHORT_INT: Final = sys.maxsize >> 1 if not IS_MIXED_32_64_BIT_BUILD else 2**30 - 1
MIN_LITERAL_SHORT_INT: Final = -MAX_LITERAL_SHORT_INT - 1

# Description of the C type used to track definedness of attributes
# that have types with overlapping error values
ATTR_BITMAP_TYPE: Final = "uint32_t"
ATTR_BITMAP_BITS: Final = 32

# Runtime C library files
RUNTIME_C_FILES: Final = [
"init.c",
Expand Down
9 changes: 8 additions & 1 deletion mypyc/ir/class_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __init__(
self.builtin_base: str | None = None
# Default empty constructor
self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))

# Attributes defined in the class (not inherited)
self.attributes: dict[str, RType] = {}
# Deletable attributes
self.deletable: list[str] = []
Expand Down Expand Up @@ -184,6 +184,13 @@ def __init__(
# If True, __init__ can make 'self' visible to unanalyzed/arbitrary code
self.init_self_leak = False

# Definedness of these attributes is backed by a bitmap. Index in the list
# indicates the bit number. Includes inherited attributes. We need the
# bitmap for types such as native ints that can't have a dedicated error
# value that doesn't overlap a valid value. The bitmap is used if the
# value of an attribute is the same as the error value.
self.bitmap_attrs: List[str] = []

def __repr__(self) -> str:
return (
"ClassIR("
Expand Down
2 changes: 1 addition & 1 deletion mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ def __init__(self, obj: Value, attr: str, line: int, *, borrow: bool = False) ->
attr_type = obj.type.attr_type(attr)
self.type = attr_type
if is_fixed_width_rtype(attr_type):
self.error_kind = ERR_NEVER
self.error_kind = ERR_MAGIC_OVERLAPPING
self.is_borrowed = borrow and attr_type.is_refcounted

def sources(self) -> list[Value]:
Expand Down
6 changes: 5 additions & 1 deletion mypyc/irbuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ def build_ir(
options: CompilerOptions,
errors: Errors,
) -> ModuleIRs:
"""Build IR for a set of modules that have been type-checked by mypy."""
"""Build basic IR for a set of modules that have been type-checked by mypy.
The returned IR is not complete and requires additional
transformations, such as the insertion of refcount handling.
"""

build_type_map(mapper, modules, graph, types, options, errors)
singledispatch_info = find_singledispatch_register_impls(modules, errors)
Expand Down
Loading

0 comments on commit 9393fa1

Please sign in to comment.