Skip to content

Commit

Permalink
Rewrite unions such that all members are of the same size
Browse files Browse the repository at this point in the history
In the C front-end, rewrite union components with types smaller than the
union's size to anonymous structs. Each such struct contains the
original union component plus padding. Assignments to union members thus
always assign all bytes that make up the object representation of a
union. The use of an anonymous struct ensures that member accesses can
still be resolved.

As this is a change in the semantics of goto programs, the goto binary
version is incremented. rewrite_union is no longer necessary, but bugs
(hidden by rewrite_union) in handling endianness in simplify_expr_member
and convert_member surfaced and had to be fixed.
  • Loading branch information
tautschnig committed Jan 12, 2021
1 parent 67b36f0 commit 60b3d32
Show file tree
Hide file tree
Showing 21 changed files with 320 additions and 58 deletions.
2 changes: 0 additions & 2 deletions jbmc/src/jdiff/jdiff_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ Author: Peter Schrammel
#include <goto-programs/remove_unused_functions.h>
#include <goto-programs/remove_vector.h>
#include <goto-programs/remove_virtual_functions.h>
#include <goto-programs/rewrite_union.h>
#include <goto-programs/set_properties.h>
#include <goto-programs/show_properties.h>
#include <goto-programs/string_abstraction.h>
Expand Down Expand Up @@ -296,7 +295,6 @@ bool jdiff_parse_optionst::process_goto_program(
remove_returns(goto_model);
remove_vector(goto_model);
remove_complex(goto_model);
rewrite_union(goto_model);

// add generic checks
log.status() << "Generic Property Instrumentation" << messaget::eom;
Expand Down
Binary file modified regression/ansi-c/arch_flags_mcpu_bad/object.intel
Binary file not shown.
2 changes: 1 addition & 1 deletion regression/ansi-c/arch_flags_mcpu_bad/test.desc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This tests the -mcpu=cortex=a15 flag that should activate ARM-32 mode.
The object file 'object.intel' was compiled from 'source.c' with goto-cc
on a 64-bit platform:

goto-cc -c source.c
goto-cc -c source.c -o object.intel

preproc.i is already pre-processed so that it can be linked in without
needing to invoke a pre-processor from a cross-compile toolchain on your
Expand Down
Binary file modified regression/ansi-c/arch_flags_mcpu_good/object.arm
Binary file not shown.
Binary file modified regression/ansi-c/arch_flags_mthumb_bad/object.intel
Binary file not shown.
2 changes: 1 addition & 1 deletion regression/ansi-c/arch_flags_mthumb_bad/test.desc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This tests the -mthumb flag that should activate ARM-32 mode. The object
file 'object.intel' was compiled from 'source.c' with goto-cc on a
64-bit platform:

goto-cc -c source.c
goto-cc -c source.c -o object.intel

preproc.i is already pre-processed so that it can be linked in without
needing to invoke a pre-processor from a cross-compile toolchain on your
Expand Down
Binary file modified regression/ansi-c/arch_flags_mthumb_good/object.arm
Binary file not shown.
6 changes: 3 additions & 3 deletions regression/cbmc-library/float-nan-check/test.desc
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
CORE
main.c
--nan-check
\[main.NaN.1\] line \d+ NaN on \+ in byte_extract_little_endian\(c, (0|0l|0ll), float\) \+ myzero: SUCCESS
\[main.NaN.1\] line \d+ NaN on \+ in c\.f \+ myzero: SUCCESS
\[main.NaN.2\] line \d+ NaN on / in myzero / myzero: FAILURE
\[main.NaN.3\] line \d+ NaN on / in \(float\)n / myinf: FAILURE
\[main.NaN.4\] line \d+ NaN on \* in myinf \* myzero: FAILURE
\[main.NaN.5\] line \d+ NaN on \+ in -myinf \+ myinf: FAILURE
\[main.NaN.6\] line \d+ NaN on - in myinf - myinf: FAILURE
\[main.NaN.7\] line \d+ NaN on - in -myinf - -myinf: FAILURE
\[main.NaN.8\] line \d+ NaN on \+ in byte_extract_little_endian\(c, (0|0l|0ll), float\) \+ byte_extract_little_endian\(c, (0|0l|0ll), float\): SUCCESS
\[main.NaN.9\] line \d+ NaN on / in byte_extract_little_endian\(c, (0|0l|0ll), float\) / myzero: SUCCESS
\[main.NaN.8\] line \d+ NaN on \+ in c\.f \+ c\.f: SUCCESS
\[main.NaN.9\] line \d+ NaN on / in c\.f / myzero: SUCCESS
\[main.NaN.10\] line \d+ NaN on \* in mynan \* mynan: SUCCESS
\[main.NaN.11\] line \d+ NaN on \+ in mynan \+ mynan: SUCCESS
\[main.NaN.12\] line \d+ NaN on - in mynan - mynan: SUCCESS
Expand Down
6 changes: 3 additions & 3 deletions regression/cbmc/union7/test.desc
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
CORE broken-smt-backend
CORE
main.c
--big-endian
Generated 3 VCC\(s\), 0 remaining after simplification
^EXIT=0$
^SIGNAL=0$
^VERIFICATION SUCCESSFUL$
--
^warning: ignoring
--
This test reports a VERIFICATION ERROR when running with the SMT2 solver on
Windows only.
This test can now be fully solved via constant propagation and simplification.
31 changes: 31 additions & 0 deletions src/ansi-c/c_typecheck_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Author: Daniel Kroening, kroening@kroening.com
#include <util/c_types.h>
#include <util/config.h>
#include <util/cprover_prefix.h>
#include <util/expr_initializer.h>
#include <util/expr_util.h>
#include <util/ieee_float.h>
#include <util/mathematical_expr.h>
Expand Down Expand Up @@ -1086,6 +1087,36 @@ void c_typecheck_baset::typecheck_expr_typecast(exprt &expr)
expr.set(ID_C_lvalue, true);
return;
}
else if(c.get_anonymous() && c.type().id() == ID_struct_tag)
{
// we also look into anonymous members as those we might have wrapped a
// non-maximal member in such a struct
const struct_typet &struct_type =
follow_tag(to_struct_tag_type(c.type()));
if(
!struct_type.components().empty() &&
struct_type.components().front().type() == op.type())
{
auto nondet =
nondet_initializer(c.type(), expr.source_location(), *this);
if(!nondet.has_value())
{
error().source_location = expr.source_location();
error() << "cannot nondet-initialize union component of type '"
<< to_string(c.type()) << "'" << eom;
throw 0;
}
CHECK_RETURN(nondet->id() == ID_struct);
CHECK_RETURN(!nondet->operands().empty());

nondet->operands().front() = op;
union_exprt union_expr(c.get_name(), *nondet, expr.type());
union_expr.add_source_location() = expr.source_location();
expr = union_expr;
expr.set(ID_C_lvalue, true);
return;
}
}
}

// not found, complain
Expand Down
45 changes: 34 additions & 11 deletions src/ansi-c/c_typecheck_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,19 +526,42 @@ exprt::operandst::const_iterator c_typecheck_baset::do_designated_initializer(
// The first component is not the maximum member, which the (default)
// zero initializer prepared. Replace this by a component-specific
// initializer; other bytes have an unspecified value (C Standard
// 6.2.6.1(7)).
const auto zero =
zero_initializer(component.type(), value.source_location(), *this);
if(!zero.has_value())
// 6.2.6.1(7)). In practice, objects of static lifetime are fully zero
// initialized.
if(current_symbol.is_static_lifetime)
{
error().source_location = value.source_location();
error() << "cannot zero-initialize union component of type '"
<< to_string(component.type()) << "'" << eom;
throw 0;
const auto zero =
zero_initializer(component.type(), value.source_location(), *this);
if(!zero.has_value())
{
error().source_location = value.source_location();
error() << "cannot zero-initialize union component of type '"
<< to_string(component.type()) << "'" << eom;
throw 0;
}

union_exprt union_expr(component.get_name(), *zero, type);
union_expr.add_source_location() = value.source_location();
*dest = std::move(union_expr);
dest = &(to_union_expr(*dest).op());
}
else
{
const auto nondet = nondet_initializer(
component.type(), value.source_location(), *this);
if(!nondet.has_value())
{
error().source_location = value.source_location();
error() << "cannot nondet-initialize union component of type '"
<< to_string(component.type()) << "'" << eom;
throw 0;
}

union_exprt union_expr(component.get_name(), *nondet, type);
union_expr.add_source_location() = value.source_location();
*dest = std::move(union_expr);
dest = &(to_union_expr(*dest).op());
}
union_exprt union_expr(component.get_name(), *zero, type);
union_expr.add_source_location()=value.source_location();
*dest=union_expr;
}

dest = &(to_union_expr(*dest).op());
Expand Down
128 changes: 128 additions & 0 deletions src/ansi-c/c_typecheck_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,134 @@ void c_typecheck_baset::typecheck_compound_body(
else
it++;
}

if(type.id() == ID_union && !components.empty())
{
const auto widest_member =
to_union_type(type).find_widest_union_component(*this);
optionalt<exprt> size_expr_opt;

if(!widest_member.has_value())
{
// GCC extension: arrays of non-constant size
size_expr_opt = size_of_expr(type, *this);

if(!size_expr_opt.has_value())
{
error().source_location = type.source_location();
error() << "cannot determine size of union" << eom;
throw 0;
}
}

for(auto &component : components)
{
std::vector<typet> padding_types;

const auto &component_bits = pointer_offset_bits(component.type(), *this);
if(
component_bits.has_value() &&
(!widest_member.has_value() ||
*component_bits < widest_member->second) &&
*component_bits % 8 != 0)
{
std::size_t bit_field_padding_width =
8 - numeric_cast_v<std::size_t>(*component_bits % 8);
padding_types.push_back(c_bit_field_typet{
unsignedbv_typet{bit_field_padding_width}, bit_field_padding_width});
}

if(size_expr_opt.has_value())
{
const auto component_size_expr_opt =
size_of_expr(component.type(), *this);
CHECK_RETURN(component_size_expr_opt.has_value());
// size_of_expr will round up to full bytes, and also works for bit
// fields (which C sizeof does not support). Thus, we don't need to
// special-case bit_field_padding_opt being set.
if_exprt padding_bytes{
binary_relation_exprt{
*component_size_expr_opt, ID_lt, *size_expr_opt},
minus_exprt{*size_expr_opt, *component_size_expr_opt},
from_integer(0, size_expr_opt->type())};
simplify(padding_bytes, *this);

if(!padding_bytes.is_zero())
{
make_index_type(padding_bytes);

padding_types.push_back(
array_typet{unsigned_char_type(), padding_bytes});
}
}
else
{
CHECK_RETURN(component_bits.has_value());
// Truncating division ensures that we don't need to special-case
// bit_field_padding_opt being set.
std::size_t padding_bytes =
numeric_cast_v<std::size_t>(widest_member->second - *component_bits) /
8;

if(padding_bytes != 0)
{
padding_types.push_back(array_typet{
unsigned_char_type(), from_integer(padding_bytes, index_type())});
}
}

if(padding_types.empty())
continue;

struct_typet::componentst component_with_padding_type;
component_with_padding_type.push_back(component);
for(const auto &t : padding_types)
{
// TODO: perhaps use the same naming as padding.cpp uses
component_with_padding_type.push_back(struct_union_typet::componentt{
"$anon" + std::to_string(anon_member_counter++), t});
// TODO: not sure whether this should be anonymous
component_with_padding_type.back().set_anonymous(true);
component_with_padding_type.back().set_is_padding(true);
component_with_padding_type.back().add_source_location() =
component.source_location();
}

struct_union_typet::componentt component_with_padding{
"$anon" + std::to_string(anon_member_counter++),
struct_typet{component_with_padding_type}};
component_with_padding.set_anonymous(true);
component_with_padding.add_source_location() =
component.source_location();

// based on typecheck_compound_type, branch
// "if(type.find(ID_tag).is_nil())"
// produce type symbol
type_symbolt compound_symbol{component_with_padding.type()};
compound_symbol.location = component_with_padding.source_location();

std::string typestr = type2name(compound_symbol.type, *this);
compound_symbol.base_name = "#anon#" + typestr;
compound_symbol.name = "tag-#anon#" + typestr;

// We might already have the same anonymous struct, and this is simply
// ok as those are exactly the same types, just introduced at a
// different source location.
symbolt *new_symbol = &compound_symbol;
if(
symbol_table.symbols.find(compound_symbol.name) ==
symbol_table.symbols.end())
{
move_symbol(compound_symbol, new_symbol);
}

struct_tag_typet tag_type{new_symbol->name};
tag_type.add_source_location() = component_with_padding.source_location();
component_with_padding.type().swap(tag_type);

component.swap(component_with_padding);
}
}
}

typet c_typecheck_baset::enum_constant_type(
Expand Down
2 changes: 0 additions & 2 deletions src/cbmc/cbmc_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ Author: Daniel Kroening, kroening@kroening.com
#include <goto-programs/remove_skip.h>
#include <goto-programs/remove_unused_functions.h>
#include <goto-programs/remove_vector.h>
#include <goto-programs/rewrite_union.h>
#include <goto-programs/set_properties.h>
#include <goto-programs/show_goto_functions.h>
#include <goto-programs/show_properties.h>
Expand Down Expand Up @@ -943,7 +942,6 @@ bool cbmc_parse_optionst::process_goto_program(
remove_returns(goto_model);
remove_vector(goto_model);
remove_complex(goto_model);
rewrite_union(goto_model);

// add generic checks
log.status() << "Generic Property Instrumentation" << messaget::eom;
Expand Down
2 changes: 0 additions & 2 deletions src/goto-diff/goto_diff_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ Author: Peter Schrammel
#include <goto-programs/remove_unused_functions.h>
#include <goto-programs/remove_vector.h>
#include <goto-programs/remove_virtual_functions.h>
#include <goto-programs/rewrite_union.h>
#include <goto-programs/set_properties.h>
#include <goto-programs/show_properties.h>
#include <goto-programs/string_abstraction.h>
Expand Down Expand Up @@ -341,7 +340,6 @@ bool goto_diff_parse_optionst::process_goto_program(
remove_returns(goto_model);
remove_vector(goto_model);
remove_complex(goto_model);
rewrite_union(goto_model);

// add generic checks
log.status() << "Generic Property Instrumentation" << messaget::eom;
Expand Down
2 changes: 1 addition & 1 deletion src/goto-programs/write_goto_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Author: CM Wintersteiger
#ifndef CPROVER_GOTO_PROGRAMS_WRITE_GOTO_BINARY_H
#define CPROVER_GOTO_PROGRAMS_WRITE_GOTO_BINARY_H

#define GOTO_BINARY_VERSION 5
#define GOTO_BINARY_VERSION 6

#include <iosfwd>
#include <string>
Expand Down
28 changes: 25 additions & 3 deletions src/goto-symex/symex_assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ void symex_assignt::assign_rec(
}
else if(type.id() == ID_union || type.id() == ID_union_tag)
{
// should have been replaced by byte_extract
throw unsupported_operation_exceptiont(
"assign_rec: unexpected assignment to union member");
assign_union_member(to_member_expr(lhs), full_lhs, rhs, guard);
}
else
throw unsupported_operation_exceptiont(
Expand Down Expand Up @@ -343,6 +341,30 @@ void symex_assignt::assign_struct_member(
}
}

void symex_assignt::assign_union_member(
const member_exprt &lhs,
const expr_skeletont &full_lhs,
const exprt &rhs,
exprt::operandst &guard)
{
// Symbolic execution of a union member assignment.

// lhs must be member operand, which takes one operand, which must be a union.
exprt lhs_union = lhs.op();
const irep_idt &component_name = lhs.get_component_name();

// turn
// a.c=e
// into
// a'== { .c=e }
// as the front-end guarantees that .c will have the full size of the union

union_exprt new_rhs{component_name, rhs, lhs_union.type()};
const expr_skeletont new_skeleton =
full_lhs.compose(expr_skeletont::remove_op0(lhs));
assign_rec(lhs_union, new_skeleton, new_rhs, guard);
}

void symex_assignt::assign_if(
const if_exprt &lhs,
const expr_skeletont &full_lhs,
Expand Down
6 changes: 6 additions & 0 deletions src/goto-symex/symex_assign.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ class symex_assignt
const exprt &rhs,
exprt::operandst &guard);

void assign_union_member(
const member_exprt &lhs,
const expr_skeletont &full_lhs,
const exprt &rhs,
exprt::operandst &guard);

void assign_if(
const if_exprt &lhs,
const expr_skeletont &full_lhs,
Expand Down
Loading

0 comments on commit 60b3d32

Please sign in to comment.