diff --git a/src/wasm-type-shape.h b/src/wasm-type-shape.h index b649e1b72db..b3dda426803 100644 --- a/src/wasm-type-shape.h +++ b/src/wasm-type-shape.h @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include "wasm-features.h" @@ -162,17 +162,22 @@ struct BrandTypeIterator { // with an extra brand type at the end of the group that differentiates it from // previous group. struct UniqueRecGroups { - std::list> groups; - std::unordered_set shapes; + using Groups = std::list>; + Groups groups; + std::unordered_map shapes; FeatureSet features; UniqueRecGroups(FeatureSet features) : features(features) {} // Insert a rec group. If it is already unique, return the original types. - // Otherwise rebuild the group make it unique and return the rebuilt types, + // Otherwise rebuild the group to make it unique and return the rebuilt types, // including the brand. const std::vector& insert(std::vector group); + + // If the group is unique, insert it and return the types. Otherwise, return + // the types that already have this shape. + const std::vector& insertOrGet(std::vector group); }; } // namespace wasm diff --git a/src/wasm-type.h b/src/wasm-type.h index 98aacb074b2..fb084aeb7e2 100644 --- a/src/wasm-type.h +++ b/src/wasm-type.h @@ -264,6 +264,11 @@ class HeapType { // Returns the feature set required to use this type. FeatureSet getFeatures() const; + // We support more precise types in the IR than the enabled feature set would + // suggest. Get the generalized version of the type that will be written by + // the binary writer given the feature set. + inline HeapType asWrittenGivenFeatures(FeatureSet feats) const; + // Helper allowing the value of `print(...)` to be sent to an ostream. Stores // a `TypeID` because `Type` is incomplete at this point and using a reference // makes it less convenient to use. @@ -283,6 +288,16 @@ class HeapType { std::string toString() const; }; +HeapType HeapType::asWrittenGivenFeatures(FeatureSet feats) const { + // Without GC, only top types like func and extern are supported. The + // exception is string, since stringref can be enabled without GC and we still + // expect to write stringref types in that case. + if (!feats.hasGC() && *this != HeapType::string) { + return getTop(); + } + return *this; +} + class Type { // The `id` uniquely represents each type, so type equality is just a // comparison of the ids. The basic types are packed at the bottom of the @@ -419,7 +434,7 @@ class Type { return isRef() && getHeapType().isContinuation(); } bool isDefaultable() const; - bool isCastable(); + bool isCastable() const; // TODO: Allow this only for reference types. Nullability getNullability() const { @@ -450,6 +465,11 @@ class Type { return !isExact() || feats.hasCustomDescriptors() ? *this : with(Inexact); } + // We support more precise types in the IR than the enabled feature set would + // suggest. Get the generalized version of the type that will be written by + // the binary writer given the feature set. + inline Type asWrittenGivenFeatures(FeatureSet feats) const; + private: template bool hasPredicate() { for (const auto& type : *this) { @@ -578,6 +598,20 @@ class Type { const Type& operator[](size_t i) const { return *Iterator{{this, i}}; } }; +Type Type::asWrittenGivenFeatures(FeatureSet feats) const { + if (!isRef()) { + return *this; + } + auto type = with(getHeapType().asWrittenGivenFeatures(feats)); + if (!feats.hasGC()) { + type = type.with(Nullable); + } + if (!feats.hasCustomDescriptors()) { + type = type.with(Inexact); + } + return type; +} + namespace HeapTypes { constexpr HeapType ext = HeapType::ext; @@ -878,6 +912,9 @@ struct TypeBuilder { InvalidUnsharedDescribes, // The custom descriptors feature is missing. RequiresCustomDescriptors, + // Two rec groups with different shapes would have the same shapes after + // the binary writer generalizes refined types that use disabled features. + RecGroupCollision, }; struct Error { diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index df8b6a28df5..319a5319daf 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1798,23 +1798,8 @@ void WasmBinaryWriter::writeInlineBuffer(const char* data, size_t size) { } void WasmBinaryWriter::writeType(Type type) { + type = type.asWrittenGivenFeatures(wasm->features); if (type.isRef()) { - // The only reference types allowed without GC are funcref, externref, and - // exnref. We internally use more refined versions of those types, but we - // cannot emit those without GC. - if (!wasm->features.hasGC()) { - auto ht = type.getHeapType(); - if (ht.isMaybeShared(HeapType::string)) { - // Do not overgeneralize stringref to anyref. We have tests that when a - // stringref is expected, we actually get a stringref. If we see a - // string, the stringref feature must be enabled. - type = Type(HeapTypes::string.getBasic(ht.getShared()), Nullable); - } else { - // Only the top type (func, extern, exn) is available, and only the - // nullable version. - type = Type(type.getHeapType().getTop(), Nullable); - } - } auto heapType = type.getHeapType(); if (type.isNullable() && heapType.isBasic() && !heapType.isShared()) { switch (heapType.getBasic(Unshared)) { @@ -1902,15 +1887,10 @@ void WasmBinaryWriter::writeType(Type type) { } void WasmBinaryWriter::writeHeapType(HeapType type, Exactness exactness) { - // ref.null always has a bottom heap type in Binaryen IR, but those types are - // only actually valid with GC. Otherwise, emit the corresponding valid top - // types instead. + type = type.asWrittenGivenFeatures(wasm->features); if (!wasm->features.hasCustomDescriptors()) { exactness = Inexact; } - if (!wasm->features.hasGC()) { - type = type.getTop(); - } assert(!type.isBasic() || exactness == Inexact); if (exactness == Exact) { o << uint8_t(BinaryConsts::EncodedType::Exact); diff --git a/src/wasm/wasm-type-shape.cpp b/src/wasm/wasm-type-shape.cpp index d2de6505d44..814959e7621 100644 --- a/src/wasm/wasm-type-shape.cpp +++ b/src/wasm/wasm-type-shape.cpp @@ -136,6 +136,10 @@ template struct RecGroupComparator { } Comparison compare(Type a, Type b) { + // Compare types as they will eventually be written out, not as they are in + // the IR. + a = a.asWrittenGivenFeatures(features); + b = b.asWrittenGivenFeatures(features); if (a.isBasic() != b.isBasic()) { return b.isBasic() < a.isBasic() ? LT : GT; } @@ -152,14 +156,12 @@ template struct RecGroupComparator { return compare(a.getTuple(), b.getTuple()); } assert(a.isRef() && b.isRef()); - // Only consider exactness if custom descriptors are enabled. Otherwise, it - // will be erased when the types are written, so we ignore it here, too. - if (features.hasCustomDescriptors() && a.isExact() != b.isExact()) { - return a.isExact() < b.isExact() ? LT : GT; - } if (a.isNullable() != b.isNullable()) { return a.isNullable() < b.isNullable() ? LT : GT; } + if (a.isExact() != b.isExact()) { + return a.isExact() < b.isExact() ? LT : GT; + } return compare(a.getHeapType(), b.getHeapType()); } @@ -294,6 +296,9 @@ struct RecGroupHasher { } size_t hash(Type type) { + // Hash types as they will eventually be written out, not as they are in the + // IR. + type = type.asWrittenGivenFeatures(features); size_t digest = wasm::hash(type.isBasic()); if (type.isBasic()) { wasm::rehash(digest, type.getBasic()); @@ -305,10 +310,8 @@ struct RecGroupHasher { return digest; } assert(type.isRef()); - if (features.hasCustomDescriptors()) { - wasm::rehash(digest, type.isExact()); - } wasm::rehash(digest, type.isNullable()); + wasm::rehash(digest, type.isExact()); hash_combine(digest, hash(type.getHeapType())); return digest; } @@ -372,15 +375,16 @@ bool ComparableRecGroupShape::operator>(const RecGroupShape& other) const { const std::vector& UniqueRecGroups::insert(std::vector types) { - auto& group = *groups.emplace(groups.end(), std::move(types)); - if (shapes.emplace(RecGroupShape(group, features)).second) { + auto groupIt = groups.emplace(groups.end(), std::move(types)); + auto& group = *groupIt; + if (shapes.emplace(RecGroupShape(group, features), groupIt).second) { // The types are already unique. return group; } // There is a conflict. Find a brand that makes the group unique. BrandTypeIterator brand; group.push_back(*brand); - while (!shapes.emplace(RecGroupShape(group, features)).second) { + while (!shapes.emplace(RecGroupShape(group, features), groupIt).second) { group.back() = *++brand; } // Rebuild the rec group to include the brand. Map the old types (excluding @@ -405,6 +409,17 @@ UniqueRecGroups::insert(std::vector types) { return group; } +const std::vector& +UniqueRecGroups::insertOrGet(std::vector types) { + auto groupIt = groups.emplace(groups.end(), std::move(types)); + auto [it, inserted] = + shapes.emplace(RecGroupShape(*groupIt, features), groupIt); + if (!inserted) { + groups.erase(groupIt); + } + return *it->second; +} + } // namespace wasm namespace std { diff --git a/src/wasm/wasm-type.cpp b/src/wasm/wasm-type.cpp index 265d4ac5b50..80fd246c8ba 100644 --- a/src/wasm/wasm-type.cpp +++ b/src/wasm/wasm-type.cpp @@ -15,20 +15,15 @@ */ #include -#include #include -#include -#include #include #include #include -#include -#include "compiler-support.h" #include "support/hash.h" -#include "support/insert_ordered.h" #include "wasm-features.h" #include "wasm-type-printing.h" +#include "wasm-type-shape.h" #include "wasm-type.h" #define TRACE_CANONICALIZATION 0 @@ -623,7 +618,7 @@ bool Type::isDefaultable() const { return isConcrete() && !isNonNullable(); } -bool Type::isCastable() { return isRef() && getHeapType().isCastable(); } +bool Type::isCastable() const { return isRef() && getHeapType().isCastable(); } unsigned Type::getByteSize() const { // TODO: alignment? @@ -1468,6 +1463,9 @@ std::ostream& operator<<(std::ostream& os, TypeBuilder::ErrorReason reason) { return os << "Heap type describes an invalid unshared type"; case TypeBuilder::ErrorReason::RequiresCustomDescriptors: return os << "custom descriptors required but not enabled"; + case TypeBuilder::ErrorReason::RecGroupCollision: + return os + << "distinct rec groups would be identical after binary writing"; } WASM_UNREACHABLE("Unexpected error reason"); } @@ -2260,7 +2258,19 @@ struct TypeBuilder::Impl { // We will validate features as we go. FeatureSet features; - Impl(size_t n, FeatureSet features) : entries(n), features(features) {} + // We allow some types to be used even if their corresponding features are not + // enabled. For example, we allow exact references without custom descriptors + // and typed function references without GC. Allowing these more-refined types + // in the IR helps the optimizer be more powerful. However, these disallowed + // refinements will be erased when a module is written out as a binary, which + // could cause distinct rec groups to become identical and potentially change + // the results of casts, etc. To avoid this, we must disallow building rec + // groups that vary only in some refinement that will be removed in binary + // writing. Track this with a UniqueRecGroups set, which is feature-aware. + UniqueRecGroups unique; + + Impl(size_t n, FeatureSet features) + : entries(n), features(features), unique(features) {} }; TypeBuilder::TypeBuilder(size_t n, FeatureSet features) { @@ -2692,6 +2702,17 @@ TypeBuilder::BuildResult TypeBuilder::build() { assert(built->size() == groupSize); results.insert(results.end(), built->begin(), built->end()); + // If we are building multiple groups, make sure there will be no conflicts + // after disallowed features are taken into account. + if (groupSize > 0 && groupSize != entryCount) { + auto expectedFirst = (*built)[0]; + auto& types = impl->unique.insertOrGet(*built); + if (types[0] != expectedFirst) { + return {TypeBuilder::Error{ + groupStart, TypeBuilder::ErrorReason::RecGroupCollision}}; + } + } + groupStart += groupSize; } diff --git a/test/lit/binary/erase-trivial-cast-exactness.wast b/test/lit/binary/erase-trivial-cast-exactness.wast new file mode 100644 index 00000000000..6e49cac65df --- /dev/null +++ b/test/lit/binary/erase-trivial-cast-exactness.wast @@ -0,0 +1,39 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s -all --disable-custom-descriptors --roundtrip -S -o - | filecheck %s + +(module + ;; CHECK: (type $foo (struct)) + (type $foo (struct)) + ;; CHECK: (func $trivial-exact-cast (type $1) (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $foo) + ;; CHECK-NEXT: (if (result (ref $foo)) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (struct.new_default $foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (else + ;; CHECK-NEXT: (struct.new_default $foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $trivial-exact-cast (param $x i32) + (drop + ;; We allow trivial exact casts even without custom descriptors enabled, + ;; so this is valid. However, when we round trip, the exactness in the if + ;; result will be erased. If we fail to erase the exactness in the cast, + ;; we will be emitting a binary that uses a disabled feature, and also we + ;; will fail validation when we read the module back in because the cast + ;; will no longer be trivial. + (ref.cast (ref (exact $foo)) + (if (result (ref (exact $foo))) + (local.get $x) + (then (struct.new $foo)) + (else (struct.new $foo)) + ) + ) + ) + ) +) diff --git a/test/lit/validation/no-type-collision-stringref.wast b/test/lit/validation/no-type-collision-stringref.wast new file mode 100644 index 00000000000..b8d00ce3259 --- /dev/null +++ b/test/lit/validation/no-type-collision-stringref.wast @@ -0,0 +1,24 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s -all --disable-gc -S -o - | filecheck %s + +;; No collision - we should not write a stringref as an externref. +(module + ;; CHECK: (type $A (func (param externref))) + (type $A (func (param externref))) + ;; CHECK: (type $B (func (param stringref))) + (type $B (func (param stringref))) + + ;; CHECK: (func $a (param $0 externref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $a (type $A) + (nop) + ) + + ;; CHECK: (func $b (param $0 stringref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $b (type $B) + (nop) + ) +) diff --git a/test/lit/validation/type-collision-exact.wast b/test/lit/validation/type-collision-exact.wast new file mode 100644 index 00000000000..3efda2de3b7 --- /dev/null +++ b/test/lit/validation/type-collision-exact.wast @@ -0,0 +1,9 @@ +;; RUN: not wasm-opt %s -all --disable-custom-descriptors 2>&1 | filecheck %s + +;; CHECK: error: invalid type: distinct rec groups would be identical after binary writing + +(module + (type $foo (struct)) + (type $A (struct (field (ref $foo)))) + (type $B (struct (field (ref (exact $foo))))) +) diff --git a/test/lit/validation/type-collision-funcref.wast b/test/lit/validation/type-collision-funcref.wast new file mode 100644 index 00000000000..953eb72ca3b --- /dev/null +++ b/test/lit/validation/type-collision-funcref.wast @@ -0,0 +1,9 @@ +;; RUN: not wasm-opt %s -all --disable-gc 2>&1 | filecheck %s + +;; CHECK: error: invalid type: distinct rec groups would be identical after binary writing + +(module + (type $foo (func)) + (type $A (func (param (ref null $foo)))) + (type $B (func (param funcref))) +) diff --git a/test/lit/validation/type-collision-null.wast b/test/lit/validation/type-collision-null.wast new file mode 100644 index 00000000000..cc8b29ad9ae --- /dev/null +++ b/test/lit/validation/type-collision-null.wast @@ -0,0 +1,8 @@ +;; RUN: not wasm-opt %s -all --disable-gc 2>&1 | filecheck %s + +;; CHECK: error: invalid type: distinct rec groups would be identical after binary writing + +(module + (type $A (func (param externref))) + (type $B (func (param (ref noextern)))) +)