From fafc7ad38c08bf8dd65f59ffe64ae76efe021c61 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 6 Nov 2019 13:40:51 -0800 Subject: [PATCH 1/8] Add "known to not contain non-arithmetic NaNs" to ExtraInfo in LLVM backend. Not wired up yet. --- lib/llvm-backend/src/code.rs | 118 ++++++++++++++++------------------ lib/llvm-backend/src/state.rs | 72 ++++++++++++++++++--- 2 files changed, 120 insertions(+), 70 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index b4f113a2f1b..874033eef85 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -388,16 +388,14 @@ fn v128_into_int_vec( info: ExtraInfo, int_vec_ty: VectorType, ) -> VectorValue { - let value = match info { - ExtraInfo::None => value, - ExtraInfo::PendingF32NaN => { - let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); - canonicalize_nans(builder, intrinsics, value) - } - ExtraInfo::PendingF64NaN => { - let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); - canonicalize_nans(builder, intrinsics, value) - } + let value = if info.has_pending_f32_nan() { + let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); + canonicalize_nans(builder, intrinsics, value) + } else if info.has_pending_f64_nan() { + let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); + canonicalize_nans(builder, intrinsics, value) + } else { + value }; builder .build_bitcast(value, int_vec_ty, "") @@ -448,7 +446,7 @@ fn v128_into_f32x4( value: BasicValueEnum, info: ExtraInfo, ) -> VectorValue { - let value = if info == ExtraInfo::PendingF64NaN { + let value = if info.has_pending_f64_nan() { let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); canonicalize_nans(builder, intrinsics, value) } else { @@ -467,7 +465,7 @@ fn v128_into_f64x2( value: BasicValueEnum, info: ExtraInfo, ) -> VectorValue { - let value = if info == ExtraInfo::PendingF32NaN { + let value = if info.has_pending_f32_nan() { let value = 
builder.build_bitcast(value, intrinsics.f32x4_ty, ""); canonicalize_nans(builder, intrinsics, value) } else { @@ -484,32 +482,30 @@ fn apply_pending_canonicalization( value: BasicValueEnum, info: ExtraInfo, ) -> BasicValueEnum { - match info { - ExtraInfo::None => value, - ExtraInfo::PendingF32NaN => { - if value.get_type().is_vector_type() - || value.get_type() == intrinsics.i128_ty.as_basic_type_enum() - { - let ty = value.get_type(); - let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); - let value = canonicalize_nans(builder, intrinsics, value); - builder.build_bitcast(value, ty, "") - } else { - canonicalize_nans(builder, intrinsics, value) - } + if info.has_pending_f32_nan() { + if value.get_type().is_vector_type() + || value.get_type() == intrinsics.i128_ty.as_basic_type_enum() + { + let ty = value.get_type(); + let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); + let value = canonicalize_nans(builder, intrinsics, value); + builder.build_bitcast(value, ty, "") + } else { + canonicalize_nans(builder, intrinsics, value) } - ExtraInfo::PendingF64NaN => { - if value.get_type().is_vector_type() - || value.get_type() == intrinsics.i128_ty.as_basic_type_enum() - { - let ty = value.get_type(); - let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); - let value = canonicalize_nans(builder, intrinsics, value); - builder.build_bitcast(value, ty, "") - } else { - canonicalize_nans(builder, intrinsics, value) - } + } else if info.has_pending_f64_nan() { + if value.get_type().is_vector_type() + || value.get_type() == intrinsics.i128_ty.as_basic_type_enum() + { + let ty = value.get_type(); + let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); + let value = canonicalize_nans(builder, intrinsics, value); + builder.build_bitcast(value, ty, "") + } else { + canonicalize_nans(builder, intrinsics, value) } + } else { + value } } @@ -2747,13 +2743,13 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, 
v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Add => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2761,7 +2757,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2769,19 +2765,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Sub => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Sub => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, 
ExtraInfo::pending_f64_nan()); } Operator::F32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2789,7 +2785,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2797,19 +2793,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Mul => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Mul => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2817,7 +2813,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64x2Mul => { let ((v1, i1), (v2, i2)) = 
state.pop2_extra()?; @@ -2825,19 +2821,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Div => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_div(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Div => { let (v1, v2) = state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_div(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32x4Div => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2845,7 +2841,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_div(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64x2Div => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2853,7 +2849,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_div(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Sqrt => { let input = state.pop1()?; @@ -2862,7 +2858,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { 
.try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Sqrt => { let input = state.pop1()?; @@ -2871,7 +2867,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32x4Sqrt => { let (v, i) = state.pop1_extra()?; @@ -2886,7 +2882,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .left() .unwrap(); let bits = builder.build_bitcast(res, intrinsics.i128_ty, "bits"); - state.push1_extra(bits, ExtraInfo::PendingF32NaN); + state.push1_extra(bits, ExtraInfo::pending_f32_nan()); } Operator::F64x2Sqrt => { let (v, i) = state.pop1_extra()?; @@ -3386,7 +3382,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Ceil => { let input = state.pop1()?; @@ -3395,7 +3391,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Floor => { let input = state.pop1()?; @@ -3404,7 +3400,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Floor => { let input = state.pop1()?; @@ -3413,7 +3409,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32Trunc => { let (v, i) = state.pop1_extra()?; @@ -4311,13 +4307,13 @@ impl 
FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v = state.pop1()?; let v = v.into_float_value(); let res = builder.build_float_trunc(v, intrinsics.f32_ty, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF32NaN); + state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64PromoteF32 => { let v = state.pop1()?; let v = v.into_float_value(); let res = builder.build_float_ext(v, intrinsics.f64_ty, &state.var_name()); - state.push1_extra(res, ExtraInfo::PendingF64NaN); + state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32ConvertSI32 | Operator::F32ConvertSI64 => { let v1 = state.pop1()?.into_int_value(); diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index 0d46dc5c099..c051dde0bd9 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -4,6 +4,7 @@ use inkwell::{ }; use smallvec::SmallVec; use std::cell::Cell; +use std::ops::Add; use wasmparser::BinaryReaderError; #[derive(Debug)] @@ -68,22 +69,75 @@ impl ControlFrame { } #[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)] -pub enum ExtraInfo { - None, - - // This values is required to be arithmetic 32-bit NaN (or 32x4) by the WAsm +pub struct ExtraInfo { + state: u8, +} +impl ExtraInfo { + // This value is required to be arithmetic 32-bit NaN (or 32x4) by the WAsm // machine, but which might not be in the LLVM value. The conversion to // arithmetic NaN is pending. It is required for correctness. - PendingF32NaN, + pub fn pending_f32_nan() -> ExtraInfo { + ExtraInfo { state: 1 } + } - // This values is required to be arithmetic 64-bit NaN (or 64x2) by the WAsm + // This value is required to be arithmetic 64-bit NaN (or 64x2) by the WAsm // machine, but which might not be in the LLVM value. The conversion to // arithmetic NaN is pending. It is required for correctness. 
- PendingF64NaN, + pub fn pending_f64_nan() -> ExtraInfo { + ExtraInfo { state: 2 } + } + + // This value either does not contain a 32-bit NaN, or it contains an + // arithmetic NaN. In SIMD, applies to all 4 lanes. + pub fn arithmetic_f32() -> ExtraInfo { + ExtraInfo { state: 4 } + } + + // This value either does not contain a 64-bit NaN, or it contains an + // arithmetic NaN. In SIMD, applies to both lanes. + pub fn arithmetic_f64() -> ExtraInfo { + ExtraInfo { state: 8 } + } + + pub fn has_pending_f32_nan(&self) -> bool { + self.state & ExtraInfo::pending_f32_nan().state != 0 + } + pub fn has_pending_f64_nan(&self) -> bool { + self.state & ExtraInfo::pending_f64_nan().state != 0 + } + pub fn is_arithmetic_f32(&self) -> bool { + self.state & ExtraInfo::arithmetic_f32().state != 0 + } + pub fn is_arithmetic_f64(&self) -> bool { + self.state & ExtraInfo::arithmetic_f64().state != 0 + } } impl Default for ExtraInfo { fn default() -> Self { - ExtraInfo::None + ExtraInfo { state: 0 } + } +} +impl Add for ExtraInfo { + type Output = Self; + + fn add(self, other: Self) -> Self { + // A value must never be pending-NaN at two widths at once. + assert!(!(self.has_pending_f32_nan() && other.has_pending_f64_nan())); + assert!(!(self.has_pending_f64_nan() && other.has_pending_f32_nan())); + ExtraInfo { + state: if self.is_arithmetic_f32() || other.is_arithmetic_f32() { + ExtraInfo::arithmetic_f32().state + } else if self.has_pending_f32_nan() || other.has_pending_f32_nan() { + ExtraInfo::pending_f32_nan().state + } else { + 0 + } + if self.is_arithmetic_f64() || other.is_arithmetic_f64() { + ExtraInfo::arithmetic_f64().state + } else if self.has_pending_f64_nan() || other.has_pending_f64_nan() { + ExtraInfo::pending_f64_nan().state + } else { + 0 + }, + } } } @@ -165,7 +219,7 @@ impl State { } pub fn push1(&mut self, value: T) { - self.push1_extra(value, ExtraInfo::None); + self.push1_extra(value, Default::default()); } pub fn push1_extra(&mut self, value: T, info: ExtraInfo) { From 26c8fd52c8d5456479fc81114a56c8cbf2c71229 Mon Sep 17 00:00:00
2001 From: Nick Lewycky Date: Wed, 6 Nov 2019 17:14:32 -0800 Subject: [PATCH 2/8] Initial implementation of "known to be arithmetic NaN / not NaN". --- lib/llvm-backend/src/code.rs | 430 ++++++++++++++++++++++------------ lib/llvm-backend/src/state.rs | 50 ++-- 2 files changed, 319 insertions(+), 161 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 874033eef85..5b8d078cc68 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -1484,21 +1484,41 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // Generate const values. Operator::I32Const { value } => { let i = intrinsics.i32_ty.const_int(value as u64, false); - state.push1(i); + let info = if is_f32_arithmetic(value as u32) { + ExtraInfo::arithmetic_f32() + } else { + Default::default() + }; + state.push1_extra(i, info); } Operator::I64Const { value } => { let i = intrinsics.i64_ty.const_int(value as u64, false); - state.push1(i); + let info = if is_f64_arithmetic(value as u64) { + ExtraInfo::arithmetic_f64() + } else { + Default::default() + }; + state.push1_extra(i, info); } Operator::F32Const { value } => { let bits = intrinsics.i32_ty.const_int(value.bits() as u64, false); + let info = if is_f32_arithmetic(value.bits()) { + ExtraInfo::arithmetic_f32() + } else { + Default::default() + }; let f = builder.build_bitcast(bits, intrinsics.f32_ty, "f"); - state.push1(f); + state.push1_extra(f, info); } Operator::F64Const { value } => { let bits = intrinsics.i64_ty.const_int(value.bits(), false); + let info = if is_f64_arithmetic(value.bits()) { + ExtraInfo::arithmetic_f64() + } else { + Default::default() + }; let f = builder.build_bitcast(bits, intrinsics.f64_ty, "f"); - state.push1(f); + state.push1_extra(f, info); } Operator::V128Const { value } => { let mut hi: [u8; 8] = Default::default(); @@ -1507,11 +1527,31 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { lo.copy_from_slice(&value.bytes()[8..16]); let packed = 
[u64::from_le_bytes(hi), u64::from_le_bytes(lo)]; let i = intrinsics.i128_ty.const_int_arbitrary_precision(&packed); - state.push1(i); + let mut quad1: [u8; 4] = Default::default(); + let mut quad2: [u8; 4] = Default::default(); + let mut quad3: [u8; 4] = Default::default(); + let mut quad4: [u8; 4] = Default::default(); + quad1.copy_from_slice(&value.bytes()[0..4]); + quad2.copy_from_slice(&value.bytes()[4..8]); + quad3.copy_from_slice(&value.bytes()[8..12]); + quad4.copy_from_slice(&value.bytes()[12..16]); + let mut info: ExtraInfo = Default::default(); + if is_f32_arithmetic(u32::from_le_bytes(quad1)) + && is_f32_arithmetic(u32::from_le_bytes(quad2)) + && is_f32_arithmetic(u32::from_le_bytes(quad3)) + && is_f32_arithmetic(u32::from_le_bytes(quad4)) + { + info |= ExtraInfo::arithmetic_f32(); + } + if is_f64_arithmetic(packed[0]) && is_f64_arithmetic(packed[1]) { + info |= ExtraInfo::arithmetic_f64(); + } + state.push1_extra(i, info); } Operator::I8x16Splat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = v.into_int_value(); let v = builder.build_int_truncate(v, intrinsics.i8_ty, ""); let res = splat_vector( builder, @@ -1521,10 +1561,11 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i); } Operator::I16x8Splat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = v.into_int_value(); let v = builder.build_int_truncate(v, intrinsics.i16_ty, ""); let res = splat_vector( builder, @@ -1534,10 +1575,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i); } Operator::I32x4Splat => { - let v = state.pop1()?; + let (v, i) = state.pop1_extra()?; let res = splat_vector( builder, intrinsics, @@ -1546,10 +1587,10 
@@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i); } Operator::I64x2Splat => { - let v = state.pop1()?; + let (v, i) = state.pop1_extra()?; let res = splat_vector( builder, intrinsics, @@ -1558,7 +1599,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i); } Operator::F32x4Splat => { let (v, i) = state.pop1_extra()?; @@ -1674,7 +1715,20 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::Select => { - let (v1, v2, cond) = state.pop3()?; + let ((v1, i1), (v2, i2), (cond, _)) = state.pop3_extra()?; + // We don't bother canonicalizing 'cond' here because we only + // compare it to zero, and that's invariant under + // canonicalization. + let (v1, v2) = if i1.has_pending_f32_nan() != i2.has_pending_f32_nan() + || i1.has_pending_f64_nan() != i2.has_pending_f64_nan() + { + ( + apply_pending_canonicalization(builder, intrinsics, v1, i1), + apply_pending_canonicalization(builder, intrinsics, v2, i2), + ) + } else { + (v1, v2) + }; let cond_value = builder.build_int_compare( IntPredicate::NE, cond.into_int_value(), @@ -1682,7 +1736,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_select(cond_value, v1, v2, &state.var_name()); - state.push1(res); + let info = { + let mut info = i1 & i2; + if i1.has_pending_f32_nan() { + assert!(i2.has_pending_f32_nan()); + info |= ExtraInfo::pending_f32_nan(); + } + if i1.has_pending_f64_nan() { + assert!(i2.has_pending_f64_nan()); + info |= ExtraInfo::pending_f64_nan(); + } + info + }; + state.push1_extra(res, info); } Operator::Call { function_index } => { let func_index = FuncIndex::new(function_index as usize); @@ -2650,7 +2716,7 @@ impl FunctionCodeGenerator for
LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Clz => { let input = state.pop1()?; @@ -2664,7 +2730,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32Ctz => { let input = state.pop1()?; @@ -2678,7 +2744,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Ctz => { let input = state.pop1()?; @@ -2692,7 +2758,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32Popcnt => { let input = state.pop1()?; @@ -2701,7 +2767,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Popcnt => { let input = state.pop1()?; @@ -2710,7 +2776,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .try_as_basic_value() .left() .unwrap(); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32Eqz => { let input = state.pop1()?.into_int_value(); @@ -2721,7 +2787,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Eqz => { let input = state.pop1()?.into_int_value(); @@ -2732,7 +2798,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + 
state.push1_extra(res, ExtraInfo::arithmetic_f64()); } /*************************** @@ -2740,16 +2806,16 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { * https://github.com/sunfishcode/wasm-reference-manual/blob/master/WebAssembly.md#floating-point-arithmetic-instructions ***************************/ Operator::F32Add => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Add => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2757,7 +2823,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2765,19 +2831,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32Sub => { - let (v1, v2) = state.pop2()?; 
+ let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Sub => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2785,7 +2851,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2793,19 +2859,19 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32Mul => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Mul => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), 
(v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2813,7 +2879,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -2821,7 +2887,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32Div => { let (v1, v2) = state.pop2()?; @@ -2951,7 +3017,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { builder.build_select(builder.build_or(v1_is_nan, min_cmp, ""), v1, v2, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::F64Min => { // This implements the same logic as LLVM's @llvm.minimum @@ -3005,7 +3071,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { builder.build_select(builder.build_or(v1_is_nan, min_cmp, ""), v1, v2, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. 
- state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::F32x4Min => { // This implements the same logic as LLVM's @llvm.minimum @@ -3071,7 +3137,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::F64x2Min => { // This implements the same logic as LLVM's @llvm.minimum @@ -3137,7 +3203,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::F32Max => { // This implements the same logic as LLVM's @llvm.maximum @@ -3190,7 +3256,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { builder.build_select(builder.build_or(v1_is_nan, min_cmp, ""), v1, v2, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::F64Max => { // This implements the same logic as LLVM's @llvm.maximum @@ -3243,7 +3309,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { builder.build_select(builder.build_or(v1_is_nan, min_cmp, ""), v1, v2, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. 
- state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::F32x4Max => { // This implements the same logic as LLVM's @llvm.maximum @@ -3308,7 +3374,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::F64x2Max => { // This implements the same logic as LLVM's @llvm.maximum @@ -3373,43 +3439,43 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // Because inputs were canonicalized, we always produce // canonical NaN outputs. No pending NaN cleanup. - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::F32Ceil => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; let res = builder .build_call(intrinsics.ceil_f32, &[input], &state.var_name()) .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, info | ExtraInfo::pending_f32_nan()); } Operator::F64Ceil => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; let res = builder .build_call(intrinsics.ceil_f64, &[input], &state.var_name()) .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, info | ExtraInfo::pending_f64_nan()); } Operator::F32Floor => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; let res = builder .build_call(intrinsics.floor_f32, &[input], &state.var_name()) .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::pending_f32_nan()); + state.push1_extra(res, info | ExtraInfo::pending_f32_nan()); } Operator::F64Floor => { - let input = state.pop1()?; + let (input, info) = 
state.pop1_extra()?; let res = builder .build_call(intrinsics.floor_f64, &[input], &state.var_name()) .try_as_basic_value() .left() .unwrap(); - state.push1_extra(res, ExtraInfo::pending_f64_nan()); + state.push1_extra(res, info | ExtraInfo::pending_f64_nan()); } Operator::F32Trunc => { let (v, i) = state.pop1_extra()?; @@ -3477,7 +3543,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap(); // The exact NaN returned by F32Abs is fully defined. Do not // adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F64Abs => { let (v, i) = state.pop1_extra()?; @@ -3493,7 +3559,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap(); // The exact NaN returned by F64Abs is fully defined. Do not // adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F32x4Abs => { let (v, i) = state.pop1_extra()?; @@ -3511,7 +3577,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // The exact NaN returned by F32x4Abs is fully defined. Do not // adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F64x2Abs => { let (v, i) = state.pop1_extra()?; @@ -3525,7 +3591,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // The exact NaN returned by F32x4Abs is fully defined. Do not // adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F32x4Neg => { let (v, i) = state.pop1_extra()?; @@ -3536,7 +3602,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // The exact NaN returned by F32x4Neg is fully defined. Do not // adjust. 
- state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F64x2Neg => { let (v, i) = state.pop1_extra()?; @@ -3547,7 +3613,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); // The exact NaN returned by F64x2Neg is fully defined. Do not // adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F32Neg | Operator::F64Neg => { let (v, i) = state.pop1_extra()?; @@ -3556,7 +3622,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let res = builder.build_float_neg(v, &state.var_name()); // The exact NaN returned by F32Neg and F64Neg are fully defined. // Do not adjust. - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F32Copysign => { let ((mag, mag_info), (sgn, sgn_info)) = state.pop2_extra()?; @@ -3569,7 +3635,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap(); // The exact NaN returned by F32Copysign is fully defined. // Do not adjust. - state.push1(res); + state.push1_extra(res, mag_info.strip_pending()); } Operator::F64Copysign => { let ((mag, mag_info), (sgn, sgn_info)) = state.pop2_extra()?; @@ -3582,7 +3648,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap(); // The exact NaN returned by F32Copysign is fully defined. // Do not adjust. 
- state.push1(res); + state.push1_extra(res, mag_info.strip_pending()); } /*************************** @@ -3594,7 +3660,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::EQ, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3628,7 +3697,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::NE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3662,7 +3734,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SLT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16LtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3730,7 +3805,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SLE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), 
+ ); } Operator::I8x16LeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3764,7 +3842,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::ULE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16LeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3798,7 +3879,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SGT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16GtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3832,7 +3916,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::UGT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16GtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3900,7 +3987,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::UGE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } 
Operator::I8x16GeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3940,7 +4030,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3966,7 +4059,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::UNE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -3992,7 +4088,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::OLT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Lt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -4018,7 +4117,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::OLE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Le => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -4044,7 +4146,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::OGT, v1, v2, &state.var_name()); 
let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Gt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -4070,7 +4175,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let cond = builder.build_float_compare(FloatPredicate::OGE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::F32x4Ge => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -4108,7 +4216,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::I64ExtendUI32 => { let v1 = state.pop1()?.into_int_value(); let res = builder.build_int_z_extend(v1, intrinsics.i64_ty, &state.var_name()); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32x4TruncSF32x4Sat => { let v = state.pop1()?.into_int_value(); @@ -4383,23 +4491,23 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let (v, i) = state.pop1_extra()?; let v = apply_pending_canonicalization(builder, intrinsics, v, i); let ret = builder.build_bitcast(v, intrinsics.i32_ty, &state.var_name()); - state.push1(ret); + state.push1_extra(ret, ExtraInfo::arithmetic_f32()); } Operator::I64ReinterpretF64 => { let (v, i) = state.pop1_extra()?; let v = apply_pending_canonicalization(builder, intrinsics, v, i); let ret = builder.build_bitcast(v, intrinsics.i64_ty, &state.var_name()); - state.push1(ret); + state.push1_extra(ret, ExtraInfo::arithmetic_f64()); } Operator::F32ReinterpretI32 => { - let v = state.pop1()?; + let (v, i) = state.pop1_extra()?; let ret = builder.build_bitcast(v, intrinsics.f32_ty, &state.var_name()); - state.push1(ret); + state.push1_extra(ret, i); } Operator::F64ReinterpretI64 => { - let v = state.pop1()?; + let (v, 
i) = state.pop1_extra()?; let ret = builder.build_bitcast(v, intrinsics.f64_ty, &state.var_name()); - state.push1(ret); + state.push1_extra(ret, i); } /*************************** @@ -4880,7 +4988,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { intrinsics.i32_ty, &state.var_name(), ); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f32()); } Operator::I32Load16U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -4913,7 +5021,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { intrinsics.i32_ty, &state.var_name(), ); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f32()); } Operator::I64Load8U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -4946,7 +5054,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { intrinsics.i64_ty, &state.var_name(), ); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I64Load16U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -4979,7 +5087,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { intrinsics.i64_ty, &state.var_name(), ); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I64Load32U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -5012,7 +5120,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { intrinsics.i64_ty, &state.var_name(), ); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I32Store8 { ref memarg } | Operator::I64Store8 { ref memarg } => { @@ -5122,7 +5230,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_int_z_extend(res, intrinsics.i32_ty, ""); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16AllTrue | Operator::I16x8AllTrue @@ -5151,7 +5262,10 @@ impl 
FunctionCodeGenerator for LLVMFunctionCodeGenerator { &state.var_name(), ); let res = builder.build_int_z_extend(res, intrinsics.i32_ty, ""); - state.push1(res); + state.push1_extra( + res, + ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + ); } Operator::I8x16ExtractLaneS { lane } => { let (v, i) = state.pop1_extra()?; @@ -5171,7 +5285,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .build_extract_element(v, idx, &state.var_name()) .into_int_value(); let res = builder.build_int_z_extend(res, intrinsics.i32_ty, ""); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I16x8ExtractLaneS { lane } => { let (v, i) = state.pop1_extra()?; @@ -5191,35 +5305,45 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .build_extract_element(v, idx, &state.var_name()) .into_int_value(); let res = builder.build_int_z_extend(res, intrinsics.i32_ty, ""); - state.push1(res); + state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I32x4ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; let v = v128_into_i32x4(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::I64x2ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; let v = v128_into_i64x2(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1(res); + state.push1_extra(res, i.strip_pending()); } Operator::F32x4ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; let v = v128_into_f32x4(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1(res); + let i = if i.has_pending_f64_nan() { + i.strip_pending() + } else { + i + }; + 
state.push1_extra(res, i); } Operator::F64x2ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; let v = v128_into_f64x2(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1(res); + let i = if i.has_pending_f32_nan() { + i.strip_pending() + } else { + i + }; + state.push1_extra(res, i); } Operator::I8x16ReplaceLane { lane } => { let ((v1, i1), (v2, _)) = state.pop2_extra()?; @@ -5242,22 +5366,22 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32x4ReplaceLane { lane } => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); let v2 = v2.into_int_value(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f32()); } Operator::I64x2ReplaceLane { lane } => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); let v2 = v2.into_int_value(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f64()); } Operator::F32x4ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -5267,7 +5391,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + 
state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f32()); } Operator::F64x2ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -5277,7 +5401,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1(res); + state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f64()); } Operator::V8x16Swizzle => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; @@ -5601,7 +5725,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { tbaa_label(self.module.clone(), intrinsics, "memory", load, Some(0)); let result = builder.build_int_z_extend(narrow_result, intrinsics.i32_ty, &state.var_name()); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicLoad16U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -5634,7 +5758,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { tbaa_label(self.module.clone(), intrinsics, "memory", load, Some(0)); let result = builder.build_int_z_extend(narrow_result, intrinsics.i32_ty, &state.var_name()); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f32()); } Operator::I64AtomicLoad8U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -5667,7 +5791,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { tbaa_label(self.module.clone(), intrinsics, "memory", load, Some(0)); let result = builder.build_int_z_extend(narrow_result, intrinsics.i64_ty, &state.var_name()); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicLoad16U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -5700,7 +5824,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { tbaa_label(self.module.clone(), intrinsics, "memory", load, Some(0)); 
let result = builder.build_int_z_extend(narrow_result, intrinsics.i64_ty, &state.var_name()); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicLoad32U { ref memarg } => { let effective_address = resolve_memory_ptr( @@ -5733,7 +5857,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { tbaa_label(self.module.clone(), intrinsics, "memory", load, Some(0)); let result = builder.build_int_z_extend(narrow_result, intrinsics.i64_ty, &state.var_name()); - state.push1(result); + state.push1_extra(result, ExtraInfo::arithmetic_f64()); } Operator::I32AtomicStore { ref memarg } => { let value = state.pop1()?; @@ -5927,7 +6051,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UAdd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -5969,7 +6093,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwAdd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6050,7 +6174,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UAdd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6092,7 +6216,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UAdd { ref memarg } => { 
let value = state.pop1()?.into_int_value(); @@ -6134,7 +6258,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwAdd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6215,7 +6339,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16USub { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6257,7 +6381,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwSub { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6338,7 +6462,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16USub { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6380,7 +6504,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32USub { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6422,7 +6546,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, 
ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwSub { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6503,7 +6627,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UAnd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6545,7 +6669,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwAnd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6626,7 +6750,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UAnd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6668,7 +6792,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UAnd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6710,7 +6834,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwAnd { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6791,7 +6915,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, 
&state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6833,7 +6957,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6873,7 +6997,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I64AtomicRmw8UOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6915,7 +7039,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6957,7 +7081,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -6999,7 +7123,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwOr { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7080,7 +7204,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = 
builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UXor { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7122,7 +7246,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwXor { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7203,7 +7327,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UXor { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7245,7 +7369,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UXor { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7287,7 +7411,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwXor { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7368,7 +7492,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UXchg { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7410,7 +7534,7 @@ impl 
FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwXchg { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7491,7 +7615,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UXchg { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7533,7 +7657,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UXchg { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7575,7 +7699,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Some(0), ); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwXchg { ref memarg } => { let value = state.pop1()?.into_int_value(); @@ -7664,7 +7788,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap() .into_int_value(); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UCmpxchg { ref memarg } => { let (cmp, new) = state.pop2()?; @@ -7714,7 +7838,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap() .into_int_value(); let old = builder.build_int_z_extend(old, intrinsics.i32_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwCmpxchg { 
ref memarg } => { let (cmp, new) = state.pop2()?; @@ -7806,7 +7930,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap() .into_int_value(); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UCmpxchg { ref memarg } => { let (cmp, new) = state.pop2()?; @@ -7856,7 +7980,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap() .into_int_value(); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UCmpxchg { ref memarg } => { let (cmp, new) = state.pop2()?; @@ -7906,7 +8030,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { .unwrap() .into_int_value(); let old = builder.build_int_z_extend(old, intrinsics.i64_ty, &state.var_name()); - state.push1(old); + state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwCmpxchg { ref memarg } => { let (cmp, new) = state.pop2()?; @@ -8391,3 +8515,15 @@ impl ModuleCodeGenerator }) } } + +fn is_f32_arithmetic(bits: u32) -> bool { + // Mask off sign bit. + let bits = bits & 0x7FFF_FFFF; + bits < 0x7FC0_0000 +} + +fn is_f64_arithmetic(bits: u64) -> bool { + // Mask off sign bit. 
+ let bits = bits & 0x7FFF_FFFF_FFFF_FFFF; + bits < 0x7FF8_0000_0000_0000 +} diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index c051dde0bd9..072d43324a2 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -4,7 +4,7 @@ use inkwell::{ }; use smallvec::SmallVec; use std::cell::Cell; -use std::ops::Add; +use std::ops::{BitAnd, BitOr, BitOrAssign}; use wasmparser::BinaryReaderError; #[derive(Debug)] @@ -111,18 +111,26 @@ impl ExtraInfo { pub fn is_arithmetic_f64(&self) -> bool { self.state & ExtraInfo::arithmetic_f64().state != 0 } + + pub fn strip_pending(&self) -> ExtraInfo { + ExtraInfo { + state: self.state + & !(ExtraInfo::pending_f32_nan().state | ExtraInfo::pending_f64_nan().state), + } + } } impl Default for ExtraInfo { fn default() -> Self { ExtraInfo { state: 0 } } } -impl Add for ExtraInfo { +// Union two ExtraInfos. +impl BitOr for ExtraInfo { type Output = Self; - fn add(self, other: Self) -> Self { - assert!(self.has_pending_f32_nan() && other.has_pending_f64_nan()); - assert!(self.has_pending_f64_nan() && other.has_pending_f32_nan()); + fn bitor(self, other: Self) -> Self { + assert!(!(self.has_pending_f32_nan() && other.has_pending_f64_nan())); + assert!(!(self.has_pending_f64_nan() && other.has_pending_f32_nan())); ExtraInfo { state: if self.is_arithmetic_f32() || other.is_arithmetic_f32() { ExtraInfo::arithmetic_f32().state @@ -140,6 +148,29 @@ impl Add for ExtraInfo { } } } +impl BitOrAssign for ExtraInfo { + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } +} + +// Intersection for ExtraInfo. Does not check the "pending" bits, since those +// aren't safe to discard (or even to reorder). Callers are assumed to be in a +// situation where the result will have a pending bit set unconditionally. 
+impl BitAnd for ExtraInfo { + type Output = Self; + fn bitand(self, other: Self) -> Self { + match ( + self.is_arithmetic_f32() && other.is_arithmetic_f32(), + self.is_arithmetic_f64() && other.is_arithmetic_f64(), + ) { + (false, false) => Default::default(), + (true, false) => ExtraInfo::arithmetic_f32(), + (false, true) => ExtraInfo::arithmetic_f64(), + (true, true) => ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), + } + } +} #[derive(Debug)] pub struct State { @@ -251,15 +282,6 @@ impl State { Ok((v1, v2)) } - pub fn pop3( - &mut self, - ) -> Result<(BasicValueEnum, BasicValueEnum, BasicValueEnum), BinaryReaderError> { - let v3 = self.pop1()?; - let v2 = self.pop1()?; - let v1 = self.pop1()?; - Ok((v1, v2, v3)) - } - pub fn pop3_extra( &mut self, ) -> Result< From 284948b6d4c0603a1a425ed8a200b4e3103f1494 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 6 Nov 2019 20:45:50 -0800 Subject: [PATCH 3/8] Refactor so as to convert ExtraInfo when potentially canonicalizing. It seemed like a good idea at the time, but in practice we discard the extra info all or almost all of the time. This also introduces a new bug. In an operation like multiply, it's valid to multiply two values, one with a pending NaN and one without. As written, in the SIMD case (because of the two kinds of pending in play), we assert. 
--- lib/llvm-backend/src/code.rs | 745 ++++++++++++++++++++-------------- lib/llvm-backend/src/state.rs | 20 +- 2 files changed, 465 insertions(+), 300 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 5b8d078cc68..0bbc8e1b013 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -387,19 +387,28 @@ fn v128_into_int_vec( value: BasicValueEnum, info: ExtraInfo, int_vec_ty: VectorType, -) -> VectorValue { - let value = if info.has_pending_f32_nan() { +) -> (VectorValue, ExtraInfo) { + let (value, info) = if info.has_pending_f32_nan() { let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); - canonicalize_nans(builder, intrinsics, value) + ( + canonicalize_nans(builder, intrinsics, value), + info.strip_pending(), + ) } else if info.has_pending_f64_nan() { let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); - canonicalize_nans(builder, intrinsics, value) + ( + canonicalize_nans(builder, intrinsics, value), + info.strip_pending(), + ) } else { - value + (value, info) }; - builder - .build_bitcast(value, int_vec_ty, "") - .into_vector_value() + ( + builder + .build_bitcast(value, int_vec_ty, "") + .into_vector_value(), + info, + ) } fn v128_into_i8x16( @@ -407,7 +416,7 @@ fn v128_into_i8x16( intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { +) -> (VectorValue, ExtraInfo) { v128_into_int_vec(builder, intrinsics, value, info, intrinsics.i8x16_ty) } @@ -416,7 +425,7 @@ fn v128_into_i16x8( intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { +) -> (VectorValue, ExtraInfo) { v128_into_int_vec(builder, intrinsics, value, info, intrinsics.i16x8_ty) } @@ -425,7 +434,7 @@ fn v128_into_i32x4( intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { +) -> (VectorValue, ExtraInfo) { v128_into_int_vec(builder, intrinsics, value, info, intrinsics.i32x4_ty) } @@ -434,7 +443,7 @@ fn v128_into_i64x2( 
intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { +) -> (VectorValue, ExtraInfo) { v128_into_int_vec(builder, intrinsics, value, info, intrinsics.i64x2_ty) } @@ -445,16 +454,22 @@ fn v128_into_f32x4( intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { - let value = if info.has_pending_f64_nan() { +) -> (VectorValue, ExtraInfo) { + let (value, info) = if info.has_pending_f64_nan() { let value = builder.build_bitcast(value, intrinsics.f64x2_ty, ""); - canonicalize_nans(builder, intrinsics, value) + ( + canonicalize_nans(builder, intrinsics, value), + info.strip_pending(), + ) } else { - value + (value, info) }; - builder - .build_bitcast(value, intrinsics.f32x4_ty, "") - .into_vector_value() + ( + builder + .build_bitcast(value, intrinsics.f32x4_ty, "") + .into_vector_value(), + info, + ) } // If the value is pending a 32-bit canonicalization, do it now. @@ -464,16 +479,22 @@ fn v128_into_f64x2( intrinsics: &Intrinsics, value: BasicValueEnum, info: ExtraInfo, -) -> VectorValue { - let value = if info.has_pending_f32_nan() { +) -> (VectorValue, ExtraInfo) { + let (value, info) = if info.has_pending_f32_nan() { let value = builder.build_bitcast(value, intrinsics.f32x4_ty, ""); - canonicalize_nans(builder, intrinsics, value) + ( + canonicalize_nans(builder, intrinsics, value), + info.strip_pending(), + ) } else { - value + (value, info) }; - builder - .build_bitcast(value, intrinsics.f64x2_ty, "") - .into_vector_value() + ( + builder + .build_bitcast(value, intrinsics.f64x2_ty, "") + .into_vector_value(), + info, + ) } fn apply_pending_canonicalization( @@ -1737,7 +1758,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { ); let res = builder.build_select(cond_value, v1, v2, &state.var_name()); let info = { - let mut info = i1 & i2; + let mut info = i1.strip_pending() & i2.strip_pending(); if i1.has_pending_f32_nan() { assert!(i2.has_pending_f32_nan()); info |= ExtraInfo::pending_f32_nan(); 
@@ -2120,49 +2141,55 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { * https://github.com/sunfishcode/wasm-reference-manual/blob/master/WebAssembly.md#integer-arithmetic-instructions ***************************/ Operator::I32Add | Operator::I64Add => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let res = builder.build_int_add(v1, v2, &state.var_name()); state.push1(res); } Operator::I8x16Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I16x8Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } 
Operator::I64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_i64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i64x2(builder, intrinsics, v2, i2); let res = builder.build_int_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I8x16AddSaturateS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.sadd_sat_i8x16, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.sadd_sat_i8x16, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2171,10 +2198,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8AddSaturateS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.sadd_sat_i16x8, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.sadd_sat_i16x8, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2183,10 +2214,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16AddSaturateU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, 
i1).as_basic_value_enum(); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.uadd_sat_i8x16, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.uadd_sat_i8x16, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2195,10 +2230,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8AddSaturateU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.uadd_sat_i16x8, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.uadd_sat_i16x8, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2206,49 +2245,55 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Sub | Operator::I64Sub => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let res = builder.build_int_sub(v1, v2, &state.var_name()); state.push1(res); } Operator::I8x16Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, 
v2, i2); let res = builder.build_int_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I16x8Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_i64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i64x2(builder, intrinsics, v2, i2); let res = builder.build_int_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I8x16SubSaturateS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.ssub_sat_i8x16, &[v1, v2], &state.var_name()) + .build_call( + 
intrinsics.ssub_sat_i8x16, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2257,10 +2302,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8SubSaturateS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.ssub_sat_i16x8, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.ssub_sat_i16x8, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2269,10 +2318,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16SubSaturateU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder - .build_call(intrinsics.usub_sat_i8x16, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.usub_sat_i8x16, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2281,10 +2334,14 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8SubSaturateU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1).as_basic_value_enum(); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2).as_basic_value_enum(); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, 
v2, i2); let res = builder - .build_call(intrinsics.usub_sat_i16x8, &[v1, v2], &state.var_name()) + .build_call( + intrinsics.usub_sat_i16x8, + &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], + &state.var_name(), + ) .try_as_basic_value() .left() .unwrap(); @@ -2292,37 +2349,41 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Mul | Operator::I64Mul => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let res = builder.build_int_mul(v1, v2, &state.var_name()); state.push1(res); } Operator::I8x16Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I16x8Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = 
builder.build_int_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32DivS | Operator::I64DivS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); trap_if_zero_or_overflow(builder, intrinsics, context, &function, v1, v2); @@ -2331,7 +2392,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32DivU | Operator::I64DivU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); trap_if_zero(builder, intrinsics, context, &function, v2); @@ -2340,7 +2403,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32RemS | Operator::I64RemS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let int_type = v1.get_type(); let (min_value, neg_one_value) = if int_type == intrinsics.i32_ty { @@ -2385,7 +2450,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32RemU | Operator::I64RemU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); 
trap_if_zero(builder, intrinsics, context, &function, v2); @@ -2436,15 +2503,18 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Shl | Operator::I64Shl => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); // TODO: missing 'and' of v2? let res = builder.build_left_shift(v1, v2, &state.var_name()); state.push1(res); } Operator::I8x16Shl => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(7, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i8_ty, ""); @@ -2460,8 +2530,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I16x8Shl => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(15, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i16_ty, ""); @@ -2477,8 +2548,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32x4Shl => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i32x4(builder, 
intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(31, false), ""); let v2 = splat_vector( @@ -2493,8 +2565,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64x2Shl => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i64x2(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(63, false), ""); let v2 = builder.build_int_z_extend(v2, intrinsics.i64_ty, ""); @@ -2510,15 +2583,18 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32ShrS | Operator::I64ShrS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); // TODO: check wasm spec, is this missing v2 mod LaneBits? 
let res = builder.build_right_shift(v1, v2, true, &state.var_name()); state.push1(res); } Operator::I8x16ShrS => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(7, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i8_ty, ""); @@ -2534,8 +2610,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I16x8ShrS => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(15, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i16_ty, ""); @@ -2551,8 +2628,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32x4ShrS => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(31, false), ""); let v2 = splat_vector( @@ -2567,8 +2645,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64x2ShrS => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) 
= v128_into_i64x2(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(63, false), ""); let v2 = builder.build_int_z_extend(v2, intrinsics.i64_ty, ""); @@ -2584,14 +2663,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32ShrU | Operator::I64ShrU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let res = builder.build_right_shift(v1, v2, false, &state.var_name()); state.push1(res); } Operator::I8x16ShrU => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(7, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i8_ty, ""); @@ -2607,8 +2689,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I16x8ShrU => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(15, false), ""); let v2 = builder.build_int_truncate(v2, intrinsics.i16_ty, ""); @@ -2624,8 +2707,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } 
Operator::I32x4ShrU => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(31, false), ""); let v2 = splat_vector( @@ -2640,8 +2724,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64x2ShrU => { - let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let (v1, _) = v128_into_i64x2(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); let v2 = builder.build_and(v2, intrinsics.i32_ty.const_int(63, false), ""); let v2 = builder.build_int_z_extend(v2, intrinsics.i64_ty, ""); @@ -2657,7 +2742,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Rotl => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let lhs = builder.build_left_shift(v1, v2, &state.var_name()); let rhs = { @@ -2669,7 +2756,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64Rotl => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let lhs = builder.build_left_shift(v1, v2, &state.var_name()); let rhs = { 
@@ -2681,7 +2770,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Rotr => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let lhs = builder.build_right_shift(v1, v2, false, &state.var_name()); let rhs = { @@ -2693,7 +2784,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64Rotr => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let lhs = builder.build_right_shift(v1, v2, false, &state.var_name()); let rhs = { @@ -2705,7 +2798,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32Clz => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = apply_pending_canonicalization(builder, intrinsics, input, info); let is_zero_undef = intrinsics.i1_zero.as_basic_value_enum(); let res = builder .build_call( @@ -2719,7 +2813,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Clz => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = apply_pending_canonicalization(builder, intrinsics, input, info); let is_zero_undef = intrinsics.i1_zero.as_basic_value_enum(); let res = builder .build_call( @@ -2733,7 +2828,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32Ctz => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = 
apply_pending_canonicalization(builder, intrinsics, input, info); let is_zero_undef = intrinsics.i1_zero.as_basic_value_enum(); let res = builder .build_call( @@ -2747,7 +2843,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Ctz => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = apply_pending_canonicalization(builder, intrinsics, input, info); let is_zero_undef = intrinsics.i1_zero.as_basic_value_enum(); let res = builder .build_call( @@ -2761,7 +2858,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32Popcnt => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = apply_pending_canonicalization(builder, intrinsics, input, info); let res = builder .build_call(intrinsics.ctpop_i32, &[input], &state.var_name()) .try_as_basic_value() @@ -2770,7 +2868,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::arithmetic_f32()); } Operator::I64Popcnt => { - let input = state.pop1()?; + let (input, info) = state.pop1_extra()?; + let input = apply_pending_canonicalization(builder, intrinsics, input, info); let res = builder .build_call(intrinsics.ctpop_i64, &[input], &state.var_name()) .try_as_basic_value() @@ -2808,27 +2907,29 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res = builder.build_float_add(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res 
= builder.build_float_add(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); @@ -2836,27 +2937,29 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res = builder.build_float_sub(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res = builder.build_float_sub(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = 
v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); @@ -2864,27 +2967,29 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res = builder.build_float_mul(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); + let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); let res = builder.build_float_mul(v1, v2, &state.var_name()); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); } Operator::F32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = 
builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); } Operator::F64x2Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); @@ -2903,16 +3008,16 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Div => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_div(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64x2Div => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_div(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1_extra(res, ExtraInfo::pending_f64_nan()); @@ -2937,7 +3042,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Sqrt => { let (v, i) = state.pop1_extra()?; - let v = v128_into_f32x4(builder, intrinsics, v, i); + let (v, _) = v128_into_f32x4(builder, intrinsics, v, i); let res = builder .build_call( intrinsics.sqrt_f32x4, @@ 
-2952,7 +3057,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Sqrt => { let (v, i) = state.pop1_extra()?; - let v = v128_into_f64x2(builder, intrinsics, v, i); + let (v, _) = v128_into_f64x2(builder, intrinsics, v, i); let res = builder .build_call( intrinsics.sqrt_f64x2, @@ -3078,8 +3183,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // intrinsic would, but x86 lowering of that intrinsic // encounters a fatal error in LLVM 8 and LLVM 9. let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); // To detect min(-0.0, 0.0), we check whether the integer // representations are equal. There's one other case where that @@ -3144,8 +3249,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // intrinsic would, but x86 lowering of that intrinsic // encounters a fatal error in LLVM 8 and LLVM 9. let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); // To detect min(-0.0, 0.0), we check whether the integer // representations are equal. There's one other case where that @@ -3316,8 +3421,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // intrinsic would, but x86 lowering of that intrinsic // encounters a fatal error in LLVM 8 and LLVM 9. 
let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); // To detect min(-0.0, 0.0), we check whether the integer // representations are equal. There's one other case where that @@ -3381,8 +3486,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // intrinsic would, but x86 lowering of that intrinsic // encounters a fatal error in LLVM 8 and LLVM 9. let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); // To detect min(-0.0, 0.0), we check whether the integer // representations are equal. There's one other case where that @@ -3656,7 +3761,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { * https://github.com/sunfishcode/wasm-reference-manual/blob/master/WebAssembly.md#integer-comparison-instructions ***************************/ Operator::I32Eq | Operator::I64Eq => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::EQ, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3667,8 +3774,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = 
v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::EQ, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3676,8 +3783,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::EQ, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3685,15 +3792,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::EQ, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32Ne | Operator::I64Ne => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::NE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ 
-3704,8 +3813,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::NE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3713,8 +3822,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::NE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3722,15 +3831,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::NE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32LtS | Operator::I64LtS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = 
apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SLT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3741,8 +3852,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16LtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3750,8 +3861,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8LtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3759,15 +3870,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4LtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); 
let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32LtU | Operator::I64LtU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::ULT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3775,8 +3888,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16LtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3784,8 +3897,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8LtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3793,15 +3906,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4LtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = 
v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32LeS | Operator::I64LeS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SLE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3812,8 +3927,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16LeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3821,8 +3936,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8LeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); 
let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3830,15 +3945,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4LeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SLE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32LeU | Operator::I64LeU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::ULE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3849,8 +3966,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16LeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3858,8 +3975,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8LeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = 
v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3867,15 +3984,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4LeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::ULE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32GtS | Operator::I64GtS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SGT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3886,8 +4005,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16GtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, 
""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3895,8 +4014,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8GtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3904,15 +4023,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4GtS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32GtU | Operator::I64GtU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::UGT, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3923,8 +4044,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16GtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = 
v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3932,8 +4053,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8GtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3941,15 +4062,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4GtU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32GeS | Operator::I64GeS => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 = apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::SGE, v1, v2, &state.var_name()); let res = 
builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3957,8 +4080,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16GeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3966,8 +4089,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8GeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -3975,15 +4098,17 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4GeS => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::SGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32GeU | Operator::I64GeU => { - let (v1, v2) = state.pop2()?; + let ((v1, i1), (v2, i2)) = state.pop2_extra()?; + let v1 
= apply_pending_canonicalization(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let (v1, v2) = (v1.into_int_value(), v2.into_int_value()); let cond = builder.build_int_compare(IntPredicate::UGE, v1, v2, &state.var_name()); let res = builder.build_int_z_extend(cond, intrinsics.i32_ty, &state.var_name()); @@ -3994,8 +4119,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16GeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); - let v2 = v128_into_i8x16(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i8x16(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i8x16_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4003,8 +4128,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8GeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, intrinsics, v1, i1); - let v2 = v128_into_i16x8(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i16x8(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i16x8_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4012,8 +4137,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4GeU => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_i32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_i32x4(builder, intrinsics, v2, i2); let res = builder.build_int_compare(IntPredicate::UGE, v1, v2, 
""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4037,8 +4162,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4046,8 +4171,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Eq => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4066,8 +4191,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::UNE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4075,8 +4200,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } 
Operator::F64x2Ne => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::UNE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4095,8 +4220,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Lt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OLT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4104,8 +4229,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Lt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OLT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4124,8 +4249,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Le => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = 
v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OLE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4133,8 +4258,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Le => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OLE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4153,8 +4278,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Gt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OGT, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4162,8 +4287,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Gt => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OGT, v1, v2, ""); let res = 
builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4182,8 +4307,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4Ge => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = v128_into_f32x4(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f32x4(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i32x4_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4191,8 +4316,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2Ge => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = v128_into_f64x2(builder, intrinsics, v2, i2); + let (v1, _) = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v2, _) = v128_into_f64x2(builder, intrinsics, v2, i2); let res = builder.build_float_compare(FloatPredicate::OGE, v1, v2, ""); let res = builder.build_int_s_extend(res, intrinsics.i64x2_ty, ""); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -4204,22 +4329,30 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { * https://github.com/sunfishcode/wasm-reference-manual/blob/master/WebAssembly.md#conversion-instructions ***************************/ Operator::I32WrapI64 => { - let v1 = state.pop1()?.into_int_value(); - let res = builder.build_int_truncate(v1, intrinsics.i32_ty, &state.var_name()); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); + let res = builder.build_int_truncate(v, intrinsics.i32_ty, &state.var_name()); state.push1(res); } Operator::I64ExtendSI32 => { - let v1 = state.pop1()?.into_int_value(); - let res = 
builder.build_int_s_extend(v1, intrinsics.i64_ty, &state.var_name()); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); + let res = builder.build_int_s_extend(v, intrinsics.i64_ty, &state.var_name()); state.push1(res); } Operator::I64ExtendUI32 => { - let v1 = state.pop1()?.into_int_value(); - let res = builder.build_int_z_extend(v1, intrinsics.i64_ty, &state.var_name()); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); + let res = builder.build_int_z_extend(v, intrinsics.i64_ty, &state.var_name()); state.push1_extra(res, ExtraInfo::arithmetic_f64()); } Operator::I32x4TruncSF32x4Sat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = trunc_sat( builder, intrinsics, @@ -4235,7 +4368,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I32x4TruncUF32x4Sat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = trunc_sat( builder, intrinsics, @@ -4251,7 +4386,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64x2TruncSF64x2Sat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = trunc_sat( builder, intrinsics, @@ -4267,7 +4404,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(res); } Operator::I64x2TruncUF64x2Sat => { - let v = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = 
v.into_int_value(); let res = trunc_sat( builder, intrinsics, @@ -4424,27 +4563,35 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32ConvertSI32 | Operator::F32ConvertSI64 => { - let v1 = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = - builder.build_signed_int_to_float(v1, intrinsics.f32_ty, &state.var_name()); + builder.build_signed_int_to_float(v, intrinsics.f32_ty, &state.var_name()); state.push1(res); } Operator::F64ConvertSI32 | Operator::F64ConvertSI64 => { - let v1 = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = - builder.build_signed_int_to_float(v1, intrinsics.f64_ty, &state.var_name()); + builder.build_signed_int_to_float(v, intrinsics.f64_ty, &state.var_name()); state.push1(res); } Operator::F32ConvertUI32 | Operator::F32ConvertUI64 => { - let v1 = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = - builder.build_unsigned_int_to_float(v1, intrinsics.f32_ty, &state.var_name()); + builder.build_unsigned_int_to_float(v, intrinsics.f32_ty, &state.var_name()); state.push1(res); } Operator::F64ConvertUI32 | Operator::F64ConvertUI64 => { - let v1 = state.pop1()?.into_int_value(); + let (v, i) = state.pop1_extra()?; + let v = apply_pending_canonicalization(builder, intrinsics, v, i); + let v = v.into_int_value(); let res = - builder.build_unsigned_int_to_float(v1, intrinsics.f64_ty, &state.var_name()); + builder.build_unsigned_int_to_float(v, intrinsics.f64_ty, &state.var_name()); state.push1(res); } Operator::F32x4ConvertSI32x4 => { @@ -5185,28 +5332,28 @@ impl FunctionCodeGenerator for 
LLVMFunctionCodeGenerator { } Operator::I8x16Neg => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i8x16(builder, intrinsics, v, i); + let (v, _) = v128_into_i8x16(builder, intrinsics, v, i); let res = builder.build_int_sub(v.get_type().const_zero(), v, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I16x8Neg => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i16x8(builder, intrinsics, v, i); + let (v, _) = v128_into_i16x8(builder, intrinsics, v, i); let res = builder.build_int_sub(v.get_type().const_zero(), v, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I32x4Neg => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i32x4(builder, intrinsics, v, i); + let (v, _) = v128_into_i32x4(builder, intrinsics, v, i); let res = builder.build_int_sub(v.get_type().const_zero(), v, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); } Operator::I64x2Neg => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i64x2(builder, intrinsics, v, i); + let (v, _) = v128_into_i64x2(builder, intrinsics, v, i); let res = builder.build_int_sub(v.get_type().const_zero(), v, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); state.push1(res); @@ -5221,8 +5368,8 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { | Operator::I16x8AnyTrue | Operator::I32x4AnyTrue | Operator::I64x2AnyTrue => { - let (v, i) = state.pop1_extra()?; - let v = apply_pending_canonicalization(builder, intrinsics, v, i).into_int_value(); + // Skip canonicalization, it never changes non-zero values to zero or vice versa. 
+ let v = state.pop1()?.into_int_value(); let res = builder.build_int_compare( IntPredicate::NE, v, @@ -5269,7 +5416,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16ExtractLaneS { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i8x16(builder, intrinsics, v, i); + let (v, _) = v128_into_i8x16(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder .build_extract_element(v, idx, &state.var_name()) @@ -5279,7 +5426,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I8x16ExtractLaneU { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i8x16(builder, intrinsics, v, i); + let (v, _) = v128_into_i8x16(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder .build_extract_element(v, idx, &state.var_name()) @@ -5289,7 +5436,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8ExtractLaneS { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i16x8(builder, intrinsics, v, i); + let (v, _) = v128_into_i16x8(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder .build_extract_element(v, idx, &state.var_name()) @@ -5299,7 +5446,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8ExtractLaneU { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i16x8(builder, intrinsics, v, i); + let (v, _) = v128_into_i16x8(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder .build_extract_element(v, idx, &state.var_name()) @@ -5309,45 +5456,35 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i32x4(builder, intrinsics, v, i); + let (v, i) = v128_into_i32x4(builder, intrinsics, v, i); let idx = 
intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1_extra(res, i.strip_pending()); + state.push1_extra(res, i); } Operator::I64x2ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_i64x2(builder, intrinsics, v, i); + let (v, i) = v128_into_i64x2(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - state.push1_extra(res, i.strip_pending()); + state.push1_extra(res, i); } Operator::F32x4ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_f32x4(builder, intrinsics, v, i); + let (v, i) = v128_into_f32x4(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - let i = if i.has_pending_f64_nan() { - i.strip_pending() - } else { - i - }; state.push1_extra(res, i); } Operator::F64x2ExtractLane { lane } => { let (v, i) = state.pop1_extra()?; - let v = v128_into_f64x2(builder, intrinsics, v, i); + let (v, i) = v128_into_f64x2(builder, intrinsics, v, i); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_extract_element(v, idx, &state.var_name()); - let i = if i.has_pending_f32_nan() { - i.strip_pending() - } else { - i - }; state.push1_extra(res, i); } Operator::I8x16ReplaceLane { lane } => { let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i8x16(builder, intrinsics, v1, i1); + let (v1, _) = v128_into_i8x16(builder, intrinsics, v1, i1); let v2 = v2.into_int_value(); let v2 = builder.build_int_cast(v2, intrinsics.i8_ty, ""); let idx = intrinsics.i32_ty.const_int(lane.into(), false); @@ -5357,7 +5494,7 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I16x8ReplaceLane { lane } => { let ((v1, i1), (v2, _)) = state.pop2_extra()?; - let v1 = v128_into_i16x8(builder, 
intrinsics, v1, i1); + let (v1, _) = v128_into_i16x8(builder, intrinsics, v1, i1); let v2 = v2.into_int_value(); let v2 = builder.build_int_cast(v2, intrinsics.i16_ty, ""); let idx = intrinsics.i32_ty.const_int(lane.into(), false); @@ -5367,8 +5504,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I32x4ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i32x4(builder, intrinsics, v1, i1); + let (v1, i1) = v128_into_i32x4(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); + let i2 = i2.strip_pending(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -5376,8 +5515,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::I64x2ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_i64x2(builder, intrinsics, v1, i1); + let (v1, i1) = v128_into_i64x2(builder, intrinsics, v1, i1); + let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2); let v2 = v2.into_int_value(); + let i2 = i2.strip_pending(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -5385,9 +5526,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F32x4ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f32x4(builder, intrinsics, v1, i1); + let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2).into_float_value(); + let i2 = i2.strip_pending(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, 
&state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -5395,9 +5537,10 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { } Operator::F64x2ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; - let v1 = v128_into_f64x2(builder, intrinsics, v1, i1); + let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let v2 = apply_pending_canonicalization(builder, intrinsics, v2, i2).into_float_value(); + let i2 = i2.strip_pending(); let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); @@ -7741,7 +7884,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(old); } Operator::I32AtomicRmw8UCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ -7791,7 +7936,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmw16UCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ -7841,7 +7988,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(old, ExtraInfo::arithmetic_f32()); } Operator::I32AtomicRmwCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + 
let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ -7883,7 +8032,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1(old); } Operator::I64AtomicRmw8UCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ -7933,7 +8084,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw16UCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ -7983,7 +8136,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmw32UCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, @@ 
-8033,7 +8188,9 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { state.push1_extra(old, ExtraInfo::arithmetic_f64()); } Operator::I64AtomicRmwCmpxchg { ref memarg } => { - let (cmp, new) = state.pop2()?; + let ((cmp, cmp_info), (new, new_info)) = state.pop2_extra()?; + let cmp = apply_pending_canonicalization(builder, intrinsics, cmp, cmp_info); + let new = apply_pending_canonicalization(builder, intrinsics, new, new_info); let (cmp, new) = (cmp.into_int_value(), new.into_int_value()); let effective_address = resolve_memory_ptr( builder, diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index 072d43324a2..82dbdbcf415 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -115,7 +115,7 @@ impl ExtraInfo { pub fn strip_pending(&self) -> ExtraInfo { ExtraInfo { state: self.state - & !(ExtraInfo::arithmetic_f32().state | ExtraInfo::arithmetic_f64().state), + & !(ExtraInfo::pending_f32_nan().state | ExtraInfo::pending_f64_nan().state), } } } @@ -154,13 +154,14 @@ impl BitOrAssign for ExtraInfo { } } -// Intersection for ExtraInfo. Does not check the "pending" bits, since those -// aren't safe to discard (or even to reorder). Callers are assumed to be in a -// situation where the result will have a pending bit set unconditionally. +// Intersection for ExtraInfo. impl BitAnd for ExtraInfo { type Output = Self; fn bitand(self, other: Self) -> Self { - match ( + // Pending canonicalizations are not safe to discard, or even reorder. 
+ assert!(self.has_pending_f32_nan() == other.has_pending_f32_nan()); + assert!(self.has_pending_f64_nan() == other.has_pending_f64_nan()); + let info = match ( self.is_arithmetic_f32() && other.is_arithmetic_f32(), self.is_arithmetic_f64() && other.is_arithmetic_f64(), ) { @@ -168,7 +169,14 @@ impl BitAnd for ExtraInfo { (true, false) => ExtraInfo::arithmetic_f32(), (false, true) => ExtraInfo::arithmetic_f64(), (true, true) => ExtraInfo::arithmetic_f32() | ExtraInfo::arithmetic_f64(), - } + }; + let info = match (self.has_pending_f32_nan(), self.has_pending_f64_nan()) { + (false, false) => info, + (true, false) => info | ExtraInfo::pending_f32_nan(), + (false, true) => info | ExtraInfo::pending_f64_nan(), + (true, true) => panic!(""), + }; + info } } From a06c858087f166e77d7fffe037b922d88e049497 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 6 Nov 2019 22:54:14 -0800 Subject: [PATCH 4/8] Make ExtraInfo bitand check for pending validity. Unfortunately, this is quite buggy. For something as simple as F32Sub, to combine two ExtraInfos, we want to add a new pending_f32_nan(), unless both of the inputs are arithmetic_f32(). In this commit, we incorrectly calculate that we don't need a pending_f32_nan if either one of the inputs was arithmetic_f32(). 
--- lib/llvm-backend/src/code.rs | 118 ++++++++++++++++++++++++++-------- lib/llvm-backend/src/state.rs | 12 +++- 2 files changed, 102 insertions(+), 28 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 0bbc8e1b013..392f931e299 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -2907,92 +2907,110 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f64_nan(); + let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = 
v128_into_f64x2(builder, intrinsics, v2, i2); + let i1 = i1 | ExtraInfo::pending_f64_nan(); + let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f64_nan(); + let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); + let i1 = i1 | 
ExtraInfo::pending_f64_nan(); + let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let (i1, i2) = (i1.strip_pending(), i2.strip_pending()); + let i1 = i1 | ExtraInfo::pending_f64_nan(); + let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); + let i1 = i1 | ExtraInfo::pending_f32_nan(); + let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f32_nan()); + state.push1_extra(res, i1 & i2); } Operator::F64x2Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); + let i1 = i1 | ExtraInfo::pending_f64_nan(); + let i2 = i2 | 
ExtraInfo::pending_f64_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, (i1 & i2) | ExtraInfo::pending_f64_nan()); + state.push1_extra(res, i1 & i2); } Operator::F32Div => { let (v1, v2) = state.pop2()?; @@ -5527,24 +5545,72 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32x4ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f32x4(builder, intrinsics, v1, i1); - let v2 = - apply_pending_canonicalization(builder, intrinsics, v2, i2).into_float_value(); - let i2 = i2.strip_pending(); + let push_pending_f32_nan_to_result = + i1.has_pending_f32_nan() && i2.has_pending_f32_nan(); + let (v1, v2) = if !push_pending_f32_nan_to_result { + ( + apply_pending_canonicalization( + builder, + intrinsics, + v1.as_basic_value_enum(), + i1, + ) + .into_vector_value(), + apply_pending_canonicalization( + builder, + intrinsics, + v2.as_basic_value_enum(), + i2, + ) + .into_float_value(), + ) + } else { + (v1, v2.into_float_value()) + }; let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f32()); + let info = if push_pending_f32_nan_to_result { + ExtraInfo::pending_f32_nan() + } else { + i1.strip_pending() & i2.strip_pending() + }; + state.push1_extra(res, info); } Operator::F64x2ReplaceLane { lane } => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); - let v2 = - apply_pending_canonicalization(builder, intrinsics, v2, i2).into_float_value(); - let i2 = i2.strip_pending(); + let push_pending_f64_nan_to_result = + i1.has_pending_f64_nan() && i2.has_pending_f64_nan(); + let (v1, v2) = if !push_pending_f64_nan_to_result { + ( + 
apply_pending_canonicalization( + builder, + intrinsics, + v1.as_basic_value_enum(), + i1, + ) + .into_vector_value(), + apply_pending_canonicalization( + builder, + intrinsics, + v2.as_basic_value_enum(), + i2, + ) + .into_float_value(), + ) + } else { + (v1, v2.into_float_value()) + }; let idx = intrinsics.i32_ty.const_int(lane.into(), false); let res = builder.build_insert_element(v1, v2, idx, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2 & ExtraInfo::arithmetic_f64()); + let info = if push_pending_f64_nan_to_result { + ExtraInfo::pending_f64_nan() + } else { + i1.strip_pending() & i2.strip_pending() + }; + state.push1_extra(res, info); } Operator::V8x16Swizzle => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index 82dbdbcf415..254b9b34442 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -159,8 +159,16 @@ impl BitAnd for ExtraInfo { type Output = Self; fn bitand(self, other: Self) -> Self { // Pending canonicalizations are not safe to discard, or even reorder. - assert!(self.has_pending_f32_nan() == other.has_pending_f32_nan()); - assert!(self.has_pending_f64_nan() == other.has_pending_f64_nan()); + assert!( + self.has_pending_f32_nan() == other.has_pending_f32_nan() + || self.is_arithmetic_f32() + || other.is_arithmetic_f32() + ); + assert!( + self.has_pending_f64_nan() == other.has_pending_f64_nan() + || self.is_arithmetic_f64() + || other.is_arithmetic_f64() + ); let info = match ( self.is_arithmetic_f32() && other.is_arithmetic_f32(), self.is_arithmetic_f64() && other.is_arithmetic_f64(), From fa576093c276a6b89750d017586d488f718a7bf0 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 6 Nov 2019 23:15:49 -0800 Subject: [PATCH 5/8] Fix ExtraInfo on F32Add and similar. 
We want to ignore the incoming pending NaN state (since the pending will propagate to the output if there was one on the input), and we want to add a new pending NaN state if we can (that is to say, if it isn't cancelled out by both inputs having arithmetic state). Do this by discarding the pending states on the inputs, intersecting them (to keep only the arithmetic state), then union in a pending nan state (which might do nothing, if it's arithmetic). If the above sounds confusing, keep in mind that when a value is arithmetic, the act of performing the "NaN canonicalization" is a no-op. Thus, being arithmetic cancels out pending NaN states. --- lib/llvm-backend/src/code.rs | 84 ++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 392f931e299..3ecad44e7d5 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -2907,110 +2907,122 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { Operator::F32Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32x4Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = 
v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64x2Add => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_add(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32x4Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = 
v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64x2Sub => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_sub(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32x4Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = 
v128_into_f32x4(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f32x4(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f32_nan(); - let i2 = i2 | ExtraInfo::pending_f32_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), + ); } Operator::F64x2Mul => { let ((v1, i1), (v2, i2)) = state.pop2_extra()?; let (v1, i1) = v128_into_f64x2(builder, intrinsics, v1, i1); let (v2, i2) = v128_into_f64x2(builder, intrinsics, v2, i2); - let i1 = i1 | ExtraInfo::pending_f64_nan(); - let i2 = i2 | ExtraInfo::pending_f64_nan(); let res = builder.build_float_mul(v1, v2, &state.var_name()); let res = builder.build_bitcast(res, intrinsics.i128_ty, ""); - state.push1_extra(res, i1 & i2); + state.push1_extra( + res, + (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), + ); } Operator::F32Div => { let (v1, v2) = state.pop2()?; From d1ce8ee20d67d5a44bcdb169c368f66678f10240 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 7 Nov 2019 10:57:10 -0800 Subject: [PATCH 6/8] Give that panic! a message. Also, make it an unreachable!. 
--- lib/llvm-backend/src/state.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index 254b9b34442..357d83d1f00 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -182,7 +182,7 @@ impl BitAnd for ExtraInfo { (false, false) => info, (true, false) => info | ExtraInfo::pending_f32_nan(), (false, true) => info | ExtraInfo::pending_f64_nan(), - (true, true) => panic!(""), + (true, true) => unreachable!("Can't form ExtraInfo with two pending canonicalizations"), }; info } From d3fabe576b1ab9f5167acf885895ad46f613e681 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 7 Nov 2019 10:59:54 -0800 Subject: [PATCH 7/8] Add changelog entry. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5df0b8ae3ab..30af3c6eb78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Special thanks to [@newpavlov](https://github.com/newpavlov) and [@Maxgy](https: - [#939](https://github.com/wasmerio/wasmer/pull/939) Fix bug causing attempts to append to files with WASI to delete the contents of the file - [#940](https://github.com/wasmerio/wasmer/pull/940) Update supported Rust version to 1.38+ - [#923](https://github.com/wasmerio/wasmer/pull/923) Fix memory leak in the C API caused by an incorrect cast in `wasmer_trampoline_buffer_destroy` +- [#934](https://github.com/wasmerio/wasmer/pull/934) Simplify float expressions in the LLVM backend. - [#921](https://github.com/wasmerio/wasmer/pull/921) In LLVM backend, annotate all memory accesses with TBAA metadata. - [#883](https://github.com/wasmerio/wasmer/pull/883) Allow floating point operations to have arbitrary inputs, even including SNaNs. 
- [#856](https://github.com/wasmerio/wasmer/pull/856) Expose methods in the runtime C API to get a WASI import object From ff73c5d71b441c906f13faa8fa8ecc5ba2d18d2f Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 26 Nov 2019 12:15:26 -0800 Subject: [PATCH 8/8] Address review feedback from Mark. Fix a bug in Operator::Select and add a comment to explain the intention. Use derived default for ExtraInfo. Make ExtraInfo associated functions const. Turn two asserts into debug_asserts. --- CHANGELOG.md | 2 +- lib/llvm-backend/src/code.rs | 6 +++++- lib/llvm-backend/src/state.rs | 38 +++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30af3c6eb78..aafd7885b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - [#990](https://github.com/wasmerio/wasmer/pull/990) Default wasmer CLI to `run`. Wasmer will now attempt to parse unrecognized command line options as if they were applied to the run command: `wasmer mywasm.wasm --dir=.` now works! - [#987](https://github.com/wasmerio/wasmer/pull/987) Fix `runtime-c-api` header files when compiled by gnuc. - [#957](https://github.com/wasmerio/wasmer/pull/957) Change the meaning of `wasmer_wasi::is_wasi_module` to detect any type of WASI module, add support for new wasi snapshot_preview1 +- [#934](https://github.com/wasmerio/wasmer/pull/934) Simplify float expressions in the LLVM backend. 
## 0.10.2 - 2019-11-18 @@ -36,7 +37,6 @@ Special thanks to [@newpavlov](https://github.com/newpavlov) and [@Maxgy](https: - [#939](https://github.com/wasmerio/wasmer/pull/939) Fix bug causing attempts to append to files with WASI to delete the contents of the file - [#940](https://github.com/wasmerio/wasmer/pull/940) Update supported Rust version to 1.38+ - [#923](https://github.com/wasmerio/wasmer/pull/923) Fix memory leak in the C API caused by an incorrect cast in `wasmer_trampoline_buffer_destroy` -- [#934](https://github.com/wasmerio/wasmer/pull/934) Simplify float expressions in the LLVM backend. - [#921](https://github.com/wasmerio/wasmer/pull/921) In LLVM backend, annotate all memory accesses with TBAA metadata. - [#883](https://github.com/wasmerio/wasmer/pull/883) Allow floating point operations to have arbitrary inputs, even including SNaNs. - [#856](https://github.com/wasmerio/wasmer/pull/856) Expose methods in the runtime C API to get a WASI import object diff --git a/lib/llvm-backend/src/code.rs b/lib/llvm-backend/src/code.rs index 3ecad44e7d5..d60ec5aea95 100644 --- a/lib/llvm-backend/src/code.rs +++ b/lib/llvm-backend/src/code.rs @@ -1740,7 +1740,11 @@ impl FunctionCodeGenerator for LLVMFunctionCodeGenerator { // We don't bother canonicalizing 'cond' here because we only // compare it to zero, and that's invariant under // canonicalization. - let (v1, v2) = if i1.has_pending_f32_nan() != i1.has_pending_f32_nan() + + // If the pending bits of v1 and v2 are the same, we can pass + // them along to the result. Otherwise, apply pending + // canonicalizations now. 
+ let (v1, v2) = if i1.has_pending_f32_nan() != i2.has_pending_f32_nan() || i1.has_pending_f64_nan() != i2.has_pending_f64_nan() { ( diff --git a/lib/llvm-backend/src/state.rs b/lib/llvm-backend/src/state.rs index 357d83d1f00..b13b3c358ec 100644 --- a/lib/llvm-backend/src/state.rs +++ b/lib/llvm-backend/src/state.rs @@ -68,7 +68,7 @@ impl ControlFrame { } } -#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)] +#[derive(Debug, Default, Eq, PartialEq, Copy, Clone, Hash)] pub struct ExtraInfo { state: u8, } @@ -76,61 +76,65 @@ impl ExtraInfo { // This value is required to be arithmetic 32-bit NaN (or 32x4) by the WAsm // machine, but which might not be in the LLVM value. The conversion to // arithmetic NaN is pending. It is required for correctness. - pub fn pending_f32_nan() -> ExtraInfo { + // + // When applied to a 64-bit value, this flag has no meaning and must be + // ignored. It may be set in such cases to allow for common handling of + // 32 and 64-bit operations. + pub const fn pending_f32_nan() -> ExtraInfo { ExtraInfo { state: 1 } } // This value is required to be arithmetic 64-bit NaN (or 64x2) by the WAsm // machine, but which might not be in the LLVM value. The conversion to // arithmetic NaN is pending. It is required for correctness. - pub fn pending_f64_nan() -> ExtraInfo { + // + // When applied to a 32-bit value, this flag has no meaning and must be + // ignored. It may be set in such cases to allow for common handling of + // 32 and 64-bit operations. + pub const fn pending_f64_nan() -> ExtraInfo { ExtraInfo { state: 2 } } // This value either does not contain a 32-bit NaN, or it contains an // arithmetic NaN. In SIMD, applies to all 4 lanes. - pub fn arithmetic_f32() -> ExtraInfo { + pub const fn arithmetic_f32() -> ExtraInfo { ExtraInfo { state: 4 } } // This value either does not contain a 64-bit NaN, or it contains an // arithmetic NaN. In SIMD, applies to both lanes. 
- pub fn arithmetic_f64() -> ExtraInfo { + pub const fn arithmetic_f64() -> ExtraInfo { ExtraInfo { state: 8 } } - pub fn has_pending_f32_nan(&self) -> bool { + pub const fn has_pending_f32_nan(&self) -> bool { self.state & ExtraInfo::pending_f32_nan().state != 0 } - pub fn has_pending_f64_nan(&self) -> bool { + pub const fn has_pending_f64_nan(&self) -> bool { self.state & ExtraInfo::pending_f64_nan().state != 0 } - pub fn is_arithmetic_f32(&self) -> bool { + pub const fn is_arithmetic_f32(&self) -> bool { self.state & ExtraInfo::arithmetic_f32().state != 0 } - pub fn is_arithmetic_f64(&self) -> bool { + pub const fn is_arithmetic_f64(&self) -> bool { self.state & ExtraInfo::arithmetic_f64().state != 0 } - pub fn strip_pending(&self) -> ExtraInfo { + pub const fn strip_pending(&self) -> ExtraInfo { ExtraInfo { state: self.state & !(ExtraInfo::pending_f32_nan().state | ExtraInfo::pending_f64_nan().state), } } } -impl Default for ExtraInfo { - fn default() -> Self { - ExtraInfo { state: 0 } - } -} + // Union two ExtraInfos. impl BitOr for ExtraInfo { type Output = Self; fn bitor(self, other: Self) -> Self { - assert!(!(self.has_pending_f32_nan() && other.has_pending_f64_nan())); - assert!(!(self.has_pending_f64_nan() && other.has_pending_f32_nan())); + debug_assert!(!(self.has_pending_f32_nan() && other.has_pending_f64_nan())); + debug_assert!(!(self.has_pending_f64_nan() && other.has_pending_f32_nan())); ExtraInfo { state: if self.is_arithmetic_f32() || other.is_arithmetic_f32() { ExtraInfo::arithmetic_f32().state