diff --git a/Android.mk b/Android.mk index 94281162ad..361cfce7a5 100644 --- a/Android.mk +++ b/Android.mk @@ -89,6 +89,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/composite.cpp \ source/opt/const_folding_rules.cpp \ source/opt/constants.cpp \ + source/opt/convert_to_half_pass.cpp \ source/opt/copy_prop_arrays.cpp \ source/opt/dead_branch_elim_pass.cpp \ source/opt/dead_insert_elim_pass.cpp \ @@ -153,6 +154,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/reduce_load_size.cpp \ source/opt/redundancy_elimination.cpp \ source/opt/register_pressure.cpp \ + source/opt/relax_float_ops_pass.cpp \ source/opt/remove_duplicates_pass.cpp \ source/opt/replace_invalid_opc.cpp \ source/opt/scalar_analysis.cpp \ diff --git a/BUILD.gn b/BUILD.gn index b848eaf69f..01167b40d9 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -479,6 +479,8 @@ static_library("spvtools_opt") { "source/opt/const_folding_rules.h", "source/opt/constants.cpp", "source/opt/constants.h", + "source/opt/convert_to_half_pass.cpp", + "source/opt/convert_to_half_pass.h", "source/opt/copy_prop_arrays.cpp", "source/opt/copy_prop_arrays.h", "source/opt/dead_branch_elim_pass.cpp", @@ -611,6 +613,8 @@ static_library("spvtools_opt") { "source/opt/reflect.h", "source/opt/register_pressure.cpp", "source/opt/register_pressure.h", + "source/opt/relax_float_ops_pass.cpp", + "source/opt/relax_float_ops_pass.h", "source/opt/remove_duplicates_pass.cpp", "source/opt/remove_duplicates_pass.h", "source/opt/replace_invalid_opc.cpp", diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index 4e54b1a1c1..4a95a7e236 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -674,6 +674,22 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor = 0); // processed (see IsSSATargetVar for details). Optimizer::PassToken CreateSSARewritePass(); +// Create pass to convert relaxed precision instructions to half precision. 
+// This pass converts as many relaxed float32 arithmetic operations to half as +// possible. It converts any float32 operands to half if needed. It converts +// any resulting half precision values back to float32 as needed. No variables +// are changed. No image operations are changed. +// +// Best if run late since it will generate better code with unneeded function +// scope loads and stores and composite inserts and extracts removed. Also best +// if followed by instruction simplification, redundancy elimination and DCE. +Optimizer::PassToken CreateConvertRelaxedToHalfPass(); + +// Create relax float ops pass. +// This pass decorates all float32 result instructions with RelaxedPrecision +// if not already so decorated. +Optimizer::PassToken CreateRelaxFloatOpsPass(); + // Create copy propagate arrays pass. // This pass looks to copy propagate memory references for arrays. It looks // for specific code patterns to recognize array copies. diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index 2309ca9192..0cb2018af0 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -27,6 +27,7 @@ set(SPIRV_TOOLS_OPT_SOURCES composite.h const_folding_rules.h constants.h + convert_to_half_pass.h copy_prop_arrays.h dead_branch_elim_pass.h dead_insert_elim_pass.h @@ -93,6 +94,7 @@ set(SPIRV_TOOLS_OPT_SOURCES redundancy_elimination.h reflect.h register_pressure.h + relax_float_ops_pass.h remove_duplicates_pass.h replace_invalid_opc.h scalar_analysis.h @@ -132,6 +134,7 @@ set(SPIRV_TOOLS_OPT_SOURCES composite.cpp const_folding_rules.cpp constants.cpp + convert_to_half_pass.cpp copy_prop_arrays.cpp dead_branch_elim_pass.cpp dead_insert_elim_pass.cpp @@ -196,6 +199,7 @@ set(SPIRV_TOOLS_OPT_SOURCES reduce_load_size.cpp redundancy_elimination.cpp register_pressure.cpp + relax_float_ops_pass.cpp remove_duplicates_pass.cpp replace_invalid_opc.cpp scalar_analysis.cpp diff --git a/source/opt/convert_to_half_pass.cpp b/source/opt/convert_to_half_pass.cpp 
new file mode 100644 index 0000000000..4c02c73e2b --- /dev/null +++ b/source/opt/convert_to_half_pass.cpp @@ -0,0 +1,460 @@ +// Copyright (c) 2019 The Khronos Group Inc. +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "convert_to_half_pass.h" + +#include "source/opt/ir_builder.h" + +namespace { + +// Indices of operands in SPIR-V instructions +static const int kImageSampleDrefIdInIdx = 2; + +} // anonymous namespace + +namespace spvtools { +namespace opt { + +bool ConvertToHalfPass::IsArithmetic(Instruction* inst) { + return target_ops_core_.count(inst->opcode()) != 0 || + (inst->opcode() == SpvOpExtInst && + inst->GetSingleWordInOperand(0) == + context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0); +} + +bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) { + uint32_t ty_id = inst->type_id(); + if (ty_id == 0) return false; + return Pass::IsFloat(ty_id, width); +} + +bool ConvertToHalfPass::IsRelaxed(Instruction* inst) { + uint32_t r_id = inst->result_id(); + for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false)) + if (r_inst->opcode() == SpvOpDecorate && + r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision) + return true; + return false; +} + +analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) { + analysis::Float float_ty(width); + 
return context()->get_type_mgr()->GetRegisteredType(&float_ty); +} + +analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len, + uint32_t width) { + analysis::Type* reg_float_ty = FloatScalarType(width); + analysis::Vector vec_ty(reg_float_ty, v_len); + return context()->get_type_mgr()->GetRegisteredType(&vec_ty); +} + +analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt, + uint32_t vty_id, + uint32_t width) { + Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id); + uint32_t v_len = vty_inst->GetSingleWordInOperand(1); + analysis::Type* reg_vec_ty = FloatVectorType(v_len, width); + analysis::Matrix mat_ty(reg_vec_ty, v_cnt); + return context()->get_type_mgr()->GetRegisteredType(&mat_ty); +} + +uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) { + analysis::Type* reg_equiv_ty; + Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id); + if (ty_inst->opcode() == SpvOpTypeMatrix) + reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1), + ty_inst->GetSingleWordInOperand(0), width); + else if (ty_inst->opcode() == SpvOpTypeVector) + reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width); + else // SpvOpTypeFloat + reg_equiv_ty = FloatScalarType(width); + return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty); +} + +void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width, + InstructionBuilder* builder) { + Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp); + uint32_t ty_id = val_inst->type_id(); + uint32_t nty_id = EquivFloatTypeId(ty_id, width); + if (nty_id == ty_id) return; + Instruction* cvt_inst; + if (val_inst->opcode() == SpvOpUndef) + cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef); + else + cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp); + *val_idp = cvt_inst->result_id(); +} + +bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) { + if (inst->opcode() != SpvOpFConvert) return false; + uint32_t mty_id = 
inst->type_id(); + Instruction* mty_inst = get_def_use_mgr()->GetDef(mty_id); + if (mty_inst->opcode() != SpvOpTypeMatrix) return false; + uint32_t vty_id = mty_inst->GetSingleWordInOperand(0); + uint32_t v_cnt = mty_inst->GetSingleWordInOperand(1); + Instruction* vty_inst = get_def_use_mgr()->GetDef(vty_id); + uint32_t cty_id = vty_inst->GetSingleWordInOperand(0); + Instruction* cty_inst = get_def_use_mgr()->GetDef(cty_id); + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + // Convert each component vector, combine them with OpCompositeConstruct + // and replace original instruction. + uint32_t orig_width = (cty_inst->GetSingleWordInOperand(0) == 16) ? 32 : 16; + uint32_t orig_mat_id = inst->GetSingleWordInOperand(0); + uint32_t orig_vty_id = EquivFloatTypeId(vty_id, orig_width); + std::vector opnds = {}; + for (uint32_t vidx = 0; vidx < v_cnt; ++vidx) { + Instruction* ext_inst = builder.AddIdLiteralOp( + orig_vty_id, SpvOpCompositeExtract, orig_mat_id, vidx); + Instruction* cvt_inst = + builder.AddUnaryOp(vty_id, SpvOpFConvert, ext_inst->result_id()); + opnds.push_back({SPV_OPERAND_TYPE_ID, {cvt_inst->result_id()}}); + } + uint32_t mat_id = TakeNextId(); + std::unique_ptr mat_inst(new Instruction( + context(), SpvOpCompositeConstruct, mty_id, mat_id, opnds)); + (void)builder.AddInstruction(std::move(mat_inst)); + context()->ReplaceAllUsesWith(inst->result_id(), mat_id); + // Turn original instruction into copy so it is valid. 
+ inst->SetOpcode(SpvOpCopyObject); + inst->SetResultType(EquivFloatTypeId(mty_id, orig_width)); + get_def_use_mgr()->AnalyzeInstUse(inst); + return true; +} + +void ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) { + context()->get_decoration_mgr()->RemoveDecorationsFrom( + id, [](const Instruction& dec) { + if (dec.opcode() == SpvOpDecorate && + dec.GetSingleWordInOperand(1u) == SpvDecorationRelaxedPrecision) + return true; + else + return false; + }); +} + +bool ConvertToHalfPass::GenHalfArith(Instruction* inst) { + bool modified = false; + // Convert all float32 based operands to float16 equivalent and change + // instruction type to float16 equivalent. + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + inst->ForEachInId([&builder, &modified, this](uint32_t* idp) { + Instruction* op_inst = get_def_use_mgr()->GetDef(*idp); + if (!IsFloat(op_inst, 32)) return; + GenConvert(idp, 16, &builder); + modified = true; + }); + if (IsFloat(inst, 32)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + modified = true; + } + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessPhi(Instruction* inst) { + // Skip if not float32 + if (!IsFloat(inst, 32)) return false; + // Skip if no relaxed operands. + bool relaxed_found = false; + uint32_t ocnt = 0; + inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) { + if (ocnt % 2 == 0) { + Instruction* val_inst = get_def_use_mgr()->GetDef(*idp); + if (IsRelaxed(val_inst)) relaxed_found = true; + } + ++ocnt; + }); + if (!relaxed_found) return false; + // Add float16 converts of any float32 operands and change type + // of phi to float16 equivalent. Operand converts need to be added to + // preceding blocks. 
+ ocnt = 0; + uint32_t* prev_idp; + inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) { + if (ocnt % 2 == 0) { + prev_idp = idp; + } else { + Instruction* val_inst = get_def_use_mgr()->GetDef(*prev_idp); + if (IsFloat(val_inst, 32)) { + BasicBlock* bp = context()->get_instr_block(*idp); + auto insert_before = bp->tail(); + if (insert_before != bp->begin()) { + --insert_before; + if (insert_before->opcode() != SpvOpSelectionMerge && + insert_before->opcode() != SpvOpLoopMerge) + ++insert_before; + } + InstructionBuilder builder(context(), &*insert_before, + IRContext::kAnalysisDefUse | + IRContext::kAnalysisInstrToBlockMapping); + GenConvert(prev_idp, 16, &builder); + } + } + ++ocnt; + }); + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + get_def_use_mgr()->AnalyzeInstUse(inst); + return true; +} + +bool ConvertToHalfPass::ProcessExtract(Instruction* inst) { + bool modified = false; + uint32_t comp_id = inst->GetSingleWordInOperand(0); + Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id); + // If extract is relaxed float32 based type and the composite is a relaxed + // float32 based type, convert it to float16 equivalent. This is slightly + // aggressive and pushes any likely conversion to apply to the whole + // composite rather than apply to each extracted component later. This + // can be a win if the platform can convert the entire composite in the same + // time as one component. It risks converting components that may not be + // used, although empirical data on a large set of real-world shaders seems + // to suggest this is not common and the composite convert is the best choice. 
+ if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) && + IsRelaxed(comp_inst)) { + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + GenConvert(&comp_id, 16, &builder); + inst->SetInOperand(0, {comp_id}); + comp_inst = get_def_use_mgr()->GetDef(comp_id); + modified = true; + } + // If the composite is a float16 based type, make sure the type of the + // extract agrees. + if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + modified = true; + } + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessConvert(Instruction* inst) { + // If float32 and relaxed, change to float16 convert + if (IsFloat(inst, 32) && IsRelaxed(inst)) { + inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16)); + get_def_use_mgr()->AnalyzeInstUse(inst); + } + // If operand and result types are the same, replace result with operand + // and change convert to copy to keep validator happy; DCE will clean it up + uint32_t val_id = inst->GetSingleWordInOperand(0); + Instruction* val_inst = get_def_use_mgr()->GetDef(val_id); + if (inst->type_id() == val_inst->type_id()) { + context()->ReplaceAllUsesWith(inst->result_id(), val_id); + inst->SetOpcode(SpvOpCopyObject); + } + return true; // modified +} + +bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) { + bool modified = false; + // If image reference, only need to convert dref args back to float32 + if (dref_image_ops_.count(inst->opcode()) != 0) { + uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx); + Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id); + if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) { + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + GenConvert(&dref_id, 32, &builder); + 
inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id}); + get_def_use_mgr()->AnalyzeInstUse(inst); + modified = true; + } + } + return modified; +} + +bool ConvertToHalfPass::ProcessDefault(Instruction* inst) { + bool modified = false; + // If non-relaxed instruction has changed operands, need to convert + // them back to float32 + InstructionBuilder builder( + context(), inst, + IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping); + inst->ForEachInId([&builder, &modified, this](uint32_t* idp) { + Instruction* op_inst = get_def_use_mgr()->GetDef(*idp); + if (!IsFloat(op_inst, 16)) return; + if (!IsRelaxed(op_inst)) return; + uint32_t old_id = *idp; + GenConvert(idp, 32, &builder); + if (*idp != old_id) modified = true; + }); + if (modified) get_def_use_mgr()->AnalyzeInstUse(inst); + return modified; +} + +bool ConvertToHalfPass::GenHalfCode(Instruction* inst) { + bool modified = false; + // Remember id for later deletion of RelaxedPrecision decoration + bool inst_relaxed = IsRelaxed(inst); + if (inst_relaxed) relaxed_ids_.push_back(inst->result_id()); + if (IsArithmetic(inst) && inst_relaxed) + modified = GenHalfArith(inst); + else if (inst->opcode() == SpvOpPhi) + modified = ProcessPhi(inst); + else if (inst->opcode() == SpvOpCompositeExtract) + modified = ProcessExtract(inst); + else if (inst->opcode() == SpvOpFConvert) + modified = ProcessConvert(inst); + else if (image_ops_.count(inst->opcode()) != 0) + modified = ProcessImageRef(inst); + else + modified = ProcessDefault(inst); + return modified; +} + +bool ConvertToHalfPass::ProcessFunction(Function* func) { + bool modified = false; + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= GenHalfCode(&*ii); + }); + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= 
MatConvertCleanup(&*ii); + }); + return modified; +} + +Pass::Status ConvertToHalfPass::ProcessImpl() { + Pass::ProcessFunction pfn = [this](Function* fp) { + return ProcessFunction(fp); + }; + bool modified = context()->ProcessEntryPointCallTree(pfn); + // If modified, make sure module has Float16 capability + if (modified) context()->AddCapability(SpvCapabilityFloat16); + // Remove all RelaxedPrecision decorations from instructions and globals + for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id); + for (auto& val : get_module()->types_values()) { + uint32_t v_id = val.result_id(); + if (v_id != 0) RemoveRelaxedDecoration(v_id); + } + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +Pass::Status ConvertToHalfPass::Process() { + Initialize(); + return ProcessImpl(); +} + +void ConvertToHalfPass::Initialize() { + target_ops_core_ = { + SpvOpVectorExtractDynamic, + SpvOpVectorInsertDynamic, + SpvOpVectorShuffle, + SpvOpCompositeConstruct, + SpvOpCompositeInsert, + SpvOpCopyObject, + SpvOpTranspose, + SpvOpConvertSToF, + SpvOpConvertUToF, + // SpvOpFConvert, + // SpvOpQuantizeToF16, + SpvOpFNegate, + SpvOpFAdd, + SpvOpFSub, + SpvOpFMul, + SpvOpFDiv, + SpvOpFMod, + SpvOpVectorTimesScalar, + SpvOpMatrixTimesScalar, + SpvOpVectorTimesMatrix, + SpvOpMatrixTimesVector, + SpvOpMatrixTimesMatrix, + SpvOpOuterProduct, + SpvOpDot, + SpvOpSelect, + SpvOpFOrdEqual, + SpvOpFUnordEqual, + SpvOpFOrdNotEqual, + SpvOpFUnordNotEqual, + SpvOpFOrdLessThan, + SpvOpFUnordLessThan, + SpvOpFOrdGreaterThan, + SpvOpFUnordGreaterThan, + SpvOpFOrdLessThanEqual, + SpvOpFUnordLessThanEqual, + SpvOpFOrdGreaterThanEqual, + SpvOpFUnordGreaterThanEqual, + }; + target_ops_450_ = { + GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs, + GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract, + GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos, + GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan, + 
GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh, + GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow, + GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2, + GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant, + GLSLstd450MatrixInverse, + // TODO(greg-lunarg): GLSLstd450ModfStruct, + GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix, + GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma, + // TODO(greg-lunarg): GLSLstd450FrexpStruct, + GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross, + GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect, + GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp}; + image_ops_ = {SpvOpImageSampleImplicitLod, + SpvOpImageSampleExplicitLod, + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjImplicitLod, + SpvOpImageSampleProjExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageFetch, + SpvOpImageGather, + SpvOpImageDrefGather, + SpvOpImageRead, + SpvOpImageSparseSampleImplicitLod, + SpvOpImageSparseSampleExplicitLod, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjImplicitLod, + SpvOpImageSparseSampleProjExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseFetch, + SpvOpImageSparseGather, + SpvOpImageSparseDrefGather, + SpvOpImageSparseTexelsResident, + SpvOpImageSparseRead}; + dref_image_ops_ = { + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageDrefGather, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseDrefGather, + }; + relaxed_ids_.clear(); +} + +} // 
namespace opt +} // namespace spvtools diff --git a/source/opt/convert_to_half_pass.h b/source/opt/convert_to_half_pass.h new file mode 100644 index 0000000000..5225848619 --- /dev/null +++ b/source/opt/convert_to_half_pass.h @@ -0,0 +1,134 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ +#define LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ + +#include "source/opt/ir_builder.h" +#include "source/opt/pass.h" + +namespace spvtools { +namespace opt { + +class ConvertToHalfPass : public Pass { + public: + ConvertToHalfPass() : Pass() {} + + ~ConvertToHalfPass() override = default; + + IRContext::Analysis GetPreservedAnalyses() override { + return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping; + } + + // See optimizer.hpp for pass user documentation. 
+ Status Process() override; + + const char* name() const override { return "convert-to-half-pass"; } + + private: + // Return true if |inst| is an arithmetic op that can be of type float16 + bool IsArithmetic(Instruction* inst); + + // Return true if |inst| returns scalar, vector or matrix type with base + // float and |width| + bool IsFloat(Instruction* inst, uint32_t width); + + // Return true if |inst| is decorated with RelaxedPrecision + bool IsRelaxed(Instruction* inst); + + // Return registered type for float with |width| + analysis::Type* FloatScalarType(uint32_t width); + + // Return registered type for vector of length |v_len| of float of |width| + analysis::Type* FloatVectorType(uint32_t v_len, uint32_t width); + + // Return registered type for matrix of |v_cnt| vectors of length identical + // to |vty_id| of float of |width| + analysis::Type* FloatMatrixType(uint32_t v_cnt, uint32_t vty_id, + uint32_t width); + + // Return equivalent to float type |ty_id| with |width| + uint32_t EquivFloatTypeId(uint32_t ty_id, uint32_t width); + + // Append instructions to builder to convert value |*val_idp| to the + // equivalent of its type with |width|. Set |*val_idp| to the new id. + void GenConvert(uint32_t* val_idp, uint32_t width, + InstructionBuilder* builder); + + // Remove RelaxedPrecision decoration of |id|. + void RemoveRelaxedDecoration(uint32_t id); + + // If |inst| is an arithmetic, phi, extract or convert instruction of float32 + // base type and decorated with RelaxedPrecision, change it to the equivalent + // float16 based type instruction. Specifically, insert instructions to + // convert all operands to float16 (if needed) and change its type to the + // equivalent float16 type. Otherwise, insert instructions to convert its + // operands back to their original types, if needed. 
+ bool GenHalfCode(Instruction* inst); + + // Gen code for relaxed arithmetic |inst| + bool GenHalfArith(Instruction* inst); + + // Gen code for relaxed phi |inst| + bool ProcessPhi(Instruction* inst); + + // Gen code for relaxed extract |inst| + bool ProcessExtract(Instruction* inst); + + // Gen code for relaxed convert |inst| + bool ProcessConvert(Instruction* inst); + + // Gen code for image reference |inst| + bool ProcessImageRef(Instruction* inst); + + // Process default non-relaxed |inst| + bool ProcessDefault(Instruction* inst); + + // If |inst| is an FConvert of a matrix type, decompose it to a series of + // vector extracts and converts combined with OpCompositeConstruct. Matrix + // FConverts are generated by GenHalfCode because they are easier to + // manipulate, but are invalid so we need to clean them up. + bool MatConvertCleanup(Instruction* inst); + + // Call GenHalfCode on every instruction in |func|. + // If code is generated for an instruction, replace the instruction + // with the new instructions that are generated. 
+ bool ProcessFunction(Function* func); + + Pass::Status ProcessImpl(); + + // Initialize state for converting to half + void Initialize(); + + // Set of core operations to be processed + std::unordered_set target_ops_core_; + + // Set of 450 extension operations to be processed + std::unordered_set target_ops_450_; + + // Set of image operations + std::unordered_set image_ops_; + + // Set of dref image operations + std::unordered_set dref_image_ops_; + + // Ids of all relaxed instructions visited + std::vector relaxed_ids_; +}; + +} // namespace opt +} // namespace spvtools + +#endif // LIBSPIRV_OPT_CONVERT_TO_HALF_PASS_H_ diff --git a/source/opt/ir_builder.h b/source/opt/ir_builder.h index a0ca40cee5..d3875c499d 100644 --- a/source/opt/ir_builder.h +++ b/source/opt/ir_builder.h @@ -109,13 +109,13 @@ class InstructionBuilder { return AddInstruction(std::move(newQuadOp)); } - Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t operand1, - uint32_t operand2) { + Instruction* AddIdLiteralOp(uint32_t type_id, SpvOp opcode, uint32_t id, + uint32_t uliteral) { // TODO(1841): Handle id overflow. 
std::unique_ptr newBinOp(new Instruction( GetContext(), opcode, type_id, GetContext()->TakeNextId(), - {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {operand1}}, - {spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {operand2}}})); + {{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {id}}, + {spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER, {uliteral}}})); return AddInstruction(std::move(newBinOp)); } diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp index 635b075ff2..78b7646fd4 100644 --- a/source/opt/optimizer.cpp +++ b/source/opt/optimizer.cpp @@ -415,6 +415,10 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) { } else if (pass_name == "inst-buff-addr-check") { RegisterPass(CreateInstBuffAddrCheckPass(7, 23, 2)); RegisterPass(CreateAggressiveDCEPass()); + } else if (pass_name == "convert-relaxed-to-half") { + RegisterPass(CreateConvertRelaxedToHalfPass()); + } else if (pass_name == "relax-float-ops") { + RegisterPass(CreateRelaxFloatOpsPass()); } else if (pass_name == "simplify-instructions") { RegisterPass(CreateSimplificationPass()); } else if (pass_name == "ssa-rewrite") { @@ -877,6 +881,16 @@ Optimizer::PassToken CreateInstBuffAddrCheckPass(uint32_t desc_set, MakeUnique(desc_set, shader_id, version)); } +Optimizer::PassToken CreateConvertRelaxedToHalfPass() { + return MakeUnique( + MakeUnique()); +} + +Optimizer::PassToken CreateRelaxFloatOpsPass() { + return MakeUnique( + MakeUnique()); +} + Optimizer::PassToken CreateCodeSinkingPass() { return MakeUnique( MakeUnique()); diff --git a/source/opt/pass.cpp b/source/opt/pass.cpp index f9e4a5d47a..72d7ceabdd 100644 --- a/source/opt/pass.cpp +++ b/source/opt/pass.cpp @@ -54,6 +54,25 @@ uint32_t Pass::GetPointeeTypeId(const Instruction* ptrInst) const { return ptrTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx); } +Instruction* Pass::GetBaseType(uint32_t ty_id) { + Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id); + if (ty_inst->opcode() == SpvOpTypeMatrix) { + uint32_t 
vty_id = ty_inst->GetSingleWordInOperand(0); + ty_inst = get_def_use_mgr()->GetDef(vty_id); + } + if (ty_inst->opcode() == SpvOpTypeVector) { + uint32_t cty_id = ty_inst->GetSingleWordInOperand(0); + ty_inst = get_def_use_mgr()->GetDef(cty_id); + } + return ty_inst; +} + +bool Pass::IsFloat(uint32_t ty_id, uint32_t width) { + Instruction* ty_inst = GetBaseType(ty_id); + if (ty_inst->opcode() != SpvOpTypeFloat) return false; + return ty_inst->GetSingleWordInOperand(0) == width; +} + uint32_t Pass::GenerateCopy(Instruction* object_to_copy, uint32_t new_type_id, Instruction* insertion_position) { analysis::TypeManager* type_mgr = context()->get_type_mgr(); diff --git a/source/opt/pass.h b/source/opt/pass.h index 686e9fc1de..356e94dc56 100644 --- a/source/opt/pass.h +++ b/source/opt/pass.h @@ -109,6 +109,13 @@ class Pass { // Return type id for |ptrInst|'s pointee uint32_t GetPointeeTypeId(const Instruction* ptrInst) const; + // Return base type of |ty_id| type + Instruction* GetBaseType(uint32_t ty_id); + + // Return true if |ty_id| is a scalar, vector or matrix type with base + // float and |width| + bool IsFloat(uint32_t ty_id, uint32_t width); + protected: // Constructs a new pass. 
// diff --git a/source/opt/passes.h b/source/opt/passes.h index d53af8ff29..3d08f9085d 100644 --- a/source/opt/passes.h +++ b/source/opt/passes.h @@ -25,6 +25,7 @@ #include "source/opt/code_sink.h" #include "source/opt/combine_access_chains.h" #include "source/opt/compact_ids_pass.h" +#include "source/opt/convert_to_half_pass.h" #include "source/opt/copy_prop_arrays.h" #include "source/opt/dead_branch_elim_pass.h" #include "source/opt/dead_insert_elim_pass.h" @@ -63,6 +64,7 @@ #include "source/opt/process_lines_pass.h" #include "source/opt/reduce_load_size.h" #include "source/opt/redundancy_elimination.h" +#include "source/opt/relax_float_ops_pass.h" #include "source/opt/remove_duplicates_pass.h" #include "source/opt/replace_invalid_opc.h" #include "source/opt/scalar_replacement_pass.h" diff --git a/source/opt/relax_float_ops_pass.cpp b/source/opt/relax_float_ops_pass.cpp new file mode 100644 index 0000000000..73f16ddf3f --- /dev/null +++ b/source/opt/relax_float_ops_pass.cpp @@ -0,0 +1,178 @@ +// Copyright (c) 2019 The Khronos Group Inc. +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "relax_float_ops_pass.h" + +#include "source/opt/ir_builder.h" + +namespace spvtools { +namespace opt { + +bool RelaxFloatOpsPass::IsRelaxable(Instruction* inst) { + return target_ops_core_f_rslt_.count(inst->opcode()) != 0 || + target_ops_core_f_opnd_.count(inst->opcode()) != 0 || + sample_ops_.count(inst->opcode()) != 0 || + (inst->opcode() == SpvOpExtInst && + inst->GetSingleWordInOperand(0) == + context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450() && + target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0); +} + +bool RelaxFloatOpsPass::IsFloat32(Instruction* inst) { + uint32_t ty_id; + if (target_ops_core_f_opnd_.count(inst->opcode()) != 0) { + uint32_t opnd_id = inst->GetSingleWordInOperand(0); + Instruction* opnd_inst = get_def_use_mgr()->GetDef(opnd_id); + ty_id = opnd_inst->type_id(); + } else { + ty_id = inst->type_id(); + if (ty_id == 0) return false; + } + return IsFloat(ty_id, 32); +} + +bool RelaxFloatOpsPass::IsRelaxed(uint32_t r_id) { + for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false)) + if (r_inst->opcode() == SpvOpDecorate && + r_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision) + return true; + return false; +} + +bool RelaxFloatOpsPass::ProcessInst(Instruction* r_inst) { + uint32_t r_id = r_inst->result_id(); + if (r_id == 0) return false; + if (!IsFloat32(r_inst)) return false; + if (IsRelaxed(r_id)) return false; + if (!IsRelaxable(r_inst)) return false; + get_decoration_mgr()->AddDecoration(r_id, SpvDecorationRelaxedPrecision); + return true; +} + +bool RelaxFloatOpsPass::ProcessFunction(Function* func) { + bool modified = false; + cfg()->ForEachBlockInReversePostOrder( + func->entry().get(), [&modified, this](BasicBlock* bb) { + for (auto ii = bb->begin(); ii != bb->end(); ++ii) + modified |= ProcessInst(&*ii); + }); + return modified; +} + +Pass::Status RelaxFloatOpsPass::ProcessImpl() { + Pass::ProcessFunction pfn = [this](Function* fp) { + return ProcessFunction(fp); + }; + 
bool modified = context()->ProcessEntryPointCallTree(pfn); + return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; +} + +Pass::Status RelaxFloatOpsPass::Process() { + Initialize(); + return ProcessImpl(); +} + +void RelaxFloatOpsPass::Initialize() { + target_ops_core_f_rslt_ = { + SpvOpLoad, + SpvOpPhi, + SpvOpVectorExtractDynamic, + SpvOpVectorInsertDynamic, + SpvOpVectorShuffle, + SpvOpCompositeExtract, + SpvOpCompositeConstruct, + SpvOpCompositeInsert, + SpvOpCopyObject, + SpvOpTranspose, + SpvOpConvertSToF, + SpvOpConvertUToF, + SpvOpFConvert, + // SpvOpQuantizeToF16, + SpvOpFNegate, + SpvOpFAdd, + SpvOpFSub, + SpvOpFMul, + SpvOpFDiv, + SpvOpFMod, + SpvOpVectorTimesScalar, + SpvOpMatrixTimesScalar, + SpvOpVectorTimesMatrix, + SpvOpMatrixTimesVector, + SpvOpMatrixTimesMatrix, + SpvOpOuterProduct, + SpvOpDot, + SpvOpSelect, + }; + target_ops_core_f_opnd_ = { + SpvOpFOrdEqual, + SpvOpFUnordEqual, + SpvOpFOrdNotEqual, + SpvOpFUnordNotEqual, + SpvOpFOrdLessThan, + SpvOpFUnordLessThan, + SpvOpFOrdGreaterThan, + SpvOpFUnordGreaterThan, + SpvOpFOrdLessThanEqual, + SpvOpFUnordLessThanEqual, + SpvOpFOrdGreaterThanEqual, + SpvOpFUnordGreaterThanEqual, + }; + target_ops_450_ = { + GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs, + GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract, + GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos, + GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan, + GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh, + GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow, + GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2, + GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant, + GLSLstd450MatrixInverse, + // TODO(greg-lunarg): GLSLstd450ModfStruct, + GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix, + GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma, + // TODO(greg-lunarg): GLSLstd450FrexpStruct, + 
GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross, + GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect, + GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp}; + sample_ops_ = {SpvOpImageSampleImplicitLod, + SpvOpImageSampleExplicitLod, + SpvOpImageSampleDrefImplicitLod, + SpvOpImageSampleDrefExplicitLod, + SpvOpImageSampleProjImplicitLod, + SpvOpImageSampleProjExplicitLod, + SpvOpImageSampleProjDrefImplicitLod, + SpvOpImageSampleProjDrefExplicitLod, + SpvOpImageFetch, + SpvOpImageGather, + SpvOpImageDrefGather, + SpvOpImageRead, + SpvOpImageSparseSampleImplicitLod, + SpvOpImageSparseSampleExplicitLod, + SpvOpImageSparseSampleDrefImplicitLod, + SpvOpImageSparseSampleDrefExplicitLod, + SpvOpImageSparseSampleProjImplicitLod, + SpvOpImageSparseSampleProjExplicitLod, + SpvOpImageSparseSampleProjDrefImplicitLod, + SpvOpImageSparseSampleProjDrefExplicitLod, + SpvOpImageSparseFetch, + SpvOpImageSparseGather, + SpvOpImageSparseDrefGather, + SpvOpImageSparseTexelsResident, + SpvOpImageSparseRead}; +} + +} // namespace opt +} // namespace spvtools diff --git a/source/opt/relax_float_ops_pass.h b/source/opt/relax_float_ops_pass.h new file mode 100644 index 0000000000..5ee3d73c8a --- /dev/null +++ b/source/opt/relax_float_ops_pass.h @@ -0,0 +1,80 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ +#define LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ + +#include "source/opt/ir_builder.h" +#include "source/opt/pass.h" + +namespace spvtools { +namespace opt { + +class RelaxFloatOpsPass : public Pass { + public: + RelaxFloatOpsPass() : Pass() {} + + ~RelaxFloatOpsPass() override = default; + + IRContext::Analysis GetPreservedAnalyses() override { + return IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping; + } + + // See optimizer.hpp for pass user documentation. + Status Process() override; + + const char* name() const override { return "convert-to-half-pass"; } + + private: + // Return true if |inst| can have the RelaxedPrecision decoration applied + // to it. + bool IsRelaxable(Instruction* inst); + + // Return true if |inst| returns scalar, vector or matrix type with base + // float and width 32 + bool IsFloat32(Instruction* inst); + + // Return true if |r_id| is decorated with RelaxedPrecision + bool IsRelaxed(uint32_t r_id); + + // If |inst| is an instruction of float32-based type and is not decorated + // RelaxedPrecision, add such a decoration to the module. + bool ProcessInst(Instruction* inst); + + // Call ProcessInst on every instruction in |func|. 
+ bool ProcessFunction(Function* func); + + Pass::Status ProcessImpl(); + + // Initialize state for converting to half + void Initialize(); + + // Set of float result core operations to be processed + std::unordered_set target_ops_core_f_rslt_; + + // Set of float operand core operations to be processed + std::unordered_set target_ops_core_f_opnd_; + + // Set of 450 extension operations to be processed + std::unordered_set target_ops_450_; + + // Set of sample operations + std::unordered_set sample_ops_; +}; + +} // namespace opt +} // namespace spvtools + +#endif // LIBSPIRV_OPT_RELAX_FLOAT_OPS_PASS_H_ diff --git a/test/opt/CMakeLists.txt b/test/opt/CMakeLists.txt index 47ce41f0c2..327f265632 100644 --- a/test/opt/CMakeLists.txt +++ b/test/opt/CMakeLists.txt @@ -28,6 +28,7 @@ add_spvtools_unittest(TARGET opt compact_ids_test.cpp constants_test.cpp constant_manager_test.cpp + convert_relaxed_to_half_test.cpp copy_prop_array_test.cpp dead_branch_elim_test.cpp dead_insert_elim_test.cpp @@ -80,6 +81,7 @@ add_spvtools_unittest(TARGET opt reduce_load_size_test.cpp redundancy_elimination_test.cpp register_liveness.cpp + relax_float_ops_test.cpp replace_invalid_opc_test.cpp scalar_analysis.cpp scalar_replacement_test.cpp diff --git a/test/opt/convert_relaxed_to_half_test.cpp b/test/opt/convert_relaxed_to_half_test.cpp new file mode 100644 index 0000000000..3ac80099fb --- /dev/null +++ b/test/opt/convert_relaxed_to_half_test.cpp @@ -0,0 +1,1227 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Convert Relaxed to Half tests + +#include +#include + +#include "test/opt/assembly_builder.h" +#include "test/opt/pass_fixture.h" +#include "test/opt/pass_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +using ConvertToHalfTest = PassTest<::testing::Test>; + +TEST_F(ConvertToHalfTest, ConvertToHalfBasic) { + // The resulting SPIR-V was processed with --relax-float-ops. + // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float c; + // } + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // psout.Color = g_tTex1df4.Sample(g_sSamp, i.Tex0) * c; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %48 RelaxedPrecision +OpDecorate %63 RelaxedPrecision +OpDecorate %65 RelaxedPrecision +OpDecorate %66 RelaxedPrecision +%void = OpTypeVoid +%3 = 
OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%19 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_19 = OpTypePointer UniformConstant %19 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_19 UniformConstant +%23 = OpTypeSampler +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%g_sSamp = OpVariable %_ptr_UniformConstant_23 UniformConstant +%27 = OpTypeSampledImage %19 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%19 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_19 = OpTypePointer 
UniformConstant %19 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_19 UniformConstant +%23 = OpTypeSampler +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%g_sSamp = OpVariable %_ptr_UniformConstant_23 UniformConstant +%27 = OpTypeSampledImage %19 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%48 = OpLoad %float %i_Tex0 +%58 = OpLoad %19 %g_tTex1df4 +%59 = OpLoad %23 %g_sSamp +%60 = OpSampledImage %27 %58 %59 +%63 = OpImageSampleImplicitLod %v4float %60 %48 +%64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%65 = OpLoad %float %64 +%66 = OpVectorTimesScalar %v4float %63 %65 +OpStore %_entryPointOutput_Color %66 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%48 = OpLoad %float %i_Tex0 +%58 = OpLoad %19 %g_tTex1df4 +%59 = OpLoad %23 %g_sSamp +%60 = OpSampledImage %27 %58 %59 +%63 = OpImageSampleImplicitLod %v4float %60 %48 +%64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%65 = OpLoad %float %64 +%69 = OpFConvert %v4half %63 +%70 = OpFConvert %half %65 +%66 = OpVectorTimesScalar %v4half %69 %70 +%71 = OpFConvert %v4float %66 +OpStore %_entryPointOutput_Color %71 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithDrefSample) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // clang-format off + // + // SamplerComparisonState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // cbuffer cbuff{ + // float c1; + // float c2; + // }; + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // float Tex1 : TEXCOORD1; + // }; + // + // struct PS_OUTPUT + // { + // float Color : SV_Target0; + // }; + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float txval10 = g_tTex1df4.SampleCmp(g_sSamp, i.Tex0 * 0.1, c1 + 0.1); + // float txval11 = g_tTex1df4.SampleCmp(g_sSamp, i.Tex1 * 0.2, c2 + 0.2); + // float t = txval10 + txval11; + // float t2 = t / 2.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c1" +OpMemberName %cbuff 1 "c2" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 Offset 4 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %100 RelaxedPrecision +OpDecorate %76 RelaxedPrecision +OpDecorate %79 RelaxedPrecision +OpDecorate %98 RelaxedPrecision +OpDecorate %101 RelaxedPrecision +OpDecorate %110 RelaxedPrecision +OpDecorate %102 RelaxedPrecision +OpDecorate %112 RelaxedPrecision +OpDecorate %104 RelaxedPrecision 
+OpDecorate %113 RelaxedPrecision +OpDecorate %114 RelaxedPrecision +OpDecorate %116 RelaxedPrecision +OpDecorate %119 RelaxedPrecision +OpDecorate %121 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%16 = OpTypeImage %float 1D 1 0 0 1 Unknown +%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_16 UniformConstant +%20 = OpTypeSampler +%_ptr_UniformConstant_20 = OpTypePointer UniformConstant %20 +%g_sSamp = OpVariable %_ptr_UniformConstant_20 UniformConstant +%24 = OpTypeSampledImage %16 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%float_0_100000001 = OpConstant %float 0.100000001 +%cbuff = OpTypeStruct %float %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%v2float = OpTypeVector %float 2 +%int_1 = OpConstant %int 1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_float = OpTypePointer Output %float +%_entryPointOutput_Color = OpVariable %_ptr_Output_float Output +%float_0_5 = OpConstant %float 0.5 +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c1" +OpMemberName %cbuff 1 "c2" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 
+OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 Offset 4 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%25 = OpTypeFunction %void +%float = OpTypeFloat 32 +%27 = OpTypeImage %float 1D 1 0 0 1 Unknown +%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_27 UniformConstant +%29 = OpTypeSampler +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_sSamp = OpVariable %_ptr_UniformConstant_29 UniformConstant +%31 = OpTypeSampledImage %27 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%float_0_100000001 = OpConstant %float 0.100000001 +%cbuff = OpTypeStruct %float %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%v2float = OpTypeVector %float 2 +%int_1 = OpConstant %int 1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_float = OpTypePointer Output %float +%_entryPointOutput_Color = OpVariable %_ptr_Output_float Output +%float_0_5 = OpConstant %float 0.5 +%half = OpTypeFloat 16 +%v2half = OpTypeVector %half 2 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%76 = OpLoad %float %i_Tex0 +%79 = OpLoad %float %i_Tex1 +%93 = OpLoad %16 %g_tTex1df4 +%94 = OpLoad %20 %g_sSamp +%95 = OpSampledImage %24 %93 %94 +%98 = OpFMul %float %76 %float_0_100000001 +%99 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%100 = OpLoad %float %99 +%101 = OpFAdd %float %100 %float_0_100000001 +%102 = OpCompositeConstruct %v2float %98 %101 +%104 = OpImageSampleDrefImplicitLod %float %95 %102 %101 +%105 = OpLoad 
%16 %g_tTex1df4 +%106 = OpLoad %20 %g_sSamp +%107 = OpSampledImage %24 %105 %106 +%110 = OpFMul %float %79 %float_0_200000003 +%111 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%112 = OpLoad %float %111 +%113 = OpFAdd %float %112 %float_0_200000003 +%114 = OpCompositeConstruct %v2float %110 %113 +%116 = OpImageSampleDrefImplicitLod %float %107 %114 %113 +%119 = OpFAdd %float %104 %116 +%121 = OpFMul %float %119 %float_0_5 +OpStore %_entryPointOutput_Color %121 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %25 +%43 = OpLabel +%11 = OpLoad %float %i_Tex0 +%12 = OpLoad %float %i_Tex1 +%44 = OpLoad %27 %g_tTex1df4 +%45 = OpLoad %29 %g_sSamp +%46 = OpSampledImage %31 %44 %45 +%53 = OpFConvert %half %11 +%54 = OpFConvert %half %float_0_100000001 +%13 = OpFMul %half %53 %54 +%47 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%10 = OpLoad %float %47 +%55 = OpFConvert %half %10 +%56 = OpFConvert %half %float_0_100000001 +%14 = OpFAdd %half %55 %56 +%16 = OpCompositeConstruct %v2half %13 %14 +%58 = OpFConvert %float %14 +%18 = OpImageSampleDrefImplicitLod %float %46 %16 %58 +%48 = OpLoad %27 %g_tTex1df4 +%49 = OpLoad %29 %g_sSamp +%50 = OpSampledImage %31 %48 %49 +%59 = OpFConvert %half %12 +%60 = OpFConvert %half %float_0_200000003 +%15 = OpFMul %half %59 %60 +%51 = OpAccessChain %_ptr_Uniform_float %_ %int_1 +%17 = OpLoad %float %51 +%61 = OpFConvert %half %17 +%62 = OpFConvert %half %float_0_200000003 +%19 = OpFAdd %half %61 %62 +%20 = OpCompositeConstruct %v2half %15 %19 +%63 = OpFConvert %float %19 +%21 = OpImageSampleDrefImplicitLod %float %50 %20 %63 +%64 = OpFConvert %half %18 +%65 = OpFConvert %half %21 +%22 = OpFAdd %half %64 %65 +%66 = OpFConvert %half %float_0_5 +%23 = OpFMul %half %22 %66 +%67 = OpFConvert %float %23 +OpStore %_entryPointOutput_Color %67 +OpReturn +OpFunctionEnd +)"; + + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, 
ConvertToHalfWithVectorMatrixMult) { + // The resulting SPIR-V was processed with --relax-float-ops. + // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float4x4 M; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 txval10 = g_tTex1df4.Sample(g_sSamp, 0.1); + // float4 t = mul(txval10, M); + // psout.Color = t; + // return psout; + //} + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "M" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 RowMajor +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 0 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %56 RelaxedPrecision +OpDecorate %58 RelaxedPrecision +OpDecorate %60 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%14 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_14 UniformConstant +%18 = OpTypeSampler +%_ptr_UniformConstant_18 = OpTypePointer UniformConstant %18 +%g_sSamp = OpVariable %_ptr_UniformConstant_18 UniformConstant +%22 = OpTypeSampledImage %14 
+%float_0_100000001 = OpConstant %float 0.100000001 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "M" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 RowMajor +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 0 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%14 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_14 = OpTypePointer UniformConstant %14 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_14 UniformConstant +%18 = OpTypeSampler +%_ptr_UniformConstant_18 = OpTypePointer UniformConstant %18 +%g_sSamp = OpVariable %_ptr_UniformConstant_18 UniformConstant +%22 = OpTypeSampledImage %14 +%float_0_100000001 = OpConstant %float 0.100000001 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = 
OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +%mat4v4half = OpTypeMatrix %v4half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%53 = OpLoad %14 %g_tTex1df4 +%54 = OpLoad %18 %g_sSamp +%55 = OpSampledImage %22 %53 %54 +%56 = OpImageSampleImplicitLod %v4float %55 %float_0_100000001 +%57 = OpAccessChain %_ptr_Uniform_mat4v4float %_ %int_0 +%58 = OpLoad %mat4v4float %57 +%60 = OpMatrixTimesVector %v4float %58 %56 +OpStore %_entryPointOutput_Color %60 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%53 = OpLoad %14 %g_tTex1df4 +%54 = OpLoad %18 %g_sSamp +%55 = OpSampledImage %22 %53 %54 +%56 = OpImageSampleImplicitLod %v4float %55 %float_0_100000001 +%57 = OpAccessChain %_ptr_Uniform_mat4v4float %_ %int_0 +%58 = OpLoad %mat4v4float %57 +%67 = OpCompositeExtract %v4float %58 0 +%68 = OpFConvert %v4half %67 +%69 = OpCompositeExtract %v4float %58 1 +%70 = OpFConvert %v4half %69 +%71 = OpCompositeExtract %v4float %58 2 +%72 = OpFConvert %v4half %71 +%73 = OpCompositeExtract %v4float %58 3 +%74 = OpFConvert %v4half %73 +%75 = OpCompositeConstruct %mat4v4half %68 %70 %72 %74 +%64 = OpCopyObject %mat4v4float %58 +%65 = OpFConvert %v4half %56 +%60 = OpMatrixTimesVector %v4half %75 %65 +%66 = OpFConvert %v4float %60 +OpStore %_entryPointOutput_Color %66 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithPhi) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // bool b; + // float4x4 M; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 t; + // + // if (b) + // t = g_tTex1df4.Sample(g_sSamp, 0.1); + // else + // t = float4(0.0, 0.0, 0.0, 0.0); + // + // float4 t2 = t * 2.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "b" +OpMemberName %cbuff 1 "M" +OpName %_ "" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 RowMajor +OpMemberDecorate %cbuff 1 Offset 16 +OpMemberDecorate %cbuff 1 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %72 RelaxedPrecision +OpDecorate %85 RelaxedPrecision +OpDecorate %74 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%uint = OpTypeInt 32 0 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %uint %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%bool = OpTypeBool 
+%uint_0 = OpConstant %uint 0 +%29 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_29 UniformConstant +%33 = OpTypeSampler +%_ptr_UniformConstant_33 = OpTypePointer UniformConstant %33 +%g_sSamp = OpVariable %_ptr_UniformConstant_33 UniformConstant +%37 = OpTypeSampledImage %29 +%float_0_100000001 = OpConstant %float 0.100000001 +%float_0 = OpConstant %float 0 +%43 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_2 = OpConstant %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "b" +OpMemberName %cbuff 1 "M" +OpName %_ "" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpMemberDecorate %cbuff 0 Offset 0 +OpMemberDecorate %cbuff 1 RowMajor +OpMemberDecorate %cbuff 1 Offset 16 +OpMemberDecorate %cbuff 1 MatrixStride 16 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%uint = OpTypeInt 32 0 +%mat4v4float = OpTypeMatrix %v4float 4 +%cbuff = OpTypeStruct %uint %mat4v4float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 
0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%bool = OpTypeBool +%uint_0 = OpConstant %uint 0 +%29 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_29 = OpTypePointer UniformConstant %29 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_29 UniformConstant +%33 = OpTypeSampler +%_ptr_UniformConstant_33 = OpTypePointer UniformConstant %33 +%g_sSamp = OpVariable %_ptr_UniformConstant_33 UniformConstant +%37 = OpTypeSampledImage %29 +%float_0_100000001 = OpConstant %float 0.100000001 +%float_0 = OpConstant %float 0 +%43 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_2 = OpConstant %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%63 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 +%64 = OpLoad %uint %63 +%65 = OpINotEqual %bool %64 %uint_0 +OpSelectionMerge %66 None +OpBranchConditional %65 %67 %68 +%67 = OpLabel +%69 = OpLoad %29 %g_tTex1df4 +%70 = OpLoad %33 %g_sSamp +%71 = OpSampledImage %37 %69 %70 +%72 = OpImageSampleImplicitLod %v4float %71 %float_0_100000001 +OpBranch %66 +%68 = OpLabel +OpBranch %66 +%66 = OpLabel +%85 = OpPhi %v4float %72 %67 %43 %68 +%74 = OpVectorTimesScalar %v4float %85 %float_2 +OpStore %_entryPointOutput_Color %74 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%63 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 +%64 = OpLoad %uint %63 +%65 = OpINotEqual %bool %64 %uint_0 +OpSelectionMerge %66 None +OpBranchConditional %65 %67 %68 +%67 = OpLabel +%69 = OpLoad %29 %g_tTex1df4 +%70 = OpLoad %33 %g_sSamp +%71 = OpSampledImage %37 %69 %70 +%72 = OpImageSampleImplicitLod %v4float %71 %float_0_100000001 +%88 = OpFConvert %v4half %72 +OpBranch %66 +%68 = OpLabel +%89 = OpFConvert %v4half %43 +OpBranch %66 
+%66 = OpLabel +%85 = OpPhi %v4half %88 %67 %89 %68 +%90 = OpFConvert %half %float_2 +%74 = OpVectorTimesScalar %v4half %85 %90 +%91 = OpFConvert %v4float %74 +OpStore %_entryPointOutput_Color %91 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithLoopAndFConvert) { + // The resulting SPIR-V was processed with --relax-float-ops. + // + // The loop causes an FConvert to be generated at the bottom of the loop + // for the Phi. The FConvert is later processed and turned into a (dead) + // copy. + // + // clang-format off + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float4 a[10]; + // } + // + // PS_OUTPUT main() + // { + // PS_OUTPUT psout; + // float4 t = 0.0;; + // + // for (int i = 0; i<10; ++i) + // t = t + a[i]; + // + // float4 t2 = t / 10.0; + // psout.Color = t2; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "a" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %_arr_v4float_uint_10 ArrayStride 16 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %96 RelaxedPrecision +OpDecorate %81 RelaxedPrecision +OpDecorate %75 RelaxedPrecision +OpDecorate %76 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%float_0 = OpConstant %float 0 +%15 = OpConstantComposite %v4float 
%float_0 %float_0 %float_0 %float_0 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%int_10 = OpConstant %int 10 +%bool = OpTypeBool +%uint = OpTypeInt 32 0 +%uint_10 = OpConstant %uint 10 +%_arr_v4float_uint_10 = OpTypeArray %v4float %uint_10 +%cbuff = OpTypeStruct %_arr_v4float_uint_10 +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%int_1 = OpConstant %int 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_100000001 = OpConstant %float 0.100000001 +%94 = OpConstantComposite %v4float %float_0_100000001 %float_0_100000001 %float_0_100000001 %float_0_100000001 +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "a" +OpName %_ "" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %_arr_v4float_uint_10 ArrayStride 16 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%float_0 = OpConstant %float 0 +%15 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%int_10 = OpConstant %int 10 +%bool = OpTypeBool +%uint = OpTypeInt 32 0 +%uint_10 = OpConstant %uint 10 +%_arr_v4float_uint_10 = OpTypeArray %v4float %uint_10 +%cbuff = OpTypeStruct %_arr_v4float_uint_10 +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_v4float = OpTypePointer 
Uniform %v4float +%int_1 = OpConstant %int 1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_100000001 = OpConstant %float 0.100000001 +%94 = OpConstantComposite %v4float %float_0_100000001 %float_0_100000001 %float_0_100000001 %float_0_100000001 +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +OpBranch %65 +%65 = OpLabel +%96 = OpPhi %v4float %15 %5 %76 %71 +%95 = OpPhi %int %int_0 %5 %78 %71 +%70 = OpSLessThan %bool %95 %int_10 +OpLoopMerge %66 %71 None +OpBranchConditional %70 %71 %66 +%71 = OpLabel +%74 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %95 +%75 = OpLoad %v4float %74 +%76 = OpFAdd %v4float %96 %75 +%78 = OpIAdd %int %95 %int_1 +OpBranch %65 +%66 = OpLabel +%81 = OpFMul %v4float %96 %94 +OpStore %_entryPointOutput_Color %81 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%99 = OpFConvert %v4half %15 +OpBranch %65 +%65 = OpLabel +%96 = OpPhi %v4half %99 %5 %76 %71 +%95 = OpPhi %int %int_0 %5 %78 %71 +%70 = OpSLessThan %bool %95 %int_10 +OpLoopMerge %66 %71 None +OpBranchConditional %70 %71 %66 +%71 = OpLabel +%74 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %95 +%75 = OpLoad %v4float %74 +%103 = OpFConvert %v4half %75 +%76 = OpFAdd %v4half %96 %103 +%78 = OpIAdd %int %95 %int_1 +%100 = OpCopyObject %v4half %76 +OpBranch %65 +%66 = OpLabel +%101 = OpFConvert %v4half %94 +%81 = OpFMul %v4half %96 %101 +%102 = OpFConvert %v4float %81 +OpStore %_entryPointOutput_Color %102 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck<ConvertToHalfPass>(defs_before + func_before, + defs_after + func_after, true, true); +} + +TEST_F(ConvertToHalfTest, ConvertToHalfWithExtracts) { + // The resulting SPIR-V was processed with --relax-float-ops. 
+ // + // The extra converts in the func_after can be DCE'd. + // + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // cbuffer cbuff{ + // float c; + // } + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float4 tx = g_tTex1df4.Sample(g_sSamp, i.Tex0); + // float4 t = float4(tx.y, tx.z, tx.x, tx.w) * c; + // psout.Color = t; + // return psout; + // } + // + // clang-format on + + const std::string defs_before = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +OpDecorate %65 RelaxedPrecision +OpDecorate %82 RelaxedPrecision +OpDecorate %84 RelaxedPrecision +OpDecorate %86 RelaxedPrecision +OpDecorate %88 RelaxedPrecision +OpDecorate %90 RelaxedPrecision +OpDecorate %91 RelaxedPrecision +OpDecorate %93 RelaxedPrecision +OpDecorate %94 RelaxedPrecision +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = 
OpVariable %_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage %17 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +)"; + + const std::string defs_after = + R"(OpCapability Shader +OpCapability Sampled1D +OpCapability Float16 +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %cbuff "cbuff" +OpMemberName %cbuff 0 "c" +OpName %_ "" +OpName %i_Tex0 "i.Tex0" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpMemberDecorate %cbuff 0 Offset 0 +OpDecorate %cbuff Block +OpDecorate %_ DescriptorSet 0 +OpDecorate %_ Binding 1 +OpDecorate %i_Tex0 Location 0 +OpDecorate %_entryPointOutput_Color Location 0 +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = OpVariable %_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage 
%17 +%int = OpTypeInt 32 1 +%int_0 = OpConstant %int 0 +%cbuff = OpTypeStruct %float +%_ptr_Uniform_cbuff = OpTypePointer Uniform %cbuff +%_ = OpVariable %_ptr_Uniform_cbuff Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%half = OpTypeFloat 16 +%v4half = OpTypeVector %half 4 +)"; + + const std::string func_before = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%65 = OpLoad %float %i_Tex0 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %65 +%84 = OpCompositeExtract %float %82 1 +%86 = OpCompositeExtract %float %82 2 +%88 = OpCompositeExtract %float %82 0 +%90 = OpCompositeExtract %float %82 3 +%91 = OpCompositeConstruct %v4float %84 %86 %88 %90 +%92 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%93 = OpLoad %float %92 +%94 = OpVectorTimesScalar %v4float %91 %93 +OpStore %_entryPointOutput_Color %94 +OpReturn +OpFunctionEnd +)"; + + const std::string func_after = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%65 = OpLoad %float %i_Tex0 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %65 +%97 = OpFConvert %v4half %82 +%84 = OpCompositeExtract %half %97 1 +%98 = OpFConvert %v4half %82 +%86 = OpCompositeExtract %half %98 2 +%99 = OpFConvert %v4half %82 +%88 = OpCompositeExtract %half %99 0 +%100 = OpFConvert %v4half %82 +%90 = OpCompositeExtract %half %100 3 +%91 = OpCompositeConstruct %v4half %84 %86 %88 %90 +%92 = OpAccessChain %_ptr_Uniform_float %_ %int_0 +%93 = OpLoad %float %92 +%101 = OpFConvert %half %93 +%94 = OpVectorTimesScalar %v4half %91 %101 +%102 = OpFConvert %v4float %94 +OpStore %_entryPointOutput_Color %102 +OpReturn +OpFunctionEnd +)"; + + 
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck<ConvertToHalfPass>(defs_before + func_before, + defs_after + func_after, true, true); +} + +} // namespace +} // namespace opt +} // namespace spvtools diff --git a/test/opt/relax_float_ops_test.cpp b/test/opt/relax_float_ops_test.cpp new file mode 100644 index 0000000000..14cde0b94d --- /dev/null +++ b/test/opt/relax_float_ops_test.cpp @@ -0,0 +1,142 @@ +// Copyright (c) 2019 Valve Corporation +// Copyright (c) 2019 LunarG Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Relax float ops tests + +#include <string> +#include <vector> + +#include "test/opt/assembly_builder.h" +#include "test/opt/pass_fixture.h" +#include "test/opt/pass_utils.h" + +namespace spvtools { +namespace opt { +namespace { + +using RelaxFloatOpsTest = PassTest<::testing::Test>; + +TEST_F(RelaxFloatOpsTest, RelaxFloatOpsBasic) { + // All float result instructions in functions should be relaxed + // clang-format off + // + // SamplerState g_sSamp : register(s0); + // uniform Texture1D g_tTex1df4 : register(t0); + // + // struct PS_INPUT + // { + // float Tex0 : TEXCOORD0; + // float Tex1 : TEXCOORD1; + // }; + // + // struct PS_OUTPUT + // { + // float4 Color : SV_Target0; + // }; + // + // PS_OUTPUT main(PS_INPUT i) + // { + // PS_OUTPUT psout; + // float4 txval10 = g_tTex1df4.Sample(g_sSamp, i.Tex0); + // float4 txval11 = g_tTex1df4.Sample(g_sSamp, i.Tex1); + // float4 t = txval10 + txval11; + // float4 t2 = t / 2.0; + // psout.Color = t2; + // return psout; + // } + // clang-format on + + const std::string defs0 = + R"(OpCapability Shader +OpCapability Sampled1D +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %i_Tex0 %i_Tex1 %_entryPointOutput_Color +OpExecutionMode %main OriginUpperLeft +OpSource HLSL 500 +OpName %main "main" +OpName %g_tTex1df4 "g_tTex1df4" +OpName %g_sSamp "g_sSamp" +OpName %i_Tex0 "i.Tex0" +OpName %i_Tex1 "i.Tex1" +OpName %_entryPointOutput_Color "@entryPointOutput.Color" +OpDecorate %g_tTex1df4 DescriptorSet 0 +OpDecorate %g_tTex1df4 Binding 0 +OpDecorate %g_sSamp DescriptorSet 0 +OpDecorate %g_sSamp Binding 0 +OpDecorate %i_Tex0 Location 0 +OpDecorate %i_Tex1 Location 1 +OpDecorate %_entryPointOutput_Color Location 0 +)"; + + const std::string defs1 = + R"(%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%v4float = OpTypeVector %float 4 +%17 = OpTypeImage %float 1D 0 0 0 1 Unknown +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%g_tTex1df4 = OpVariable 
%_ptr_UniformConstant_17 UniformConstant +%21 = OpTypeSampler +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %21 +%g_sSamp = OpVariable %_ptr_UniformConstant_21 UniformConstant +%25 = OpTypeSampledImage %17 +%_ptr_Input_float = OpTypePointer Input %float +%i_Tex0 = OpVariable %_ptr_Input_float Input +%i_Tex1 = OpVariable %_ptr_Input_float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_Color = OpVariable %_ptr_Output_v4float Output +%float_0_5 = OpConstant %float 0.5 +%116 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +)"; + + const std::string relax_decos = + R"(OpDecorate %60 RelaxedPrecision +OpDecorate %63 RelaxedPrecision +OpDecorate %82 RelaxedPrecision +OpDecorate %88 RelaxedPrecision +OpDecorate %91 RelaxedPrecision +OpDecorate %94 RelaxedPrecision +)"; + + const std::string func_orig = + R"(%main = OpFunction %void None %3 +%5 = OpLabel +%60 = OpLoad %float %i_Tex0 +%63 = OpLoad %float %i_Tex1 +%77 = OpLoad %17 %g_tTex1df4 +%78 = OpLoad %21 %g_sSamp +%79 = OpSampledImage %25 %77 %78 +%82 = OpImageSampleImplicitLod %v4float %79 %60 +%83 = OpLoad %17 %g_tTex1df4 +%84 = OpLoad %21 %g_sSamp +%85 = OpSampledImage %25 %83 %84 +%88 = OpImageSampleImplicitLod %v4float %85 %63 +%91 = OpFAdd %v4float %82 %88 +%94 = OpFMul %v4float %91 %116 +OpStore %_entryPointOutput_Color %94 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SinglePassRunAndCheck<RelaxFloatOpsPass>( + defs0 + defs1 + func_orig, defs0 + relax_decos + defs1 + func_orig, true, + true); +} + +} // namespace +} // namespace opt +} // namespace spvtools diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index b229c84137..df76677cd5 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -141,6 +141,16 @@ Options (in lexicographical order):)", and constant index access chains in entry point call tree functions.)"); printf(R"( + --convert-relaxed-to-half + Convert all RelaxedPrecision arithmetic 
operations to half + precision, inserting conversion operations where needed. + Run after function scope variable load and store elimination + for better results. Simplify-instructions, redundancy-elimination + and DCE should be run after this pass to eliminate excess + conversions. This conversion is useful when the target platform + does not support RelaxedPrecision or ignores it. This pass also + removes all RelaxedPrecision decorations.)"); + printf(R"( --copy-propagate-arrays Does propagation of memory references when an array is a copy of another. It will only propagate an array if the source is never @@ -393,6 +403,10 @@ Options (in lexicographical order):)", Looks for instructions in the same function that compute the same value, and deletes the redundant ones.)"); printf(R"( + --relax-float-ops + Decorate all float operations with RelaxedPrecision if not already + so decorated. This does not decorate types or variables.)"); + printf(R"( --relax-struct-store Allow store from one struct type to a different type with compatible layout and members. This option is forwarded to the