From c72af29f0bd82678918b941654aff892fbd7dc8a Mon Sep 17 00:00:00 2001 From: Aleksei Nurmukhametov Date: Thu, 29 Feb 2024 08:59:44 -0800 Subject: [PATCH] llvm_patches: add x86-isel fix This backported patch fixes generation of redundant vmovd, vpinsrd instructions. Test case added as XFAIL until LLVM rebuild in CI. --- ...auxShuffleMask-handle-fp-int-bitcast.patch | 40 +++++++++++++++++++ tests/lit-tests/2777.ispc | 34 ++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch create mode 100644 tests/lit-tests/2777.ispc diff --git a/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch b/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch new file mode 100644 index 0000000000..f197b35051 --- /dev/null +++ b/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch @@ -0,0 +1,40 @@ +# This patch is required to fix the issue #2777 +From 7aa6ddfea210b6b114ac649c4d3219138e9cc52a Mon Sep 17 00:00:00 2001 +From: Simon Pilgrim +Date: Thu, 29 Feb 2024 10:32:37 +0000 +Subject: [PATCH] [X86] getFauxShuffleMask - handle + insert_vector_elt(bitcast(extract_vector_elt(x))) shuffle patterns + +If the bitcast is between types of equal scalar size (i.e. fp<->int bitcasts), then we can safely peek through them + +Fixes #83289 +--- + llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp +index e43b33eed470..22eea8b3d43d 100644 +--- a/llvm/lib/Target/X86/X86ISelLowering.cpp ++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp +@@ -8477,13 +8477,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, + } + } + +- // Peek through trunc/aext/zext. ++ // Peek through trunc/aext/zext/bitcast. + // TODO: aext shouldn't require SM_SentinelZero padding. + // TODO: handle shift of scalars. + unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits(); + while (Scl.getOpcode() == ISD::TRUNCATE || + Scl.getOpcode() == ISD::ANY_EXTEND || +- Scl.getOpcode() == ISD::ZERO_EXTEND) { ++ Scl.getOpcode() == ISD::ZERO_EXTEND || ++ (Scl.getOpcode() == ISD::BITCAST && ++ Scl.getScalarValueSizeInBits() == ++ Scl.getOperand(0).getScalarValueSizeInBits())) { + Scl = Scl.getOperand(0); + MinBitsPerElt = + std::min(MinBitsPerElt, Scl.getScalarValueSizeInBits()); +-- +2.25.1 + diff --git a/tests/lit-tests/2777.ispc b/tests/lit-tests/2777.ispc new file mode 100644 index 0000000000..ec7656629a --- /dev/null +++ b/tests/lit-tests/2777.ispc @@ -0,0 +1,34 @@ +// RUN: %{ispc} %s --target=avx2-i32x8 --emit-asm -o - | FileCheck %s + +// REQUIRES: X86_ENABLED && LLVM_16_0+ + +// XFAIL: * + +// CHECK-NOT: vmovd +// CHECK-NOT: vpinsrd + +struct FVector4f +{ + float V[4]; +}; + +// Extra vmovd, vpinsrd after 2x vmaxps +inline uniform FVector4f VectorMax(const uniform FVector4f& V1, const uniform FVector4f& V2) +{ + varying float S0, S1, Result; + *((uniform FVector4f *uniform)&S0) = *((uniform FVector4f *uniform)&V1); + *((uniform FVector4f *uniform)&S1) = *((uniform FVector4f *uniform)&V2); + + Result = max(S0, S1); + + return *((uniform FVector4f *uniform)&Result); +} + +export void foo(uniform float A[], uniform float B[]) +{ + + uniform FVector4f *uniform pA = (uniform FVector4f *uniform)A; + uniform FVector4f *uniform pB = (uniform FVector4f *uniform)B; + + *pA = VectorMax(*pA, *pB); +}