package created using the webbuild interface [release 1.23.0-1mamba;Sat May 04 2024]
This commit is contained in:
parent
4518cd0bab
commit
15f0781892
@ -1,2 +1,4 @@
|
|||||||
# ispc
|
# ispc
|
||||||
|
|
||||||
|
Compiler for high-performance SIMD programming on the CPU.
|
||||||
|
|
||||||
|
366
ispc-1.23.0-llvm-18.1.patch
Normal file
366
ispc-1.23.0-llvm-18.1.patch
Normal file
@ -0,0 +1,366 @@
|
|||||||
|
From 71f6866107da93bb963e9e1a5d2313df493a7ce5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arina Neshlyaeva <arina.neshlyaeva@intel.com>
|
||||||
|
Date: Wed, 14 Feb 2024 09:05:23 -0800
|
||||||
|
Subject: [PATCH] Fix ISPC build with LLVM 18.1 and trunk
|
||||||
|
|
||||||
|
---
|
||||||
|
alloy.py | 4 +-
|
||||||
|
...uxShuffleMask-handle-fp-int-bitcast.patch} | 0
|
||||||
|
...> 16_0_17_0_18_1_dbghelp_mitigation.patch} | 0
|
||||||
|
...ble-A-B-A-B-and-BSWAP-in-InstCombine.patch | 64 +++++++++++++++++++
|
||||||
|
src/ispc.cpp | 12 +++-
|
||||||
|
src/ispc_version.h | 9 +--
|
||||||
|
src/module.cpp | 19 +++++-
|
||||||
|
src/opt/ISPCPass.h | 6 +-
|
||||||
|
src/type.cpp | 6 +-
|
||||||
|
superbuild/CMakeLists.txt | 5 +-
|
||||||
|
tests/lit-tests/ftz_daz_flags_x86.ispc | 10 +--
|
||||||
|
tests/lit-tests/ftz_daz_flags_x86_64.ispc | 10 +--
|
||||||
|
12 files changed, 119 insertions(+), 26 deletions(-)
|
||||||
|
rename llvm_patches/{16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch => 16_0_17_0_18_1-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch} (100%)
|
||||||
|
rename llvm_patches/{16_0_17_0_dbghelp_mitigation.patch => 16_0_17_0_18_1_dbghelp_mitigation.patch} (100%)
|
||||||
|
create mode 100644 llvm_patches/18_1_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch
|
||||||
|
|
||||||
|
diff --git a/alloy.py b/alloy.py
|
||||||
|
index 6e276526c1..1dc9905943 100755
|
||||||
|
--- a/alloy.py
|
||||||
|
+++ b/alloy.py
|
||||||
|
@@ -1,6 +1,6 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
#
|
||||||
|
-# Copyright (c) 2013-2023, Intel Corporation
|
||||||
|
+# Copyright (c) 2013-2024, Intel Corporation
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
@@ -93,6 +93,8 @@ def checkout_LLVM(component, version_LLVM, target_dir, from_validation, verbose)
|
||||||
|
# git: "release/16.x"
|
||||||
|
if version_LLVM == "trunk":
|
||||||
|
GIT_TAG="main"
|
||||||
|
+ elif version_LLVM == "18_1":
|
||||||
|
+ GIT_TAG="llvmorg-18.1.1"
|
||||||
|
elif version_LLVM == "17_0":
|
||||||
|
GIT_TAG="llvmorg-17.0.6"
|
||||||
|
elif version_LLVM == "16_0":
|
||||||
|
diff --git a/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch b/llvm_patches/16_0_17_0_18_1-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
similarity index 100%
|
||||||
|
rename from llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
rename to llvm_patches/16_0_17_0_18_1-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
diff --git a/llvm_patches/16_0_17_0_dbghelp_mitigation.patch b/llvm_patches/16_0_17_0_18_1_dbghelp_mitigation.patch
|
||||||
|
similarity index 100%
|
||||||
|
rename from llvm_patches/16_0_17_0_dbghelp_mitigation.patch
|
||||||
|
rename to llvm_patches/16_0_17_0_18_1_dbghelp_mitigation.patch
|
||||||
|
diff --git a/llvm_patches/18_1_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch b/llvm_patches/18_1_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..0655a86aa9
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm_patches/18_1_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch
|
||||||
|
@@ -0,0 +1,64 @@
|
||||||
|
+# This patch is needed for ISPC for Xe only
|
||||||
|
+
|
||||||
|
+# 1. Transformation of add to or is not safe for VC backend.
|
||||||
|
+# 2. bswap intrinsics is not supported in VC backend yet.
|
||||||
|
+diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||||
|
+index 8a00b75a1f..7e3147b6cb 100644
|
||||||
|
+--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||||
|
++++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
|
||||||
|
+@@ -29,6 +29,7 @@
|
||||||
|
+ #include "llvm/Support/AlignOf.h"
|
||||||
|
+ #include "llvm/Support/Casting.h"
|
||||||
|
+ #include "llvm/Support/KnownBits.h"
|
||||||
|
++#include "llvm/TargetParser/Triple.h"
|
||||||
|
+ #include "llvm/Transforms/InstCombine/InstCombiner.h"
|
||||||
|
+ #include <cassert>
|
||||||
|
+ #include <utility>
|
||||||
|
+@@ -1579,11 +1580,15 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
|
||||||
|
+ if (match(&I, m_c_BinOp(m_ZExt(m_Value(A)), m_SExt(m_Deferred(A)))) &&
|
||||||
|
+ A->getType()->isIntOrIntVectorTy(1))
|
||||||
|
+ return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
|
||||||
|
+-
|
||||||
|
+- // A+B --> A|B iff A and B have no bits set in common.
|
||||||
|
++
|
||||||
|
+ WithCache<const Value *> LHSCache(LHS), RHSCache(RHS);
|
||||||
|
+- if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
|
||||||
|
+- return BinaryOperator::CreateDisjointOr(LHS, RHS);
|
||||||
|
++
|
||||||
|
++ // Disable this transformation for ISPC SPIR-V
|
||||||
|
++ if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
|
||||||
|
++ // A+B --> A|B iff A and B have no bits set in common.
|
||||||
|
++ if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
|
||||||
|
++ return BinaryOperator::CreateDisjointOr(LHS, RHS);
|
||||||
|
++ }
|
||||||
|
+
|
||||||
|
+ if (Instruction *Ext = narrowMathIfNoOverflow(I))
|
||||||
|
+ return Ext;
|
||||||
|
+diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||||
|
+index 5fd944a859..ad3ae96393 100644
|
||||||
|
+--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||||
|
++++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
|
||||||
|
+@@ -16,6 +16,7 @@
|
||||||
|
+ #include "llvm/IR/ConstantRange.h"
|
||||||
|
+ #include "llvm/IR/Intrinsics.h"
|
||||||
|
+ #include "llvm/IR/PatternMatch.h"
|
||||||
|
++#include "llvm/TargetParser/Triple.h"
|
||||||
|
+ #include "llvm/Transforms/InstCombine/InstCombiner.h"
|
||||||
|
+ #include "llvm/Transforms/Utils/Local.h"
|
||||||
|
+
|
||||||
|
+@@ -3389,9 +3390,12 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
|
||||||
|
+ if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
|
||||||
|
+ return FoldedLogic;
|
||||||
|
+
|
||||||
|
+- if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
|
||||||
|
+- /*MatchBitReversals*/ true))
|
||||||
|
+- return BitOp;
|
||||||
|
++ // Disable this transformation for ISPC SPIR-V
|
||||||
|
++ if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) {
|
||||||
|
++ if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
|
||||||
|
++ /*MatchBitReversals*/ true))
|
||||||
|
++ return BitOp;
|
||||||
|
++ }
|
||||||
|
+
|
||||||
|
+ if (Instruction *Funnel = matchFunnelShift(I, *this, DT))
|
||||||
|
+ return Funnel;
|
||||||
|
diff --git a/src/ispc.cpp b/src/ispc.cpp
|
||||||
|
index e0a4515105..04b5205711 100644
|
||||||
|
--- a/src/ispc.cpp
|
||||||
|
+++ b/src/ispc.cpp
|
||||||
|
@@ -51,6 +51,12 @@
|
||||||
|
#include <llvm/Target/TargetMachine.h>
|
||||||
|
#include <llvm/Target/TargetOptions.h>
|
||||||
|
|
||||||
|
+#if ISPC_LLVM_VERSION > ISPC_LLVM_17_0
|
||||||
|
+using CodegenOptLevel = llvm::CodeGenOptLevel;
|
||||||
|
+#else
|
||||||
|
+using CodegenOptLevel = llvm::CodeGenOpt::Level;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
using namespace ispc;
|
||||||
|
|
||||||
|
Globals *ispc::g;
|
||||||
|
@@ -1707,14 +1713,14 @@ Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, MCM
|
||||||
|
// requested by user via ISPC Optimization Flag. Mapping is :
|
||||||
|
// ISPC O0 -> Codegen O0
|
||||||
|
// ISPC O1,O2,O3,default -> Codegen O3
|
||||||
|
- llvm::CodeGenOpt::Level cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
|
||||||
|
+ CodegenOptLevel cOptLevel = CodegenOptLevel::Aggressive;
|
||||||
|
switch (g->codegenOptLevel) {
|
||||||
|
case Globals::CodegenOptLevel::None:
|
||||||
|
- cOptLevel = llvm::CodeGenOpt::Level::None;
|
||||||
|
+ cOptLevel = CodegenOptLevel::None;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Globals::CodegenOptLevel::Aggressive:
|
||||||
|
- cOptLevel = llvm::CodeGenOpt::Level::Aggressive;
|
||||||
|
+ cOptLevel = CodegenOptLevel::Aggressive;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
m_targetMachine->setOptLevel(cOptLevel);
|
||||||
|
diff --git a/src/ispc_version.h b/src/ispc_version.h
|
||||||
|
index d26129d864..6c343bbd41 100644
|
||||||
|
--- a/src/ispc_version.h
|
||||||
|
+++ b/src/ispc_version.h
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- Copyright (c) 2015-2023, Intel Corporation
|
||||||
|
+ Copyright (c) 2015-2024, Intel Corporation
|
||||||
|
|
||||||
|
SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
*/
|
||||||
|
@@ -19,10 +19,11 @@
|
||||||
|
#define ISPC_LLVM_15_0 150000
|
||||||
|
#define ISPC_LLVM_16_0 160000
|
||||||
|
#define ISPC_LLVM_17_0 170000
|
||||||
|
-#define ISPC_LLVM_18_0 180000
|
||||||
|
+#define ISPC_LLVM_18_1 181000
|
||||||
|
+#define ISPC_LLVM_19_0 190000
|
||||||
|
|
||||||
|
#define OLDEST_SUPPORTED_LLVM ISPC_LLVM_14_0
|
||||||
|
-#define LATEST_SUPPORTED_LLVM ISPC_LLVM_18_0
|
||||||
|
+#define LATEST_SUPPORTED_LLVM ISPC_LLVM_19_0
|
||||||
|
|
||||||
|
#ifdef __ispc__xstr
|
||||||
|
#undef __ispc__xstr
|
||||||
|
@@ -34,7 +35,7 @@
|
||||||
|
__ispc__xstr(LLVM_VERSION_MAJOR) "." __ispc__xstr(LLVM_VERSION_MINOR) "." __ispc__xstr(LLVM_VERSION_PATCH)
|
||||||
|
|
||||||
|
#if ISPC_LLVM_VERSION < OLDEST_SUPPORTED_LLVM || ISPC_LLVM_VERSION > LATEST_SUPPORTED_LLVM
|
||||||
|
-#error "Only LLVM 14.0 - 17.0 and 18.0 development branch are supported"
|
||||||
|
+#error "Only LLVM 14.0 - 18.1 and 19.0 development branch are supported"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ISPC_VERSION_STRING \
|
||||||
|
diff --git a/src/module.cpp b/src/module.cpp
|
||||||
|
index e883e61211..25a26686bf 100644
|
||||||
|
--- a/src/module.cpp
|
||||||
|
+++ b/src/module.cpp
|
||||||
|
@@ -115,6 +115,14 @@
|
||||||
|
#define strcasecmp stricmp
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+// Clang defines alloca which interferes with standard library function.
|
||||||
|
+// It happenned after newly included clang/Basic/Builtins.h in PR71709
|
||||||
|
+#if ISPC_LLVM_VERSION > ISPC_LLVM_17_0
|
||||||
|
+#define ALLOCA __builtin_alloca
|
||||||
|
+#else
|
||||||
|
+#define ALLOCA alloca
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
using namespace ispc;
|
||||||
|
|
||||||
|
// The magic constants are derived from https://github.com/intel/compute-runtime repo
|
||||||
|
@@ -495,7 +503,7 @@ Expr *lCreateConstExpr(ExprList *exprList, const AtomicType::BasicType basicType
|
||||||
|
} else {
|
||||||
|
// T equals int8_t* and etc.
|
||||||
|
using PointToType = typename std::remove_pointer<T>::type;
|
||||||
|
- vals = (T)alloca(N * sizeof(PointToType));
|
||||||
|
+ vals = (T)ALLOCA(N * sizeof(PointToType));
|
||||||
|
memset(vals, 0, N * sizeof(PointToType));
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1751,9 +1759,15 @@ bool Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, llvm:
|
||||||
|
const char *outFileName) {
|
||||||
|
// Figure out if we're generating object file or assembly output, and
|
||||||
|
// set binary output for object files
|
||||||
|
+#if ISPC_LLVM_VERSION > ISPC_LLVM_17_0
|
||||||
|
+ llvm::CodeGenFileType fileType =
|
||||||
|
+ (outputType == Object) ? llvm::CodeGenFileType::ObjectFile : llvm::CodeGenFileType::AssemblyFile;
|
||||||
|
+ bool binary = (fileType == llvm::CodeGenFileType::ObjectFile);
|
||||||
|
+#else
|
||||||
|
llvm::CodeGenFileType fileType = (outputType == Object) ? llvm::CGFT_ObjectFile : llvm::CGFT_AssemblyFile;
|
||||||
|
bool binary = (fileType == llvm::CGFT_ObjectFile);
|
||||||
|
|
||||||
|
+#endif
|
||||||
|
llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::OF_None : llvm::sys::fs::OF_Text;
|
||||||
|
|
||||||
|
std::error_code error;
|
||||||
|
@@ -3084,7 +3098,6 @@ static void lGetExportedFunctions(SymbolTable *symbolTable, std::map<std::string
|
||||||
|
}
|
||||||
|
|
||||||
|
static llvm::FunctionType *lGetVaryingDispatchType(FunctionTargetVariants &funcs) {
|
||||||
|
- llvm::Type *ptrToInt8Ty = llvm::Type::getInt8PtrTy(*g->ctx);
|
||||||
|
llvm::FunctionType *resultFuncTy = nullptr;
|
||||||
|
|
||||||
|
for (int i = 0; i < Target::NUM_ISAS; ++i) {
|
||||||
|
@@ -3109,7 +3122,7 @@ static llvm::FunctionType *lGetVaryingDispatchType(FunctionTargetVariants &funcs
|
||||||
|
// For each varying type pointed to, swap the LLVM pointer type
|
||||||
|
// with i8 * (as close as we can get to void *)
|
||||||
|
if (baseType->IsVaryingType()) {
|
||||||
|
- ftype[j] = ptrToInt8Ty;
|
||||||
|
+ ftype[j] = LLVMTypes::Int8PointerType;
|
||||||
|
foundVarying = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/src/opt/ISPCPass.h b/src/opt/ISPCPass.h
|
||||||
|
index f02eb42fe1..c1c413377e 100644
|
||||||
|
--- a/src/opt/ISPCPass.h
|
||||||
|
+++ b/src/opt/ISPCPass.h
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- Copyright (c) 2022-2023, Intel Corporation
|
||||||
|
+ Copyright (c) 2022-2024, Intel Corporation
|
||||||
|
|
||||||
|
SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
*/
|
||||||
|
@@ -37,7 +37,9 @@
|
||||||
|
#include <llvm/Transforms/Instrumentation.h>
|
||||||
|
#include <llvm/Transforms/Utils.h>
|
||||||
|
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
|
||||||
|
-#include <llvm/Transforms/Vectorize.h>
|
||||||
|
+#if ISPC_LLVM_VERSION > ISPC_LLVM_17_0
|
||||||
|
+#include <llvm/Transforms/Vectorize/LoadStoreVectorizer.h>
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
#ifdef ISPC_XE_ENABLED
|
||||||
|
#include <llvm/GenXIntrinsics/GenXIntrOpts.h>
|
||||||
|
diff --git a/src/type.cpp b/src/type.cpp
|
||||||
|
index 8b95a80559..6ba39beb8d 100644
|
||||||
|
--- a/src/type.cpp
|
||||||
|
+++ b/src/type.cpp
|
||||||
|
@@ -1013,7 +1013,11 @@ llvm::DIType *EnumType::GetDIType(llvm::DIScope *scope) const {
|
||||||
|
llvm::DIType *underlyingType = AtomicType::UniformInt32->GetDIType(scope);
|
||||||
|
llvm::DIType *diType =
|
||||||
|
m->diBuilder->createEnumerationType(diSpace, GetString(), diFile, pos.first_line, 32 /* size in bits */,
|
||||||
|
- 32 /* align in bits */, elementArray, underlyingType, name);
|
||||||
|
+ 32 /* align in bits */, elementArray, underlyingType,
|
||||||
|
+#if ISPC_LLVM_VERSION > ISPC_LLVM_17_0
|
||||||
|
+ 0,
|
||||||
|
+#endif
|
||||||
|
+ name);
|
||||||
|
switch (variability.type) {
|
||||||
|
case Variability::Uniform:
|
||||||
|
return diType;
|
||||||
|
diff --git a/superbuild/CMakeLists.txt b/superbuild/CMakeLists.txt
|
||||||
|
index 8ace2a9965..7d86ca8ee7 100644
|
||||||
|
--- a/superbuild/CMakeLists.txt
|
||||||
|
+++ b/superbuild/CMakeLists.txt
|
||||||
|
@@ -192,9 +192,10 @@ string(APPEND LIT_TOOLS_DIR ${GNUWIN32} "/bin")
|
||||||
|
unset(LLVM_TAG)
|
||||||
|
unset(LLVM_VERSION_DOTTED)
|
||||||
|
string(REPLACE "." "_" LLVM_VERSION "${LLVM_VERSION}")
|
||||||
|
-if (${LLVM_VERSION} STREQUAL 17_0)
|
||||||
|
+if (${LLVM_VERSION} STREQUAL 18_1)
|
||||||
|
+ set(LLVM_TAG llvmorg-18.1.1)
|
||||||
|
+elseif (${LLVM_VERSION} STREQUAL 17_0)
|
||||||
|
set(LLVM_TAG llvmorg-17.0.6)
|
||||||
|
- set(LLVM_VERSION_DOTTED 17.0.6)
|
||||||
|
elseif (${LLVM_VERSION} STREQUAL 16_0)
|
||||||
|
set(LLVM_TAG llvmorg-16.0.6)
|
||||||
|
elseif (${LLVM_VERSION} STREQUAL 15_0)
|
||||||
|
diff --git a/tests/lit-tests/ftz_daz_flags_x86.ispc b/tests/lit-tests/ftz_daz_flags_x86.ispc
|
||||||
|
index 98bbf604f6..8ab2e473bc 100644
|
||||||
|
--- a/tests/lit-tests/ftz_daz_flags_x86.ispc
|
||||||
|
+++ b/tests/lit-tests/ftz_daz_flags_x86.ispc
|
||||||
|
@@ -4,19 +4,19 @@
|
||||||
|
// RUN: %{ispc} %s --target=avx2 --arch=x86 --nostdlib --emit-llvm-text --opt=reset-ftz-daz -o - | FileCheck %s
|
||||||
|
// RUN: %{ispc} %s --target=avx2 --arch=x86 --nostdlib --emit-llvm-text -o - | FileCheck --check-prefixes=CHECK_NO_FTZ_DAZ %s
|
||||||
|
|
||||||
|
-// CHECK: define float @test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define float @export_test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @export_test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define void @export_void_test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @export_void_test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define float @externC_test_ftz_daz()
|
||||||
|
+// CHECK-LABEL: @externC_test_ftz_daz()
|
||||||
|
// CHECK: stmxcsr
|
||||||
|
// CHECK-COUNT-2: ldmxcsr
|
||||||
|
-// CHECK: define float @export_test_ftz_daz(
|
||||||
|
+// CHECK-LABEL: @export_test_ftz_daz(
|
||||||
|
// CHECK: stmxcsr
|
||||||
|
// CHECK-COUNT-2: ldmxcsr
|
||||||
|
|
||||||
|
diff --git a/tests/lit-tests/ftz_daz_flags_x86_64.ispc b/tests/lit-tests/ftz_daz_flags_x86_64.ispc
|
||||||
|
index 36bc58fba8..e68ee0f011 100644
|
||||||
|
--- a/tests/lit-tests/ftz_daz_flags_x86_64.ispc
|
||||||
|
+++ b/tests/lit-tests/ftz_daz_flags_x86_64.ispc
|
||||||
|
@@ -4,19 +4,19 @@
|
||||||
|
// RUN: %{ispc} %s --target=avx2 --arch=x86-64 --nostdlib --emit-llvm-text --opt=reset-ftz-daz -o - | FileCheck %s
|
||||||
|
// RUN: %{ispc} %s --target=avx2 --arch=x86-64 --nostdlib --emit-llvm-text -o - | FileCheck --check-prefixes=CHECK_NO_FTZ_DAZ %s
|
||||||
|
|
||||||
|
-// CHECK: define float @test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define float @export_test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @export_test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define void @export_void_test_ftz_daz___
|
||||||
|
+// CHECK-LABEL: @export_void_test_ftz_daz___
|
||||||
|
// CHECK-NOT: stmxcsr
|
||||||
|
// CHECK-NOT: ldmxcsr
|
||||||
|
-// CHECK: define float @externC_test_ftz_daz()
|
||||||
|
+// CHECK-LABEL: @externC_test_ftz_daz()
|
||||||
|
// CHECK: stmxcsr
|
||||||
|
// CHECK-COUNT-2: ldmxcsr
|
||||||
|
-// CHECK: define float @export_test_ftz_daz(
|
||||||
|
+// CHECK-LABEL: @export_test_ftz_daz(
|
||||||
|
// CHECK: stmxcsr
|
||||||
|
// CHECK-COUNT-2: ldmxcsr
|
||||||
|
|
102
ispc-1.23.0-upstream-add_x86-isel-fix.patch
Normal file
102
ispc-1.23.0-upstream-add_x86-isel-fix.patch
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
From c72af29f0bd82678918b941654aff892fbd7dc8a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Aleksei Nurmukhametov <aleksei.nurmukhametov@intel.com>
|
||||||
|
Date: Thu, 29 Feb 2024 08:59:44 -0800
|
||||||
|
Subject: [PATCH] llvm_patches: add x86-isel fix
|
||||||
|
|
||||||
|
This backported patch fixes generation of redundant vmovd, vpinsrd
|
||||||
|
instructions.
|
||||||
|
|
||||||
|
Test case added as XFAIL until LLVM rebuild in CI.
|
||||||
|
---
|
||||||
|
...auxShuffleMask-handle-fp-int-bitcast.patch | 40 +++++++++++++++++++
|
||||||
|
tests/lit-tests/2777.ispc | 34 ++++++++++++++++
|
||||||
|
2 files changed, 74 insertions(+)
|
||||||
|
create mode 100644 llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
create mode 100644 tests/lit-tests/2777.ispc
|
||||||
|
|
||||||
|
diff --git a/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch b/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..f197b35051
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm_patches/16_0_17_0-X86-getFauxShuffleMask-handle-fp-int-bitcast.patch
|
||||||
|
@@ -0,0 +1,40 @@
|
||||||
|
+# This patch is required to fix the issue #2777
|
||||||
|
+From 7aa6ddfea210b6b114ac649c4d3219138e9cc52a Mon Sep 17 00:00:00 2001
|
||||||
|
+From: Simon Pilgrim <llvm-dev@redking.me.uk>
|
||||||
|
+Date: Thu, 29 Feb 2024 10:32:37 +0000
|
||||||
|
+Subject: [PATCH] [X86] getFauxShuffleMask - handle
|
||||||
|
+ insert_vector_elt(bitcast(extract_vector_elt(x))) shuffle patterns
|
||||||
|
+
|
||||||
|
+If the bitcast is between types of equal scalar size (i.e. fp<->int bitcasts), then we can safely peek through them
|
||||||
|
+
|
||||||
|
+Fixes #83289
|
||||||
|
+---
|
||||||
|
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++--
|
||||||
|
+ 1 file changed, 5 insertions(+), 2 deletions(-)
|
||||||
|
+
|
||||||
|
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
+index e43b33eed470..22eea8b3d43d 100644
|
||||||
|
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
+@@ -8477,13 +8477,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+- // Peek through trunc/aext/zext.
|
||||||
|
++ // Peek through trunc/aext/zext/bitcast.
|
||||||
|
+ // TODO: aext shouldn't require SM_SentinelZero padding.
|
||||||
|
+ // TODO: handle shift of scalars.
|
||||||
|
+ unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
|
||||||
|
+ while (Scl.getOpcode() == ISD::TRUNCATE ||
|
||||||
|
+ Scl.getOpcode() == ISD::ANY_EXTEND ||
|
||||||
|
+- Scl.getOpcode() == ISD::ZERO_EXTEND) {
|
||||||
|
++ Scl.getOpcode() == ISD::ZERO_EXTEND ||
|
||||||
|
++ (Scl.getOpcode() == ISD::BITCAST &&
|
||||||
|
++ Scl.getScalarValueSizeInBits() ==
|
||||||
|
++ Scl.getOperand(0).getScalarValueSizeInBits())) {
|
||||||
|
+ Scl = Scl.getOperand(0);
|
||||||
|
+ MinBitsPerElt =
|
||||||
|
+ std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());
|
||||||
|
+--
|
||||||
|
+2.25.1
|
||||||
|
+
|
||||||
|
diff --git a/tests/lit-tests/2777.ispc b/tests/lit-tests/2777.ispc
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..ec7656629a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/lit-tests/2777.ispc
|
||||||
|
@@ -0,0 +1,34 @@
|
||||||
|
+// RUN: %{ispc} %s --target=avx2-i32x8 --emit-asm -o - | FileCheck %s
|
||||||
|
+
|
||||||
|
+// REQUIRES: X86_ENABLED && LLVM_16_0+
|
||||||
|
+
|
||||||
|
+// XFAIL: *
|
||||||
|
+
|
||||||
|
+// CHECK-NOT: vmovd
|
||||||
|
+// CHECK-NOT: vpinsrd
|
||||||
|
+
|
||||||
|
+struct FVector4f
|
||||||
|
+{
|
||||||
|
+ float V[4];
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+// Extra vmovd, vpinsrd after 2x vmaxps
|
||||||
|
+inline uniform FVector4f VectorMax(const uniform FVector4f& V1, const uniform FVector4f& V2)
|
||||||
|
+{
|
||||||
|
+ varying float S0, S1, Result;
|
||||||
|
+ *((uniform FVector4f *uniform)&S0) = *((uniform FVector4f *uniform)&V1);
|
||||||
|
+ *((uniform FVector4f *uniform)&S1) = *((uniform FVector4f *uniform)&V2);
|
||||||
|
+
|
||||||
|
+ Result = max(S0, S1);
|
||||||
|
+
|
||||||
|
+ return *((uniform FVector4f *uniform)&Result);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+export void foo(uniform float A[], uniform float B[])
|
||||||
|
+{
|
||||||
|
+
|
||||||
|
+ uniform FVector4f *uniform pA = (uniform FVector4f *uniform)A;
|
||||||
|
+ uniform FVector4f *uniform pB = (uniform FVector4f *uniform)B;
|
||||||
|
+
|
||||||
|
+ *pA = VectorMax(*pA, *pB);
|
||||||
|
+}
|
88
ispc.spec
Normal file
88
ispc.spec
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
Name: ispc
|
||||||
|
Version: 1.23.0
|
||||||
|
Release: 1mamba
|
||||||
|
Summary: Compiler for high-performance SIMD programming on the CPU
|
||||||
|
Group: Development/Tools
|
||||||
|
Vendor: openmamba
|
||||||
|
Distribution: openmamba
|
||||||
|
Packager: Silvan Calarco <silvan.calarco@mambasoft.it>
|
||||||
|
URL: https://ispc.github.io/
|
||||||
|
Source: https://github.com/ispc/ispc.git/v%{version}/ispc-%{version}.tar.bz2
|
||||||
|
Patch0: ispc-1.23.0-upstream-add_x86-isel-fix.patch
|
||||||
|
Patch1: ispc-1.23.0-llvm-18.1.patch
|
||||||
|
License: BSD
|
||||||
|
## AUTOBUILDREQ-BEGIN
|
||||||
|
BuildRequires: glibc-devel
|
||||||
|
BuildRequires: libgcc
|
||||||
|
BuildRequires: libllvm-devel
|
||||||
|
BuildRequires: libncurses-devel
|
||||||
|
BuildRequires: libstdc++6-devel
|
||||||
|
BuildRequires: libtbb-devel
|
||||||
|
## AUTOBUILDREQ-END
|
||||||
|
BuildRequires: cmake
|
||||||
|
Requires: lib%{name} = %{?epoch:%epoch:}%{version}-%{release}
|
||||||
|
Requires: lib%{name}-devel = %{?epoch:%epoch:}%{version}-%{release}
|
||||||
|
|
||||||
|
%description
|
||||||
|
Compiler for high-performance SIMD programming on the CPU.
|
||||||
|
|
||||||
|
%package -n lib%{name}
|
||||||
|
Group: System/Libraries
|
||||||
|
Summary: Shared libraries for %{name}
|
||||||
|
|
||||||
|
%description -n lib%{name}
|
||||||
|
This package contains shared libraries for %{name}.
|
||||||
|
|
||||||
|
%package -n lib%{name}-devel
|
||||||
|
Group: Development/Libraries
|
||||||
|
Summary: Development files for %{name}
|
||||||
|
Requires: lib%{name} = %{?epoch:%epoch:}%{version}-%{release}
|
||||||
|
|
||||||
|
%description -n lib%{name}-devel
|
||||||
|
This package contains libraries and header files for developing applications that use %{name}.
|
||||||
|
|
||||||
|
%debug_package
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%setup -q
|
||||||
|
%patch 0 -p1 -b .upstream-add_x86-isel-fix
|
||||||
|
%patch 1 -p1 -b .llvm-18.1
|
||||||
|
|
||||||
|
%build
|
||||||
|
%cmake
|
||||||
|
%cmake_build
|
||||||
|
|
||||||
|
%install
|
||||||
|
[ "%{buildroot}" != / ] && rm -rf "%{buildroot}"
|
||||||
|
%cmake_install
|
||||||
|
|
||||||
|
%clean
|
||||||
|
[ "%{buildroot}" != / ] && rm -rf "%{buildroot}"
|
||||||
|
|
||||||
|
%post -n lib%{name} -p /sbin/ldconfig
|
||||||
|
%postun -n lib%{name} -p /sbin/ldconfig
|
||||||
|
|
||||||
|
%files
|
||||||
|
%defattr(-,root,root)
|
||||||
|
%{_bindir}/check_isa
|
||||||
|
%{_bindir}/ispc
|
||||||
|
|
||||||
|
%files -n lib%{name}
|
||||||
|
%defattr(-,root,root)
|
||||||
|
%{_libdir}/libispcrt.so.*
|
||||||
|
%{_libdir}/libispcrt_device_cpu.so.*
|
||||||
|
|
||||||
|
%files -n lib%{name}-devel
|
||||||
|
%defattr(-,root,root)
|
||||||
|
%dir %{_includedir}/ispcrt
|
||||||
|
%{_includedir}/ispcrt/*
|
||||||
|
%{_libdir}/libispcrt.so
|
||||||
|
%{_libdir}/libispcrt_device_cpu.so
|
||||||
|
%{_libdir}/libispcrt_static.a
|
||||||
|
%dir %{_libdir}/cmake/ispcrt-%{version}
|
||||||
|
%{_libdir}/cmake/ispcrt-%{version}/*.cmake
|
||||||
|
%doc README.md
|
||||||
|
|
||||||
|
%changelog
|
||||||
|
* Sat May 04 2024 Silvan Calarco <silvan.calarco@mambasoft.it> 1.23.0-1mamba
|
||||||
|
- package created using the webbuild interface
|
Loading…
Reference in New Issue
Block a user