How can I call this intrinsic?
declare {i32, i1} @llvm.nvvm.vote.sync(i32 %membermask, i32 %mode, i1 %predicate)
I’ve tried the following, but it crashes the compiler:
var result = llvm_intrinsic[
"llvm.nvvm.vote.sync",
_RegisterPackType[Int32, Bool]
](Int32(_FULL_MASK), Int32(3), value)
return result[0].cast[dtype]()
INFO: Analyzed target //max/kernels/test/gpu/basics:test_prefix_sum.mojo.test (0 packages loaded, 0 targets configured).
ERROR: /modular/max/kernels/test/gpu/basics/BUILD.bazel:45:14: //max/kernels/test/gpu/basics:test_prefix_sum.mojo.test compiling mojo object failed: (Aborted): mojo failed: error executing MojoCompile command (from target //max/kernels/test/gpu/basics:test_prefix_sum.mojo.test) external/rules_mojo++mojo+mojo_toolchain_linux_x86_64/bin/mojo build '-strip-file-prefix=.' --emit object -o bazel-out/k8-fastbuild/bin/max/kernels/test/gpu/basics/test_prefix_sum.mojo.test.lo ... (remaining 24 arguments skipped)
Use --sandbox_debug to see verbose messages from the sandbox and retain the sandbox build root for debugging
double free or corruption (fasttop)
Please submit a bug report to https://github.com/modular/modular/issues and include the crash backtrace along with all the relevant source codes.
#0 0x0000557716bdda5b llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) Signals.cpp:0:0
#1 0x0000557716bdb82b llvm::sys::RunSignalHandlers() Signals.cpp:0:0
#2 0x0000557716bde10a SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
#3 0x00007fcd96e45330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
#4 0x00007fcd96e9eb2c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb2c)
#5 0x00007fcd96e4527e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4527e)
#6 0x00007fcd96e288ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff)
#7 0x00007fcd96e297b6 (/lib/x86_64-linux-gnu/libc.so.6+0x297b6)
#8 0x00007fcd96ea8ff5 (/lib/x86_64-linux-gnu/libc.so.6+0xa8ff5)
#9 0x00007fcd96eab32a (/lib/x86_64-linux-gnu/libc.so.6+0xab32a)
#10 0x00007fcd96eaddae cfree (/lib/x86_64-linux-gnu/libc.so.6+0xaddae)
#11 0x00005577136e3b4c compileOffloads(mlir::ModuleOp, llvm::MapVector<M::TargetInfoAttr, M::KGEN::OffloadInfo, llvm::DenseMap<M::TargetInfoAttr, unsigned int, llvm::DenseMapInfo<M::TargetInfoAttr, void>, llvm::detail::DenseMapPair<M::TargetInfoAttr, unsigned int>>, llvm::SmallVector<std::pair<M::TargetInfoAttr, M::KGEN::OffloadInfo>, 0u>>&, mlir::SymbolTable const&, M::KGEN::CompilationOptions, M::KGEN::ElaborateGeneratorsOptions) KGENCompiler.cpp:0:0
#12 0x000055771376969a M::KGEN::Elaborator::run(mlir::ModuleOp, llvm::ArrayRef<std::pair<M::KGEN::GeneratorOp, M::KGEN::ParameterExprArrayAttr>>) Elaborator.cpp:0:0
#13 0x000055771377752b (anonymous namespace)::ElaborateGeneratorsPass::runOnOperation() Elaborator.cpp:0:0
#14 0x00005577143de24b mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) Pass.cpp:0:0
#15 0x00005577143e0e80 mlir::PassManager::run(mlir::Operation*) Pass.cpp:0:0
#16 0x0000557713c1041d void llvm::detail::UniqueFunctionBase<void, M::AsyncRT::AnyAsyncValueRef&&>::CallImpl<M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0::operator()(mlir::Operation*, M::RCRef<M::WriteableBuffer>, M::AsyncRT::AnyAsyncValueRef) const::'lambda'(M::AsyncRT::AnyAsyncValueRef&&)>(void*, M::AsyncRT::AnyAsyncValueRef&) CachedTransform.cpp:0:0
#17 0x00005577136a3b97 void M::AsyncRT::AnyAsyncValueRef::andThen<false>(llvm::unique_function<void (M::AsyncRT::AnyAsyncValueRef&&)>&&) && ObjectCompiler.cpp:0:0
#18 0x0000557713c10017 M::AsyncRT::AnyAsyncValueRef llvm::detail::UniqueFunctionBase<M::AsyncRT::AnyAsyncValueRef, M::RCRef<M::WriteableBuffer>, M::AsyncRT::AnyAsyncValueRef>::CallImpl<M::AsyncRT::AnyAsyncValueRef M::Cache::cachedTransform<M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_1>(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, M::RCRef<M::WriteableBuffer>, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0&&, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_1&&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*)::'lambda'(M::RCRef<M::WriteableBuffer>, M::AsyncRT::AnyAsyncValueRef)>(void*, M::RCRef<M::WriteableBuffer>&, M::AsyncRT::AnyAsyncValueRef&) CachedTransform.cpp:0:0
#19 0x0000557713c0fb18 void llvm::detail::UniqueFunctionBase<void, M::AsyncRT::AsyncValueRef<std::optional<M::RCRef<M::Buffer>>>&&>::CallImpl<M::AsyncRT::AnyAsyncValueRef M::Cache::cachedTransform<M::AsyncRT::AnyAsyncValueRef M::Cache::cachedTransform<M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_1>(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, M::RCRef<M::WriteableBuffer>, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0&&, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_1&&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*)::'lambda'(M::RCRef<M::WriteableBuffer>, M::AsyncRT::AnyAsyncValueRef)>(M::AsyncRT::EncodedLocation, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>> const&, M::AsyncRT::AnyAsyncValueRef, M::RCRef<M::WriteableBuffer>, M::Cache::cachedTransform(mlir::Operation*, M::RCRef<M::Cache::BlobCache<M::Cache::TransformCacheKey>>, M::AsyncRT::AnyAsyncValueRef, mlir::PassManager&, std::function<void (mlir::Operation*)> const&, std::function<void (mlir::Operation*)> const&)::$_0, 
llvm::unique_function<M::AsyncRT::AnyAsyncValueRef (M::RCRef<M::Buffer>)>, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*)::'lambda'(M::AsyncRT::AsyncValueRef<std::optional<M::RCRef<M::Buffer>>>&&)>(void*, M::AsyncRT::AsyncValueRef<std::optional<M::RCRef<M::Buffer>>>&) CachedTransform.cpp:0:0
#20 0x0000557713c1889c void (anonymous namespace)::WorkQueueThread::runItemsImpl<(anonymous namespace)::WorkQueueThread::runOnThread()::$_0, (anonymous namespace)::WorkQueueThread::runOnThread()::$_1>((anonymous namespace)::WorkQueueThread::runOnThread()::$_0, (anonymous namespace)::WorkQueueThread::runOnThread()::$_1, bool, llvm::StringLiteral, llvm::StringLiteral) ThreadPoolWorkQueue.cpp:0:0
#21 0x0000557713c18707 (anonymous namespace)::WorkQueueThread::runOnThread() ThreadPoolWorkQueue.cpp:0:0
#22 0x00007fcd9a0ecdb4 (/lib/x86_64-linux-gnu/libstdc++.so.6+0xecdb4)
#23 0x00007fcd96e9caa4 (/lib/x86_64-linux-gnu/libc.so.6+0x9caa4)
#24 0x00007fcd96f29c3c (/lib/x86_64-linux-gnu/libc.so.6+0x129c3c)
INFO: Found 1 test target...
Target //max/kernels/test/gpu/basics:test_prefix_sum.mojo.test failed to build
Use --verbose_failures to see the command lines of failed build steps.
INFO: Elapsed time: 31.331s, Critical Path: 31.28s
INFO: 12 processes: 2 internal, 10 processwrapper-sandbox.
ERROR: Build did NOT complete successfully
//max/kernels/test/gpu/basics:test_prefix_sum.mojo.test FAILED TO BUILD
Executed 0 out of 1 test: 1 fails to build.
Repro code:
For some reason, Godbolt is unable to find this intrinsic, but when the same code is compiled locally it crashes the compiler with the trace above.