Hi!

I’m trying to test vendor_blas.matmul. Is it compatible with LayoutTensors, or just NDBuffers?
I have the following code:
from layout import Layout, LayoutTensor
from gpu.host import DeviceContext
from random import randn
from memory import memset_zero
from linalg import vendor_blas

alias M = 128
alias N = 128
alias K = 64

alias a_layout = Layout.row_major(M, K)
alias b_layout = Layout.row_major(K, N)
alias c_layout = Layout.row_major(M, N)

def main():
    var ctx = DeviceContext()

    var a_buffer = ctx.enqueue_create_buffer[DType.float16](a_layout.size())
    var b_buffer = ctx.enqueue_create_buffer[DType.float16](b_layout.size())
    var c_buffer = ctx.enqueue_create_buffer[DType.float16](c_layout.size())

    with a_buffer.map_to_host() as host_buffer:
        var a_tensor = LayoutTensor[DType.float16, a_layout](host_buffer)
        randn(a_tensor.ptr, a_layout.size())

    with b_buffer.map_to_host() as host_buffer:
        var b_tensor = LayoutTensor[DType.float16, b_layout](host_buffer)
        randn(b_tensor.ptr, b_layout.size())

    with c_buffer.map_to_host() as host_buffer:
        var c_tensor = LayoutTensor[DType.float16, c_layout](host_buffer)
        memset_zero(c_tensor.ptr, c_layout.size())

    ctx.synchronize()

    var a_tensor = LayoutTensor[DType.float16, a_layout](a_buffer)
    var b_tensor = LayoutTensor[DType.float16, b_layout](b_buffer)
    var c_tensor = LayoutTensor[DType.float16, c_layout](c_buffer)

    with vendor_blas.Handle[vendor_blas.Backend.CUBLASLT]() as handle:
        vendor_blas.matmul[use_tf32=False](
            ctx,
            handle=handle,
            c=c_buffer,
            a=a_tensor,
            b=b_tensor,
            c_row_major=True
        )
However, I get the following error:
/home/ubuntu/gpu-intro/gemm.mojo:40:43: error: no matching function in call to 'matmul'
vendor_blas.matmul[use_tf32=False](
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
/home/ubuntu/gpu-intro/gemm.mojo:1:1: note: candidate not viable: unknown keyword argument: 'handle'
from layout import Layout, LayoutTensor
^
/home/ubuntu/gpu-intro/gemm.mojo:1:1: note: candidate not viable: failed to infer implicit parameter 'mut' of argument 'c' type 'Origin'
from layout import Layout, LayoutTensor
^
/home/ubuntu/gpu-intro/gemm.mojo:40:43: note: failed to infer parameter #2, parameter isn't used in any argument
vendor_blas.matmul[use_tf32=False](
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
mojo: error: failed to parse the provided Mojo source module
I’m not sure whether use_tf32 and handle are actually necessary, but I was trying to minimize the error surface area, so I passed them explicitly. It seems like I’m doing something wrong with the mutability of c_buffer, but I’m not at all sure how to fix that. Any ideas?
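In case it helps, this is roughly the variant I was planning to try next, reusing the same variables from the code above: pass the c LayoutTensor instead of the raw buffer and drop the keyword arguments I’m unsure about. This is completely untested, and I’m only guessing at which arguments matmul actually accepts and how:

    with vendor_blas.Handle[vendor_blas.Backend.CUBLASLT]() as handle:
        # My guess: hand matmul the LayoutTensor wrappers for all three
        # operands and let it infer the rest from their layouts.
        vendor_blas.matmul(
            ctx,
            handle,
            c_tensor,
            a_tensor,
            b_tensor,
            c_row_major=True,
        )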
Thanks!