Following is a readily runnable code. It starts storing DType.bool differently once we cross simd width for Bool type. How to make this work?
Outputs are given for when size is 61 and 66 respectively.
from algorithm import vectorize
from sys import simdwidthof
@fieldwise_init
struct Buffer[dtype: DType = DType.float32](
Copyable & Movable & Sized & Stringable & Writable & Representable
):
var size: Int
var data: UnsafePointer[Scalar[dtype]]
fn __init__(out self):
self.size = 0
self.data = UnsafePointer[Scalar[dtype]]()
fn __init__(out self, size: Int):
self.data = UnsafePointer[Scalar[dtype]].alloc(size)
self.size = size
fn __len__(self) -> Int:
return self.size
fn __getitem__(self, index: Int) -> Scalar[dtype]:
return self.data.load[width=1, volatile=True](index)
fn __setitem__(self, index: Int, scalar: Scalar[dtype]):
self.data.store[width=1, volatile=True](index, scalar)
@always_inline
fn load[simdwidth: Int = 1](self, offset: Int) -> SIMD[dtype, simdwidth]:
return self.data.load[width=simdwidth, volatile=True](offset)
@always_inline
fn store[
simdwidth: Int = 1
](self, offset: Int, values: SIMD[dtype, simdwidth]):
self.data.store[width=simdwidth, volatile=True](offset, values)
fn fill[
simd_width: Int = simdwidthof[dtype]()
](this: Buffer[dtype], value: Scalar[dtype]):
@parameter
fn set_scalar[simdwidth: Int](idx: Int):
this.store[simdwidth](idx, value)
vectorize[set_scalar, simd_width](this.size)
@no_inline
fn __str__(self) -> String:
return String.write(self)
fn write_to[W: Writer](self, mut writer: W):
length = len(self)
writer.write("Buffer[")
if length <= 100:
for i in range(length):
writer.write(self[i])
if i < length - 1:
writer.write(", ")
else:
for i in range(15):
writer.write(self[i])
writer.write(", ")
writer.write("..., ")
for i in range(length - 15, length):
writer.write(self[i])
if i < length - 1:
writer.write(", ")
writer.write(", dtype=", self.dtype, ", size=", length, "]")
@no_inline
fn __repr__(self) -> String:
return self.__str__()
fn __gt__[
simd_width: Int = simdwidthof[dtype]()
](this: Buffer[dtype], scalar: Scalar[dtype]) -> Buffer[DType.bool]:
var out = Buffer[DType.bool](this.size)
@parameter
fn cmp_scalar[simdwidth: Int](idx: Int):
out.store[simdwidth](idx, this.load[simdwidth](idx) > scalar)
vectorize[cmp_scalar, simdwidthof[DType.bool]()](this.size)
return out
fn test_buffer_greater_than_scalar() raises:
x = Buffer[DType.float32](66)
x.fill(42)
cmp_result = x > 41
print("cmp_result")
print(cmp_result)
fn main() raises:
test_buffer_greater_than_scalar()
Output:
cmp_result [with x = BufferDType.float32]
Buffer[True, True, True, True, True, True, True, True, False,..... False, False, False, False, False, True, True, dtype=bool, size=66]
cmp_result [with x = Buffer[DType.float32](61)]
Buffer[True, True, True, True, True, True, True, True, True, True, ....True, True, True, True, True, dtype=bool, size=61]