How to make vectorized(store) work for DType.bool?

Following is a readily runnable code. It starts storing DType.bool differently once we cross simd width for Bool type. How to make this work?

Outputs are given for when size is 61 and 66 respectively.

from algorithm import vectorize
from sys import simdwidthof


@fieldwise_init
struct Buffer[dtype: DType = DType.float32](
    Copyable & Movable & Sized & Stringable & Writable & Representable
):
    var size: Int
    var data: UnsafePointer[Scalar[dtype]]

    fn __init__(out self):
        self.size = 0
        self.data = UnsafePointer[Scalar[dtype]]()

    fn __init__(out self, size: Int):
        self.data = UnsafePointer[Scalar[dtype]].alloc(size)
        self.size = size

    fn __len__(self) -> Int:
        return self.size

    fn __getitem__(self, index: Int) -> Scalar[dtype]:
        return self.data.load[width=1, volatile=True](index)

    fn __setitem__(self, index: Int, scalar: Scalar[dtype]):
        self.data.store[width=1, volatile=True](index, scalar)

    @always_inline
    fn load[simdwidth: Int = 1](self, offset: Int) -> SIMD[dtype, simdwidth]:
        return self.data.load[width=simdwidth, volatile=True](offset)

    @always_inline
    fn store[
        simdwidth: Int = 1
    ](self, offset: Int, values: SIMD[dtype, simdwidth]):
        self.data.store[width=simdwidth, volatile=True](offset, values)

    fn fill[
        simd_width: Int = simdwidthof[dtype]()
    ](this: Buffer[dtype], value: Scalar[dtype]):
        @parameter
        fn set_scalar[simdwidth: Int](idx: Int):
            this.store[simdwidth](idx, value)

        vectorize[set_scalar, simd_width](this.size)

    @no_inline
    fn __str__(self) -> String:
        return String.write(self)

    fn write_to[W: Writer](self, mut writer: W):
        length = len(self)
        writer.write("Buffer[")
        if length <= 100:
            for i in range(length):
                writer.write(self[i])
                if i < length - 1:
                    writer.write(", ")
        else:
            for i in range(15):
                writer.write(self[i])
                writer.write(", ")

            writer.write("..., ")
            for i in range(length - 15, length):
                writer.write(self[i])
                if i < length - 1:
                    writer.write(", ")

        writer.write(", dtype=", self.dtype, ", size=", length, "]")

    @no_inline
    fn __repr__(self) -> String:
        return self.__str__()

    fn __gt__[
        simd_width: Int = simdwidthof[dtype]()
    ](this: Buffer[dtype], scalar: Scalar[dtype]) -> Buffer[DType.bool]:
        var out = Buffer[DType.bool](this.size)

        @parameter
        fn cmp_scalar[simdwidth: Int](idx: Int):
            out.store[simdwidth](idx, this.load[simdwidth](idx) > scalar)

        vectorize[cmp_scalar, simdwidthof[DType.bool]()](this.size)
        return out


fn test_buffer_greater_than_scalar() raises:
    x = Buffer[DType.float32](66)
    x.fill(42)
    cmp_result = x > 41
    print("cmp_result")
    print(cmp_result)


fn main() raises:
    test_buffer_greater_than_scalar()

Output:

cmp_result [with x = BufferDType.float32]


Buffer[True, True, True, True, True, True, True, True, False,.....  False, False, False, False, False, True, True, dtype=bool, size=66]


cmp_result [with x = Buffer[DType.float32](61)]


Buffer[True, True, True, True, True, True, True, True, True, True, ....True, True, True, True, True, dtype=bool, size=61]

This topic was automatically closed 180 days after the last reply. New replies are no longer allowed.