I am short on time right now, but I can discuss this topic later.
The logic for saturation is not complex. Single-cycle latency, with a throughput equal to the number of ALUs, should be possible.
ARM and x86 already provide partial support for saturating arithmetic through the QADD and PADDS/VPADDS instructions. Note that x86's support is limited to 8/16-bit elements, and neither provides support for general-purpose registers (GPRs), only for vector registers.
My preference: Until the hardware manufacturers take saturation seriously, we need one language that does saturation by default.
from sys.intrinsics import llvm_intrinsic, _RegisterPackType
from os import abort
@fieldwise_init
@register_passable("trivial")
struct OverflowError(ImplicitlyCopyable):
    """Error value raised by `RaisingInt.__add__` when its addition overflows."""

    # Result bits of the overflowing addition: `RaisingInt.__add__` stores the
    # first element of the `llvm.sadd.with.overflow` result here.
    var overflow_bits: Int
# --- Solution 1: Wrapper Types ---
@fieldwise_init
@register_passable("trivial")
struct RaisingInt(ImplicitlyCopyable):
    """`Int` wrapper whose `+` raises `OverflowError` on signed overflow."""

    var int: Int

    fn __add__(self, other: Self) raises OverflowError -> Self:
        """Add two wrapped integers, raising if the signed sum overflows.

        Args:
            other: The right-hand operand.

        Returns:
            A new `RaisingInt` holding the sum when no overflow occurred.

        Raises:
            OverflowError: Carrying the intrinsic's result bits, when the
                intrinsic reports an overflow.
        """
        # The intrinsic yields a (sum, overflow-flag) pair in one operation.
        var sum_and_flag = llvm_intrinsic[
            "llvm.sadd.with.overflow",
            _RegisterPackType[Int, Bool],
            Int,
            Int,
        ](self.int, other.int)

        var sum_bits = sum_and_flag[0]
        if sum_and_flag[1]:
            raise OverflowError(sum_bits)
        return Self(sum_bits)
@fieldwise_init
@register_passable("trivial")
struct SaturatingInt(ImplicitlyCopyable):
    """`Int` wrapper whose `+` saturates instead of wrapping on overflow."""

    var int: Int

    fn __add__(self, other: Self) -> Self:
        """Add two wrapped integers with signed saturation.

        Args:
            other: The right-hand operand.

        Returns:
            A new `SaturatingInt` holding the result of the `llvm.sadd.sat`
            intrinsic, which clamps the sum to the representable signed range.
        """
        return Self(
            llvm_intrinsic["llvm.sadd.sat", Int, Int, Int](self.int, other.int)
        )
@fieldwise_init
@register_passable("trivial")
struct TrappingInt(ImplicitlyCopyable):
    """`Int` wrapper whose `+` aborts the program on signed overflow."""

    var int: Int

    fn __add__(self, other: Self) -> Self:
        """Add two wrapped integers, calling `abort()` if the sum overflows.

        The addition is delegated to `RaisingInt.__add__`; any error it raises
        is turned into a call to `abort()`.

        Args:
            other: The right-hand operand.

        Returns:
            A new `TrappingInt` holding the sum when no overflow occurred.
        """
        var lhs = RaisingInt(self.int)
        var rhs = RaisingInt(other.int)
        try:
            var checked_sum = lhs + rhs
            return TrappingInt(checked_sum.int)
        except _overflow_err:
            # The error payload is intentionally ignored; overflow is fatal here.
            abort()
# --- Solution 2: Extended arithmetic operations on Int ---
struct ExtendedInt:
    """Interface sketch: overflow-aware additions exposed as named methods.

    Every method below is a declaration-only stub (`...`); no behavior is
    implemented here. The names suggest they mirror the saturating, raising
    and trapping wrapper types defined earlier in this file -- TODO confirm.
    """

    var int: Int

    # Presumably the saturating addition (cf. SaturatingInt.__add__); stub only.
    fn sat_add(self, other: Self) -> Self: ...

    # Presumably the overflow-raising addition (cf. RaisingInt.__add__);
    # declared to raise OverflowError; stub only.
    fn raise_add(self, other: Self) raises OverflowError -> Self: ...

    # Presumably the aborting addition (cf. TrappingInt.__add__); stub only.
    fn trap_add(self, other: Self) -> Self: ...
# --- Tests ---
@export
fn test_raise_add(a: RaisingInt, b: RaisingInt) raises OverflowError -> RaisingInt:
    """Exported wrapper around `RaisingInt.__add__`.

    Args:
        a: Left-hand operand.
        b: Right-hand operand.

    Returns:
        The result of `a + b`.

    Raises:
        OverflowError: Propagated from `RaisingInt.__add__`.
    """
    var total = a + b
    return total
@export
fn test_sat_add(a: SaturatingInt, b: SaturatingInt) -> SaturatingInt:
    """Exported wrapper around `SaturatingInt.__add__`.

    Args:
        a: Left-hand operand.
        b: Right-hand operand.

    Returns:
        The result of `a + b`.
    """
    var total = a + b
    return total
@export
fn test_trap_add(a: TrappingInt, b: TrappingInt) -> TrappingInt:
    """Exported wrapper around `TrappingInt.__add__`.

    Args:
        a: Left-hand operand.
        b: Right-hand operand.

    Returns:
        The result of `a + b`.
    """
    var total = a + b
    return total
0000000000002060 <test_trap_add>:
2060: 48 01 f7 add rdi, rsi
2063: 70 04 jo 0x2069 <test_trap_add+0x9>
2065: 48 89 f8 mov rax, rdi
2068: c3 ret
2069: 50 push rax
206a: e8 31 00 00 00 call 0x20a0 <stdlib::os::os::abort()>
0000000000002070 <test_raise_add>:
2070: 48 89 f8 mov rax, rdi
2073: 48 01 f0 add rax, rsi
2076: 0f 91 c2 setno dl
2079: c3 ret
0000000000002080 <test_sat_add>:
2080: 48 8d 0c 37 lea rcx, [rdi + rsi]
2084: 48 c1 f9 3f sar rcx, 0x3f
2088: 48 b8 00 00 00 00 00 00 00 80 movabs rax, -0x8000000000000000
2092: 48 31 c8 xor rax, rcx
2095: 48 01 f7 add rdi, rsi
2098: 48 0f 41 c7 cmovno rax, rdi
209c: c3 ret
00000000000020a0 <stdlib::os::os::abort()>:
20a0: 0f 0b ud2