Transcendentals

Magetypes provides SIMD implementations of common math functions. These are polynomial approximations tuned per platform — faster than calling scalar f32::exp() in a loop.

Exponential Functions

use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[inline(always)]
fn exponentials<T: F32x8Convert>(token: T) {
    let v = f32x8::<T>::splat(token, 3.0);

    // Base-2 exponential: 2^x
    let result = v.exp2_midp();   // [8.0; 8]

    // Natural exponential: e^x
    let result = v.exp_midp();    // [e^3; 8] ~ [20.09; 8]
}

Logarithms

use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[inline(always)]
fn logarithms<T: F32x8Convert>(token: T) {
    let v = f32x8::<T>::splat(token, 8.0);

    // Base-2 logarithm: log2(x)
    let result = v.log2_midp();   // [3.0; 8]

    // Natural logarithm: ln(x)
    let result = v.ln_midp();     // [ln(8); 8] ~ [2.08; 8]

    // Base-10 logarithm: log10(x)
    let result = v.log10_midp();  // [log10(8); 8] ~ [0.90; 8]
}

Power

use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[inline(always)]
fn power<T: F32x8Convert>(token: T) {
    let base = f32x8::<T>::splat(token, 2.0);

    // x^n (computed as exp2(n * log2(x)))
    let result = base.pow_midp(3.0);  // [8.0; 8]
}

Note: pow takes a scalar exponent, not a vector.

Roots

use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[inline(always)]
fn roots<T: F32x8Convert>(token: T) {
    let v = f32x8::<T>::splat(token, 9.0);

    // Square root (hardware instruction on all platforms)
    let result = v.sqrt();            // [3.0; 8]

    // Cube root (polynomial approximation)
    let result = v.cbrt_midp();       // [cbrt(9); 8] ~ [2.08; 8]

    // Reciprocal square root: 1/sqrt(x)
    let result = v.rsqrt();           // [1/3; 8] ~ [0.33; 8]
}

sqrt() maps to a single hardware instruction (vsqrtps on x86, fsqrt on ARM). It's exact, not an approximation.

Precision Variants

Most transcendentals come in multiple precision levels. See Precision Levels for the full breakdown.

// given token: T where T: F32x8Convert
let v = f32x8::<T>::splat(token, 2.0);

let fast     = v.exp2_lowp();   // ~12-bit precision, fastest
let balanced = v.exp2_midp();   // ~20-bit precision

There are no unsuffixed "full precision" transcendentals. _midp is the highest precision level for polynomial approximations. For exact results, use sqrt() (which is a hardware instruction, not an approximation).

Domain Errors

Invalid inputs produce NaN or infinity, matching IEEE 754 behavior:

use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[inline(always)]
fn domain_errors<T: F32x8Convert>(token: T) {
    let v = f32x8::<T>::from_array(token, [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);

    // sqrt of negative -> NaN
    let sqrt = v.sqrt();   // [NaN, 0.0, 1.0, 1.41, 1.73, 2.0, 2.24, 2.45]

    // log of non-positive -> NaN or -inf
    let log = v.ln_midp(); // [NaN, -inf, 0.0, 0.69, 1.10, 1.39, 1.61, 1.79]
}

Example: Gaussian Function

use archmage::{arcane, SimdToken};
use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[arcane]
fn gaussian<T: F32x8Convert>(token: T, x: &[f32; 8], sigma: f32) -> [f32; 8] {
    let v = f32x8::<T>::from_array(token, *x);
    let sigma_v = f32x8::<T>::splat(token, sigma);
    let two = f32x8::<T>::splat(token, 2.0);

    // exp(-x^2 / (2 * sigma^2))
    let x_sq = v * v;
    let two_sigma_sq = two * sigma_v * sigma_v;
    let exponent = -(x_sq / two_sigma_sq);
    let result = exponent.exp_midp();  // Good precision, fast

    result.to_array()
}

Example: Softmax

use archmage::{arcane, SimdToken};
use magetypes::simd::{
    generic::f32x8,
    backends::F32x8Convert,
};

#[arcane]
fn softmax<T: F32x8Convert>(token: T, logits: &[f32; 8]) -> [f32; 8] {
    let v = f32x8::<T>::from_array(token, *logits);

    // Subtract max for numerical stability
    let max = v.reduce_max();
    let shifted = v - f32x8::<T>::splat(token, max);

    // exp(x - max)
    let exp = shifted.exp_midp();

    // Normalize
    let sum = exp.reduce_add();
    let result = exp / f32x8::<T>::splat(token, sum);

    result.to_array()
}

Platform Coverage

  • x86-64: All functions available on f32x4, f32x8, f64x2, f64x4
  • AArch64: Full support via NEON polynomial approximations
  • WASM: Most functions available; some use scalar fallback internally

The implementations use platform-tuned polynomial coefficients for best accuracy per instruction count.

Found an error or it needs a clarification? Open an issue on GitHub.
Substantiated corrections will be incorporated with attribution.