Float32x8 (ocaml_simd.ocaml_simd.avx.Ocaml_simd

type t = float32x8

type mask = int32x8

val box : t -> float32x8 @@ portable

val unbox : float32x8 @ local -> t @@ portable

val zero : unit -> t @@ portable

Equivalent to const1 #0.0s.

val one : unit -> t @@ portable

Equivalent to const1 #1.0s.

val set1 : float32 -> t @@ portable

_mm256_set1_ps

val set : 
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  t @@ portable

_mm256_set_ps

val set_lanes : float32x4 -> float32x4 -> t @@ portable

_mm256_set_m128. Operates on two float32x4 lanes.

val const1 : float32 -> t @@ portable

Argument must be a float literal. Compiles to a static vector literal. Exposed as an external so user code can compile without cross-library inlining.

val const : 
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  float32 ->
  t @@ portable

Arguments must be float literals. Compiles to a static vector literal. Exposed as an external so user code can compile without cross-library inlining.

module Raw : sig ... end

module String : sig ... end

module Bytes : sig ... end

module Bigstring : sig ... end

module Float32_u_array : sig ... end

val (>=) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_LE_OS and the operands swapped (x >= y is evaluated as y <= x).

val (<=) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_LE_OS

val (=) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_EQ_OQ

val (>) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_LT_OS and the operands swapped (x > y is evaluated as y < x).

val (<) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_LT_OS

val (<>) : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_NEQ_UQ

val equal : t -> t -> mask @@ portable

_mm256_cmp_ps with _CMP_EQ_OQ

val is_nan : t -> mask @@ portable

_mm256_cmp_ps with _CMP_UNORD_Q

val is_not_nan : t -> mask @@ portable

_mm256_cmp_ps with _CMP_ORD_Q

val movemask : mask -> int64 @@ portable

_mm256_movemask_ps

val bitmask : mask -> int32x8 @@ portable

Identity.

val select : mask -> fail:t -> pass:t -> t @@ portable

_mm256_blendv_ps. Only reads the sign bit of each mask lane. Selects the element from pass if the sign bit is 1, otherwise from fail.

val insert : idx:int64 -> t -> float32 -> t @@ portable

idx must be in [0,7].

val extract : idx:int64 -> t -> float32 @@ portable

idx must be in [0,7].

val extract0 : t -> float32 @@ portable

Projection. Has no runtime cost.

val insert_lane : idx:int64 -> t -> float32x4 -> t @@ portable

idx must be a literal in [0,1]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.

val extract_lane : idx:int64 -> t -> float32x4 @@ portable

idx must be a literal in [0,1]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.

val extract_lane0 : t -> float32x4 @@ portable

Projection. Has no runtime cost. Operates on two float32x4 lanes.

val splat : 
  t ->
  #(float32
    * float32
    * float32
    * float32
    * float32
    * float32
    * float32
    * float32) @@ portable

Slow, intended for debugging / printing / etc.

val interleave_upper_lanes : even:t -> odd:t -> t @@ portable

_mm256_unpackhi_ps. Operates on two float32x4 lanes.

  interleave_upper_lanes ~even ~odd
  = (even.(2), odd.(2), even.(3), odd.(3), even.(6), odd.(6), even.(7), odd.(7))

val interleave_lower_lanes : even:t -> odd:t -> t @@ portable

_mm256_unpacklo_ps. Operates on two float32x4 lanes.

  interleave_lower_lanes ~even ~odd
  = (even.(0), odd.(0), even.(1), odd.(1), even.(4), odd.(4), even.(5), odd.(5))

val duplicate_even : t -> t @@ portable

_mm256_moveldup_ps

  duplicate_even x = (x.(0), x.(0), x.(2), x.(2), x.(4), x.(4), x.(6), x.(6))

val duplicate_odd : t -> t @@ portable

_mm256_movehdup_ps

  duplicate_odd x = (x.(1), x.(1), x.(3), x.(3), x.(5), x.(5), x.(7), x.(7))

val blend : Ocaml_simd.Blend8.t -> t -> t -> t @@ portable

_mm256_blend_ps. Specify blend with ppx_simd: [%blend N, N, N, N, N, N, N, N], where each N is in [0,1]. Exposed as an external so user code can compile without cross-library inlining.

  blend [%blend 1, 0, 1, 0, 1, 0, 1, 0] x y
  = (y.(0), x.(1), y.(2), x.(3), y.(4), x.(5), y.(6), x.(7))

val shuffle_lanes : Ocaml_simd.Shuffle4.t -> t -> t -> t @@ portable

_mm256_shuffle_ps. Specify shuffle with ppx_simd: [%shuffle N, N, N, N], where each N is in [0,3]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.

  shuffle_lanes [%shuffle 1, 0, 3, 2] x y
  = (x.(1), x.(0), y.(3), y.(2), x.(5), x.(4), y.(7), y.(6))

val permute_lanes : Ocaml_simd.Permute4.t -> t -> t @@ portable

_mm256_permute_ps. Specify permute with ppx_simd: [%permute N, N, N, N], where each N is in [0,3]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.

  permute_lanes [%permute 3, 2, 1, 0] x
  = (x.(3), x.(2), x.(1), x.(0), x.(7), x.(6), x.(5), x.(4))

val permute_lanes_by : t -> idx:int32x8 -> t @@ portable

_mm256_permutevar_ps. Operates on two float32x4 lanes. Each lane of idx is interpreted as an integer in [0,3] by taking its bottom two bits.

  permute_lanes_by x ~idx:(1, 0, 3, 2, 0, 2, 1, 3)
  = (x.(1), x.(0), x.(3), x.(2), x.(4), x.(6), x.(5), x.(7))

val min : t -> t -> t @@ portable

_mm256_min_ps. Equivalent to pointwise (x < y ? x : y). If either lane is NaN, the second lane is returned.

val max : t -> t -> t @@ portable

_mm256_max_ps. Equivalent to pointwise (x > y ? x : y). If either lane is NaN, the second lane is returned.

val add : t -> t -> t @@ portable

_mm256_add_ps

val sub : t -> t -> t @@ portable

_mm256_sub_ps

val mul : t -> t -> t @@ portable

_mm256_mul_ps

val div : t -> t -> t @@ portable

_mm256_div_ps

val neg : t -> t @@ portable

Compiles to xor with a static constant.

val abs : t -> t @@ portable

Compiles to and with a static constant.

val rcp : t -> t @@ portable

_mm256_rcp_ps. WARNING: result has relative error up to 1.5*2^-12, and may differ between CPU vendors.

val rsqrt : t -> t @@ portable

_mm256_rsqrt_ps. WARNING: result has relative error up to 1.5*2^-12, and may differ between CPU vendors.

val sqrt : t -> t @@ portable

_mm256_sqrt_ps

val add_sub : t -> t -> t @@ portable

_mm256_addsub_ps

  add_sub x y
  = ( x.(0) - y.(0)
    , x.(1) + y.(1)
    , x.(2) - y.(2)
    , x.(3) + y.(3)
    , x.(4) - y.(4)
    , x.(5) + y.(5)
    , x.(6) - y.(6)
    , x.(7) + y.(7) )

val horizontal_add_lanes : t -> t -> t @@ portable

_mm256_hadd_ps. Operates on two float32x4 lanes.

  horizontal_add_lanes x y
  = ( x.(1) + x.(0)
    , x.(3) + x.(2)
    , y.(1) + y.(0)
    , y.(3) + y.(2)
    , x.(5) + x.(4)
    , x.(7) + x.(6)
    , y.(5) + y.(4)
    , y.(7) + y.(6) )

val horizontal_sub_lanes : t -> t -> t @@ portable

_mm256_hsub_ps. Operates on two float32x4 lanes.

  horizontal_sub_lanes x y
  = ( x.(0) - x.(1)
    , x.(2) - x.(3)
    , y.(0) - y.(1)
    , y.(2) - y.(3)
    , x.(4) - x.(5)
    , x.(6) - x.(7)
    , y.(4) - y.(5)
    , y.(6) - y.(7) )

val dot : t -> t -> float32 @@ portable

Dot product.

val mul_add : t -> t -> t -> t @@ portable

_mm256_fmadd_ps. Computes x * y + z without intermediate rounding.

val mul_sub : t -> t -> t -> t @@ portable

_mm256_fmsub_ps. Computes x * y - z without intermediate rounding.

val neg_mul_add : t -> t -> t -> t @@ portable

_mm256_fnmadd_ps. Computes -(x * y) + z without intermediate rounding.

val neg_mul_sub : t -> t -> t -> t @@ portable

_mm256_fnmsub_ps. Computes -(x * y) - z without intermediate rounding.

val mul_add_sub : t -> t -> t -> t @@ portable

_mm256_fmaddsub_ps. Computes the following expression without intermediate rounding.

  mul_add_sub x y z
  = ( (x.(0) * y.(0)) - z.(0)
    , (x.(1) * y.(1)) + z.(1)
    , (x.(2) * y.(2)) - z.(2)
    , (x.(3) * y.(3)) + z.(3)
    , (x.(4) * y.(4)) - z.(4)
    , (x.(5) * y.(5)) + z.(5)
    , (x.(6) * y.(6)) - z.(6)
    , (x.(7) * y.(7)) + z.(7) )

val mul_sub_add : t -> t -> t -> t @@ portable

_mm256_fmsubadd_ps. Computes the following expression without intermediate rounding.

  mul_sub_add x y z
  = ( (x.(0) * y.(0)) + z.(0)
    , (x.(1) * y.(1)) - z.(1)
    , (x.(2) * y.(2)) + z.(2)
    , (x.(3) * y.(3)) - z.(3)
    , (x.(4) * y.(4)) + z.(4)
    , (x.(5) * y.(5)) - z.(5)
    , (x.(6) * y.(6)) + z.(6)
    , (x.(7) * y.(7)) - z.(7) )

val (+) : t -> t -> t @@ portable

val (-) : t -> t -> t @@ portable

val (/) : t -> t -> t @@ portable

val (*) : t -> t -> t @@ portable

val iround_current : t -> int32x8 @@ portable

_mm256_cvtps_epi32

val round_nearest : t -> t @@ portable

_mm256_round_ps

val round_current : t -> t @@ portable

_mm256_round_ps

val round_down : t -> t @@ portable

_mm256_round_ps

val round_up : t -> t @@ portable

_mm256_round_ps

val round_toward_zero : t -> t @@ portable

_mm256_round_ps

val unsafe_of_float32 : float32 -> t @@ portable

Identity; leaves upper 224 bits unspecified.

val unsafe_of_float32x4 : float32x4 -> t @@ portable

Identity; leaves upper 128 bits unspecified.

val of_float16x16_bits : float16x16 -> t @@ portable