Ocaml_simd_avx.Float32x8
val box : t -> float32x8 @@ portable
val unbox : float32x8 @ local -> t @@ portable
val zero : unit -> t @@ portable
Equivalent to const1 #0.0s.
val one : unit -> t @@ portable
Equivalent to const1 #1.0s.
val set1 : float32 -> t @@ portable
_mm256_set1_ps
val set :
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
t @@ portable
_mm256_set_ps
val set_lanes : float32x4 -> float32x4 -> t @@ portable
_mm256_set_m128 Operates on two float32x4 lanes.
val const1 : float32 -> t @@ portable
Argument must be a float literal. Compiles to a static vector literal. Exposed as an external so user code can compile without cross-library inlining.
val const :
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
float32 ->
t @@ portable
Arguments must be float literals. Compiles to a static vector literal. Exposed as an external so user code can compile without cross-library inlining.
module Raw : sig ... end
module String : sig ... end
module Bytes : sig ... end
module Bigstring : sig ... end
module Float32_u_array : sig ... end
val movemask : mask -> int64 @@ portable
_mm256_movemask_ps
val bitmask : mask -> int32x8 @@ portable
Identity.
_mm256_blendv_ps Only reads the sign bit of each mask lane. Selects the element from pass if the sign bit is 1, otherwise from fail.
val extract : idx:int64 -> t -> float32 @@ portable
idx must be in [0,7].
val extract0 : t -> float32 @@ portable
Projection. Has no runtime cost.
idx must be a literal in [0,1]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.
val extract_lane : idx:int64 -> t -> float32x4 @@ portable
idx must be a literal in [0,1]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.
val extract_lane0 : t -> float32x4 @@ portable
Projection. Has no runtime cost. Operates on two float32x4 lanes.
val splat :
t ->
#(float32
* float32
* float32
* float32
* float32
* float32
* float32
* float32) @@ portable
Slow, intended for debugging / printing / etc.
_mm256_unpackhi_ps Operates on two float32x4 lanes.
interleave_upper_lanes ~even ~odd
= (even.(2), odd.(2), even.(3), odd.(3), even.(6), odd.(6), even.(7), odd.(7))
_mm256_unpacklo_ps Operates on two float32x4 lanes.
interleave_lower_lanes ~even ~odd
= (even.(0), odd.(0), even.(1), odd.(1), even.(4), odd.(4), even.(5), odd.(5))
_mm256_moveldup_ps
duplicate_even x = (x.(0), x.(0), x.(2), x.(2), x.(4), x.(4), x.(6), x.(6))
_mm256_movehdup_ps
duplicate_odd x = (x.(1), x.(1), x.(3), x.(3), x.(5), x.(5), x.(7), x.(7))
val blend : Ocaml_simd.Blend8.t -> t -> t -> t @@ portable
_mm256_blend_ps Specify blend with ppx_simd: [%blend N, N, N, N, N, N, N, N], where each N is in [0,1]. Exposed as an external so user code can compile without cross-library inlining.
blend [%blend 1, 0, 1, 0, 1, 0, 1, 0] x y
= (y.(0), x.(1), y.(2), x.(3), y.(4), x.(5), y.(6), x.(7))
val shuffle_lanes : Ocaml_simd.Shuffle4.t -> t -> t -> t @@ portable
_mm256_shuffle_ps Specify shuffle with ppx_simd: [%shuffle N, N, N, N], where each N is in [0,3]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.
shuffle_lanes [%shuffle 1, 0, 3, 2] x y
= (x.(1), x.(0), y.(3), y.(2), x.(5), x.(4), y.(7), y.(6))
val permute_lanes : Ocaml_simd.Permute4.t -> t -> t @@ portable
_mm256_permute_ps Specify permute with ppx_simd: [%permute N, N, N, N], where each N is in [0,3]. Operates on two float32x4 lanes. Exposed as an external so user code can compile without cross-library inlining.
permute_lanes [%permute 3, 2, 1, 0] x
= (x.(3), x.(2), x.(1), x.(0), x.(7), x.(6), x.(5), x.(4))
_mm256_permutevar_ps Operates on two float32x4 lanes. Each lane of idx is interpreted as an integer in [0,3] by taking its bottom two bits.
permute_lanes_by x ~idx:(1, 0, 3, 2, 0, 2, 1, 3)
= (x.(1), x.(0), x.(3), x.(2), x.(4), x.(6), x.(5), x.(7))
_mm256_min_ps Equivalent to pointwise (x < y ? x : y). If either element is NaN, the element from the second argument is returned.
_mm256_max_ps Equivalent to pointwise (x > y ? x : y). If either element is NaN, the element from the second argument is returned.
_mm256_rcp_ps WARNING: result has relative error up to 1.5*2^-12, and may differ between CPU vendors.
_mm256_rsqrt_ps WARNING: result has relative error up to 1.5*2^-12, and may differ between CPU vendors.
_mm256_addsub_ps
add_sub x y
= ( x.(0) - y.(0)
, x.(1) + y.(1)
, x.(2) - y.(2)
, x.(3) + y.(3)
, x.(4) - y.(4)
, x.(5) + y.(5)
, x.(6) - y.(6)
, x.(7) + y.(7) )
_mm256_hadd_ps Operates on two float32x4 lanes.
horizontal_add_lanes x y
= ( x.(1) + x.(0)
, x.(3) + x.(2)
, y.(1) + y.(0)
, y.(3) + y.(2)
, x.(5) + x.(4)
, x.(7) + x.(6)
, y.(5) + y.(4)
, y.(7) + y.(6) )
_mm256_hsub_ps Operates on two float32x4 lanes.
horizontal_sub_lanes x y
= ( x.(0) - x.(1)
, x.(2) - x.(3)
, y.(0) - y.(1)
, y.(2) - y.(3)
, x.(4) - x.(5)
, x.(6) - x.(7)
, y.(4) - y.(5)
, y.(6) - y.(7) )
_mm256_fmadd_ps. Computes x * y + z without intermediate rounding.
_mm256_fmsub_ps. Computes x * y - z without intermediate rounding.
_mm256_fnmadd_ps. Computes -(x * y) + z without intermediate rounding.
_mm256_fnmsub_ps. Computes -(x * y) - z without intermediate rounding.
_mm256_fmaddsub_ps. Computes the following expression without intermediate rounding.
mul_add_sub x y z
= ( (x.(0) * y.(0)) - z.(0)
, (x.(1) * y.(1)) + z.(1)
, (x.(2) * y.(2)) - z.(2)
, (x.(3) * y.(3)) + z.(3)
, (x.(4) * y.(4)) - z.(4)
, (x.(5) * y.(5)) + z.(5)
, (x.(6) * y.(6)) - z.(6)
, (x.(7) * y.(7)) + z.(7) )
_mm256_fmsubadd_ps. Computes the following expression without intermediate rounding.
mul_sub_add x y z
= ( (x.(0) * y.(0)) + z.(0)
, (x.(1) * y.(1)) - z.(1)
, (x.(2) * y.(2)) + z.(2)
, (x.(3) * y.(3)) - z.(3)
, (x.(4) * y.(4)) + z.(4)
, (x.(5) * y.(5)) - z.(5)
, (x.(6) * y.(6)) + z.(6)
, (x.(7) * y.(7)) - z.(7) )
val iround_current : t -> int32x8 @@ portable
_mm256_cvtps_epi32
val unsafe_of_float32 : float32 -> t @@ portable
Identity; leaves upper 224 bits unspecified.
val unsafe_of_float32x4 : float32x4 -> t @@ portable
Identity; leaves upper 128 bits unspecified.
val of_float16x16_bits : float16x16 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_float64x4_bits : float64x4 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_int8x32_bits : int8x32 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_int16x16_bits : int16x16 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_int32x8_bits : int32x8 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_int64x4_bits : int64x4 -> t @@ portable
Identity in the bit representation. Different numeric interpretation.
val of_float16x8 : float16x8 -> t @@ portable
_mm256_cvtph_ps
val of_int32x8 : int32x8 -> t @@ portable
_mm256_cvtepi32_ps. Performs numeric conversion from int32# to float32#.
val to_string : t -> string @@ portable
Compiles to splat, sprintf.
val of_string : string -> t @@ portable
Compiles to sscanf, set. Expects a string in the output format of to_string.