Enumerations
enum	pad_mode_e { PAD_MODE_REFLECT = (INT32_MAX-0) , PAD_MODE_EXTEND = (INT32_MAX-1) , PAD_MODE_ZERO = 0 }
	Supported padding modes for convolutions in "same" mode. More...

Functions
headroom_t	xs3_vect_complex_s32_add (complex_s32_t a[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Add one complex 32-bit vector to another. More...

headroom_t	xs3_vect_complex_s32_add_scalar (complex_s32_t a[], const complex_s32_t b[], const complex_s32_t c, const unsigned length, const right_shift_t b_shr)
	Add a scalar to a complex 32-bit vector. More...

headroom_t	xs3_vect_complex_s32_conj_mul (complex_s32_t a[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by the complex conjugate of another. More...

headroom_t	xs3_vect_complex_s32_headroom (const complex_s32_t x[], const unsigned length)
	Calculate the headroom of a complex 32-bit array. More...

headroom_t	xs3_vect_complex_s32_macc (complex_s32_t acc[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by another, and add the result to an accumulator. More...

headroom_t	xs3_vect_complex_s32_nmacc (complex_s32_t acc[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_complex_s32_conj_macc (complex_s32_t acc[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by the complex conjugate of another, and add the result to an accumulator. More...

headroom_t	xs3_vect_complex_s32_conj_nmacc (complex_s32_t acc[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by the complex conjugate of another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_complex_s32_mag (int32_t a[], const complex_s32_t b[], const unsigned length, const right_shift_t b_shr, const complex_s32_t *rot_table, const unsigned table_rows)
	Compute the magnitude of each element of a complex 32-bit vector. More...

headroom_t	xs3_vect_complex_s32_mul (complex_s32_t a[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one complex 32-bit vector element-wise by another. More...

headroom_t	xs3_vect_complex_s32_real_mul (complex_s32_t a[], const complex_s32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply a complex 32-bit vector element-wise by a real 32-bit vector. More...

headroom_t	xs3_vect_complex_s32_real_scale (complex_s32_t a[], const complex_s32_t b[], const int32_t c, const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply a complex 32-bit vector by a real scalar. More...

headroom_t	xs3_vect_complex_s32_scale (complex_s32_t a[], const complex_s32_t b[], const int32_t c_real, const int32_t c_imag, const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply a complex 32-bit vector by a complex 32-bit scalar. More...

void	xs3_vect_complex_s32_set (complex_s32_t a[], const int32_t b_real, const int32_t b_imag, const unsigned length)
	Set each element of a complex 32-bit vector to a specified value. More...

headroom_t	xs3_vect_complex_s32_shl (complex_s32_t a[], const complex_s32_t b[], const unsigned length, const left_shift_t b_shl)
	Left-shift each element of a complex 32-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_complex_s32_shr (complex_s32_t a[], const complex_s32_t b[], const unsigned length, const right_shift_t b_shr)
	Right-shift each element of a complex 32-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_complex_s32_squared_mag (int32_t a[], const complex_s32_t b[], const unsigned length, const right_shift_t b_shr)
	Computes the squared magnitudes of elements of a complex 32-bit vector. More...

headroom_t	xs3_vect_complex_s32_sub (complex_s32_t a[], const complex_s32_t b[], const complex_s32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Subtract one complex 32-bit vector from another. More...

void	xs3_vect_complex_s32_sum (complex_s64_t *a, const complex_s32_t b[], const unsigned length, const right_shift_t b_shr)
	Compute the sum of elements of a complex 32-bit vector. More...

void	xs3_vect_complex_s32_tail_reverse (complex_s32_t x[], const unsigned length)
	Reverses the order of the tail of a complex 32-bit vector. More...

headroom_t	xs3_vect_complex_s32_conjugate (complex_s32_t a[], const complex_s32_t b[], const unsigned length)
	Get the complex conjugate of a complex 32-bit vector. More...

headroom_t	xs3_vect_s32_copy (int32_t a[], const int32_t b[], const unsigned length)
	Copy one 32-bit vector to another. More...

headroom_t	xs3_vect_s32_abs (int32_t a[], const int32_t b[], const unsigned length)
	Compute the element-wise absolute value of a 32-bit vector. More...

int64_t	xs3_vect_s32_abs_sum (const int32_t b[], const unsigned length)
	Compute the sum of the absolute values of elements of a 32-bit vector. More...

headroom_t	xs3_vect_s32_add (int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Add together two 32-bit vectors. More...

headroom_t	xs3_vect_s32_add_scalar (int32_t a[], const int32_t b[], const int32_t c, const unsigned length, const right_shift_t b_shr)
	Add a scalar to a 32-bit vector. More...

unsigned	xs3_vect_s32_argmin (const int32_t b[], const unsigned length)
	Obtain the array index of the minimum element of a 32-bit vector. More...

headroom_t	xs3_vect_s32_clip (int32_t a[], const int32_t b[], const unsigned length, const int32_t lower_bound, const int32_t upper_bound, const right_shift_t b_shr)
	Clamp the elements of a 32-bit vector to a specified range. More...

int64_t	xs3_vect_s32_dot (const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Compute the inner product between two 32-bit vectors. More...

int64_t	xs3_vect_s32_energy (const int32_t b[], const unsigned length, const right_shift_t b_shr)
	Calculate the energy (sum of squares of elements) of a 32-bit vector. More...

headroom_t	xs3_vect_s32_headroom (const int32_t x[], const unsigned length)
	Calculate the headroom of a 32-bit vector. More...

headroom_t	xs3_vect_s32_inverse (int32_t a[], const int32_t b[], const unsigned length, const unsigned scale)
	Compute the inverse of elements of a 32-bit vector. More...

int32_t	xs3_vect_s32_max (const int32_t b[], const unsigned length)
	Find the maximum value in a 32-bit vector. More...

headroom_t	xs3_vect_s32_max_elementwise (int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Get the element-wise maximum of two 32-bit vectors. More...

int32_t	xs3_vect_s32_min (const int32_t b[], const unsigned length)
	Find the minimum value in a 32-bit vector. More...

headroom_t	xs3_vect_s32_min_elementwise (int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Get the element-wise minimum of two 32-bit vectors. More...

headroom_t	xs3_vect_s32_mul (int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one 32-bit vector element-wise by another. More...

headroom_t	xs3_vect_s32_macc (int32_t acc[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one 32-bit vector element-wise by another, and add the result to an accumulator. More...

headroom_t	xs3_vect_s32_nmacc (int32_t acc[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply one 32-bit vector element-wise by another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_s32_rect (int32_t a[], const int32_t b[], const unsigned length)
	Rectify the elements of a 32-bit vector. More...

headroom_t	xs3_vect_s32_scale (int32_t a[], const int32_t b[], const unsigned length, const int32_t c, const right_shift_t b_shr, const right_shift_t c_shr)
	Multiply a 32-bit vector by a scalar. More...

void	xs3_vect_s32_set (int32_t a[], const int32_t b, const unsigned length)
	Set all elements of a 32-bit vector to the specified value. More...

headroom_t	xs3_vect_s32_shl (int32_t a[], const int32_t b[], const unsigned length, const left_shift_t b_shl)
	Left-shift the elements of a 32-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_s32_shr (int32_t a[], const int32_t b[], const unsigned length, const right_shift_t b_shr)
	Right-shift the elements of a 32-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_s32_sqrt (int32_t a[], const int32_t b[], const unsigned length, const right_shift_t b_shr, const unsigned depth)
	Compute the square root of elements of a 32-bit vector. More...

headroom_t	xs3_vect_s32_sub (int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Subtract one 32-bit vector from another. More...

int64_t	xs3_vect_s32_sum (const int32_t b[], const unsigned length)
	Sum the elements of a 32-bit vector. More...

void	xs3_vect_s32_zip (complex_s32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Interleave the elements of two vectors into a single vector. More...

void	xs3_vect_s32_unzip (int32_t a[], int32_t b[], const complex_s32_t c[], const unsigned length)
	Deinterleave the real and imaginary parts of a complex 32-bit vector into two separate vectors. More...

headroom_t	xs3_vect_s32_convolve_valid (int32_t y[], const int32_t x[], const int32_t b_q30[], const unsigned x_length, const unsigned b_length)
	Convolve a 32-bit vector with a short kernel. More...

headroom_t	xs3_vect_s32_convolve_same (int32_t y[], const int32_t x[], const int32_t b_q30[], const unsigned x_length, const unsigned b_length, const pad_mode_e padding_mode)
	Convolve a 32-bit vector with a short kernel. More...

void	xs3_vect_s32_merge_accs (int32_t a[], const xs3_split_acc_s32_t b[], const unsigned length)
	Merge a vector of split 32-bit accumulators into a vector of int32_t's. More...

void	xs3_vect_s32_split_accs (xs3_split_acc_s32_t a[], const int32_t b[], const unsigned length)
	Split a vector of `int32_t`'s into a vector of `xs3_split_acc_s32_t`. More...

Detailed Description

Enumeration Type Documentation

◆ pad_mode_e

enum pad_mode_e

Supported padding modes for convolutions in "same" mode.

See also: xs3_vect_s32_convolve_same(), bfp_s32_convolve_same()

Enumerator

PAD_MODE_REFLECT

Vector is reflected at its boundaries, such that

\( \tilde{x}_i = \begin{cases} x_{-i} & i \lt 0 \\ x_{2N - 2 - i} & i \ge N \\ x_i & \text{otherwise} \end{cases} \)

For example, if the length \(N\) of input vector \(\bar x\) is \(7\) and the order \(K\) of the filter is \(5\), then

\( \bar{x} = [x_0, x_1, x_2, x_3, x_4, x_5, x_6] \)

\( \tilde{x} = [x_2, x_1, x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_5, x_4] \)

Note that by convention the first element of \(\tilde{x}\) is considered to be at index \(-P\), where \(P = \lfloor K/2 \rfloor\).

PAD_MODE_EXTEND

Vector is padded using the value of the bounding elements.

\( \tilde{x}_i = \begin{cases} x_{0} & i \lt 0 \\ x_{N-1} & i \ge N \\ x_i & \text{otherwise} \end{cases} \)

For example, if the length \(N\) of input vector \(\bar x\) is \(7\) and the order \(K\) of the filter is \(5\), then

\( \bar{x} = [x_0, x_1, x_2, x_3, x_4, x_5, x_6] \)

\( \tilde{x} = [x_0, x_0, x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_6, x_6] \)

Note that by convention the first element of \(\tilde{x}\) is considered to be at index \(-P\), where \(P = \lfloor K/2 \rfloor\).

PAD_MODE_ZERO

Vector is padded with zeroes.

\( \tilde{x}_i = \begin{cases} 0 & i \lt 0 \\ 0 & i \ge N \\ x_i & \text{otherwise} \end{cases} \)

For example, if the length \(N\) of input vector \(\bar x\) is \(7\) and the order \(K\) of the filter is \(5\), then

\( \bar{x} = [x_0, x_1, x_2, x_3, x_4, x_5, x_6] \)

\( \tilde{x} = [0, 0, x_0, x_1, x_2, x_3, x_4, x_5, x_6, 0, 0] \)

Note that by convention the first element of \(\tilde{x}\) is considered to be at index \(-P\), where \(P = \lfloor K/2 \rfloor\).

Function Documentation

◆ xs3_vect_complex_s32_add()

headroom_t xs3_vect_complex_s32_add	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Add one complex 32-bit vector to another.

a[], b[] and c[] represent the complex 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\)
respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} + Re\{c_k'\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} + Im\{c_k'\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 32-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s32_add_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_add_prepare

◆ xs3_vect_complex_s32_add_scalar()

headroom_t xs3_vect_complex_s32_add_scalar	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const complex_s32_t	c,
		const unsigned	length,
		const right_shift_t	b_shr
	)

Add a scalar to a complex 32-bit vector.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

c is the complex scalar \(c\) to be added to each element of \(\bar b\).

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} + Re\{c\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} + Im\{c\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If elements of \(\bar b\) are the complex mantissas of BFP vector \( \bar{b} \cdot 2^{b\_exp}\), and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s32_add_scalar_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Note that \(c\_shr\) is an output of xs3_vect_complex_s32_add_scalar_prepare(), but is not a parameter to this function. The \(c\_shr\) produced by xs3_vect_complex_s32_add_scalar_prepare() is to be applied by the user, and the result passed as input c.

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_add_scalar_prepare

◆ xs3_vect_complex_s32_conj_macc()

headroom_t xs3_vect_complex_s32_conj_macc	(	complex_s32_t	acc[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by the complex conjugate of another, and add the result to an accumulator.

acc[] represents the complex 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the complex 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( ( Re\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} + Im\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & s_k \leftarrow round( sat_{32}( ( Im\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} - Re\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & Re\{a_k\} \leftarrow sat_{32}( Re\{\tilde{a}_k\} + v_k ) \\ & Im\{a_k\} \leftarrow sat_{32}( Im\{\tilde{a}_k\} + s_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

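For accumulation to make sense mathematically, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr + 30 \), where the \(+30\) accounts for the \(2^{-30}\) scaling applied to each product in the operation above.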

The function xs3_vect_complex_s32_conj_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Complex accumulator \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_conj_macc_prepare

◆ xs3_vect_complex_s32_conj_mul()

headroom_t xs3_vect_complex_s32_conj_mul	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by the complex conjugate of another.

a[], b[] and c[] represent the complex 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow \left( Re\{b_k'\} \cdot Re\{c_k'\} + Im\{b_k'\} \cdot Im\{c_k'\} \right) \cdot 2^{-30} \\ & Im\{a_k\} \leftarrow \left( Im\{b_k'\} \cdot Re\{c_k'\} - Re\{b_k'\} \cdot Im\{c_k'\} \right) \cdot 2^{-30} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 32-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr + 30\) (the \(+30\) accounts for the \(2^{-30}\) scaling applied to each product in the operation above).

The function xs3_vect_complex_s32_conj_mul_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to elements of \(\bar b\).
[in]	c_shr	Right-shift applied to elements of \(\bar c\).

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_conj_mul_prepare

◆ xs3_vect_complex_s32_conj_nmacc()

headroom_t xs3_vect_complex_s32_conj_nmacc	(	complex_s32_t	acc[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by the complex conjugate of another, and subtract the result from an accumulator.

acc[] represents the complex 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the complex 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( ( Re\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} + Im\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & s_k \leftarrow round( sat_{32}( ( Im\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} - Re\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & Re\{a_k\} \leftarrow sat_{32}( Re\{\tilde{a}_k\} - v_k ) \\ & Im\{a_k\} \leftarrow sat_{32}( Im\{\tilde{a}_k\} - s_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

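For accumulation to make sense mathematically, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr + 30 \), where the \(+30\) accounts for the \(2^{-30}\) scaling applied to each product in the operation above.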

The function xs3_vect_complex_s32_conj_nmacc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Complex accumulator \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_conj_nmacc_prepare

◆ xs3_vect_complex_s32_conjugate()

headroom_t xs3_vect_complex_s32_conjugate	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const unsigned	length
	)

Get the complex conjugate of a complex 32-bit vector.

The complex conjugate of a complex scalar \(z = x + yi\) is \(z^* = x - yi\). This function computes the complex conjugate of each element of \(\bar b\) (negates the imaginary part of each element) and places the result in \(\bar a\).

a[] is the complex 32-bit output vector \(\bar a\).

b[] is the complex 32-bit input vector \(\bar b\).

Both a and b must point to word-aligned addresses.

length is the number of elements in \(\bar a\) and \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow Re\{b_k\} \\ & Im\{a_k\} \leftarrow - Im\{b_k\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Parameters

[out]	a	Complex 32-bit output vector \(\bar a\)
[in]	b	Complex 32-bit input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_headroom()

headroom_t xs3_vect_complex_s32_headroom	(	const complex_s32_t	x[],
		const unsigned	length
	)

Calculate the headroom of a complex 32-bit array.

The headroom of an N-bit integer is the number of bits that the integer's value may be left-shifted without any information being lost. Equivalently, it is one less than the number of leading sign bits.

The headroom of a complex_s32_t struct is the minimum of the headroom of each of its 32-bit fields, re and im.

The headroom of a complex_s32_t array is the minimum of the headroom of each of its complex_s32_t elements.

This function efficiently traverses the elements of \(\bar x\) to determine its headroom.

x[] represents the complex 32-bit vector \(\bar x\). x[] must begin at a word-aligned address.

length is the number of elements in x[].

Operation Performed:: \begin{align*} min\!\{ HR_{32}\left(x_0\right), HR_{32}\left(x_1\right), ..., HR_{32}\left(x_{length-1}\right) \} \end{align*}

Parameters

[in]	x	Complex input vector \(\bar x\)
[in]	length	Number of elements in \(\bar x\)

Returns: Headroom of vector \(\bar x\)

Exceptions

ET_LOAD_STORE Raised if x is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_headroom, xs3_vect_s32_headroom, xs3_vect_complex_s16_headroom

◆ xs3_vect_complex_s32_macc()

headroom_t xs3_vect_complex_s32_macc	(	complex_s32_t	acc[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by another, and add the result to an accumulator.

acc[] represents the complex 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the complex 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( ( Re\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} - Im\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & s_k \leftarrow round( sat_{32}( ( Im\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} + Re\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & Re\{a_k\} \leftarrow sat_{32}( Re\{\tilde{a}_k\} + v_k ) \\ & Im\{a_k\} \leftarrow sat_{32}( Im\{\tilde{a}_k\} + s_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

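For accumulation to make sense mathematically, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr + 30 \), where the \(+30\) accounts for the \(2^{-30}\) scaling applied to each product in the operation above.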

The function xs3_vect_complex_s32_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Complex accumulator \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_macc_prepare

◆ xs3_vect_complex_s32_mag()

headroom_t xs3_vect_complex_s32_mag	(	int32_t	a[],
		const complex_s32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const complex_s32_t *	rot_table,
		const unsigned	table_rows
	)

Compute the magnitude of each element of a complex 32-bit vector.

a[] represents the real 32-bit output mantissa vector \(\bar a\).

b[] represents the complex 32-bit input mantissa vector \(\bar b\).

a[] and b[] must each begin at a word-aligned address.

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

rot_table must point to a pre-computed table of complex vectors used in calculating the magnitudes. table_rows is the number of rows in the table. This library is distributed with a default version of the required rotation table. The following symbols can be used to refer to it in user code:

const extern unsigned rot_table32_rows;

const extern complex_s32_t rot_table32[30][4];

complex_s32_t

A complex number with a 32-bit real part and 32-bit imaginary part.

Definition: xs3_math_types.h:49

Faster computation (with reduced precision) can be achieved by generating a smaller version of the table. A python script is provided to generate this table.

Todo:: Point to documentation page on generating this table.

Operation Performed:: \begin{align*} & v_k \leftarrow b_k \cdot 2^{-b\_shr} \\ & a_k \leftarrow \sqrt { {\left( Re\{v_k\} \right)}^2 + {\left( Im\{v_k\} \right)}^2 } & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the real 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr\).

The function xs3_vect_complex_s32_mag_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Parameters

[out]	a	Real output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	rot_table	Pre-computed rotation table required for calculating magnitudes
[in]	table_rows	Number of rows in `rot_table`

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_mag_prepare

◆ xs3_vect_complex_s32_mul()

headroom_t xs3_vect_complex_s32_mul	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by another.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow \left( Re\{b_k'\} \cdot Re\{c_k'\} - Im\{b_k'\} \cdot Im\{c_k'\} \right) \cdot 2^{-30} \\ & Im\{a_k\} \leftarrow \left( Im\{b_k'\} \cdot Re\{c_k'\} + Re\{b_k'\} \cdot Im\{c_k'\} \right) \cdot 2^{-30} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 32-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr\).

The function xs3_vect_complex_s32_mul_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\), and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_mul_prepare

◆ xs3_vect_complex_s32_nmacc()

headroom_t xs3_vect_complex_s32_nmacc	(	complex_s32_t	acc[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one complex 32-bit vector element-wise by another, and subtract the result from an accumulator.

acc[] represents the complex 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the complex 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( ( Re\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} - Im\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & s_k \leftarrow round( sat_{32}( ( Im\{\tilde{b}_k\} \cdot Re\{\tilde{c}_k\} + Re\{\tilde{b}_k\} \cdot Im\{\tilde{c}_k\} ) \cdot 2^{-30}) ) \\ & Re\{a_k\} \leftarrow sat_{32}( Re\{\tilde{a}_k\} - v_k ) \\ & Im\{a_k\} \leftarrow sat_{32}( Im\{\tilde{a}_k\} - s_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr \).

The function xs3_vect_complex_s32_nmacc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Complex accumulator \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_nmacc_prepare

◆ xs3_vect_complex_s32_real_mul()

headroom_t xs3_vect_complex_s32_real_mul	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply a complex 32-bit vector element-wise by a real 32-bit vector.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively.

c[] represents the real 32-bit mantissa vector \(\bar c\).

a[], b[], and c[] each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow \left( Re\{b_k'\} \cdot c_k' \right) \cdot 2^{-30} \\ & Im\{a_k\} \leftarrow \left( Im\{b_k'\} \cdot c_k' \right) \cdot 2^{-30} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar c\) are the real 32-bit mantissas of BFP vector \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr\).

The function xs3_vect_complex_s32_real_mul_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\).
[in]	b	Complex input vector \(\bar b\).
[in]	c	Real input vector \(\bar c\).
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\), and \(\bar c\).
[in]	b_shr	Right-shift applied to \(\bar b\).
[in]	c_shr	Right-shift applied to \(\bar c\).

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_real_mul_prepare

◆ xs3_vect_complex_s32_real_scale()

headroom_t xs3_vect_complex_s32_real_scale	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const int32_t	c,
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply a complex 32-bit vector by a real scalar.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively.

c represents the real 32-bit scale factor \(c\).

a[] and b[] each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and to \(c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} \cdot c \\ & Im\{a_k\} \leftarrow Im\{b_k'\} \cdot c \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the real 32-bit mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr\).

The function xs3_vect_complex_s32_real_scale_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Real scale factor \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(c\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_scale()

headroom_t xs3_vect_complex_s32_scale	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const int32_t	c_real,
		const int32_t	c_imag,
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply a complex 32-bit vector by a complex 32-bit scalar.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively.

c_real and c_imag are the real and imaginary parts of the complex 32-bit scale factor \(c\).

a[] and b[] each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and to \(c\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow \left( Re\{b_k'\} \cdot Re\{c\} - Im\{b_k'\} \cdot Im\{c\} \right) \cdot 2^{-30} \\ & Im\{a_k\} \leftarrow \left( Re\{b_k'\} \cdot Im\{c\} + Im\{b_k'\} \cdot Re\{c\} \right) \cdot 2^{-30} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the complex 32-bit mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr\).

The function xs3_vect_complex_s32_mul_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\).
[in]	b	Complex input vector \(\bar b\).
[in]	c_real	Real part of \(c\)
[in]	c_imag	Imaginary part of \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\).
[in]	b_shr	Right-shift applied to \(\bar b\).
[in]	c_shr	Right-shift applied to \(c\).

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_set()

void xs3_vect_complex_s32_set	(	complex_s32_t	a[],
		const int32_t	b_real,
		const int32_t	b_imag,
		const unsigned	length
	)

Set each element of a complex 32-bit vector to a specified value.

a[] represents a complex 32-bit vector \(\bar a\). a[] must begin at a word-aligned address.

b_real and b_imag are the real and imaginary parts to which each element will be set.

length is the number of elements in a[].

Operation Performed:: \begin{align*} & a_k \leftarrow b\_real + j\cdot b\_imag \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \\ & \qquad\text{ where } j^2 = -1 \end{align*}

Block Floating-Point

If \(b\) is the mantissa of floating-point value \(b \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b_real	Value to set real part of elements of \(\bar a\) to
[in]	b_imag	Value to set imaginary part of elements of \(\bar a\) to
[in]	length	Number of elements in \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_shl()

headroom_t xs3_vect_complex_s32_shl	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const unsigned	length,
		const left_shift_t	b_shl
	)

Left-shift each element of a complex 32-bit vector by a specified number of bits.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in \(\bar a\) and \(\bar b\).

b_shl is the signed arithmetic left-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow sat_{32}(\lfloor Re\{b_k\} \cdot 2^{b\_shl} \rfloor) \\ & Im\{a_k\} \leftarrow sat_{32}(\lfloor Im\{b_k\} \cdot 2^{b\_shl} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{b\_shl}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\)
[in]	b_shl	Left-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_shr()

headroom_t xs3_vect_complex_s32_shr	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Right-shift each element of a complex 32-bit vector by a specified number of bits.

a[] and b[] represent the complex 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in \(\bar a\) and \(\bar b\).

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow sat_{32}(\lfloor Re\{b_k\} \cdot 2^{-b\_shr} \rfloor) \\ & Im\{a_k\} \leftarrow sat_{32}(\lfloor Im\{b_k\} \cdot 2^{-b\_shr} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{-b\_shr}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s32_squared_mag()

headroom_t xs3_vect_complex_s32_squared_mag	(	int32_t	a[],
		const complex_s32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Computes the squared magnitudes of elements of a complex 32-bit vector.

a[] represents the real 32-bit mantissa vector \(\bar a\). b[] represents the complex 32-bit mantissa vector \(\bar b\). Each must begin at a word-aligned address.

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow ((Re\{b_k'\})^2 + (Im\{b_k'\})^2)\cdot 2^{-30} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 32-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the real 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = 2 \cdot (b\_exp + b\_shr)\).

The function xs3_vect_complex_s32_squared_mag_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Parameters

[out]	a	Real output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if a is not double word-aligned or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_squared_mag_prepare

◆ xs3_vect_complex_s32_sub()

headroom_t xs3_vect_complex_s32_sub	(	complex_s32_t	a[],
		const complex_s32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Subtract one complex 32-bit vector from another.

a[], b[] and c[] represent the complex 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\)
respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} - Re\{c_k'\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} - Im\{c_k'\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 32-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 32-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s32_sub_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Complex output vector \(\bar a\)
[in]	b	Complex input vector \(\bar b\)
[in]	c	Complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_sub_prepare

◆ xs3_vect_complex_s32_sum()

void xs3_vect_complex_s32_sum	(	complex_s64_t *	a,
		const complex_s32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Compute the sum of elements of a complex 32-bit vector.

a is the complex 64-bit mantissa of the resulting sum.

b[] represents the complex 32-bit mantissa vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

b_shr is the unsigned arithmetic right-shift applied to each element of \(\bar b\). b_shr cannot be negative.

Operation Performed:: \begin{align*} & b_k' \leftarrow b_k \cdot 2^{-b\_shr} \\ & Re\{a\} \leftarrow \sum_{k=0}^{length-1} \left( Re\{b_k'\} \right) \\ & Im\{a\} \leftarrow \sum_{k=0}^{length-1} \left( Im\{b_k'\} \right) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then \(a\) is the complex 64-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr\).

The function xs3_vect_complex_s32_sum_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Additional Details

Internally the sum accumulates into four separate complex 40-bit accumulators. These accumulators apply symmetric 40-bit saturation logic (with bounds \(\pm (2^{39}-1)\)) with each added element. At the end, the 4 accumulators are summed together into the 64-bit fields of a. No saturation logic is applied at this final step.

In the most extreme case, each \(b_k\) may be \(-2^{31}\). \(256\) of these added into the same accumulator is \(-2^{39}\) which would saturate to \(-2^{39}+1\), introducing 1 LSb of error (which may or may not be acceptable given a particular circumstance). The final result for each part then may be as large as \(4\cdot(-2^{39}+1) = -2^{41}+4 \), each fitting into a 42-bit signed integer.

Parameters

[out]	a	Complex sum \(a\)
[in]	b	Complex input vector \(\bar b\).
[in]	length	Number of elements in vector \(\bar b\).
[in]	b_shr	Right-shift applied to \(\bar b\).

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s32_sum_prepare

◆ xs3_vect_complex_s32_tail_reverse()

void xs3_vect_complex_s32_tail_reverse	(	complex_s32_t	x[],
		const unsigned	length
	)

Reverses the order of the tail of a complex 32-bit vector.

Reverses the order of elements in the tail of the complex 32-bit vector \(\bar x\). The tail of \(\bar x\), in this context, is all elements of \(\bar x\) except for \(x_0\). In other words, the first element \(x_0\) remains where it is, and the remaining \(length-1\) elements are rearranged to have their order reversed.

This function is used when performing a forward or inverse FFT on a single sequence of real values (i.e. the mono FFT), and operates in-place on x[].

Parameter Details

x[] represents the complex 32-bit vector \(\bar x\), which is both an input to and an output of this function. x[] must begin at a word-aligned address.

length is the number of elements in \(\bar x\).

Operation Performed:: \begin{align*} & x_0 \leftarrow x_0 \\ & x_k \leftarrow x_{length - k} \\ & \qquad\text{ for }k\in 1\ ...\ (length-1) \end{align*}

Parameters

[in,out]	x	Complex vector to have its tail reversed.
[in]	length	Number of elements in \(\bar x\)

Exceptions

ET_LOAD_STORE Raised if x is not word-aligned (See Note: Vector Alignment)

See also: bfp_fft_forward_mono, bfp_fft_inverse_mono

◆ xs3_vect_s32_abs()

headroom_t xs3_vect_s32_abs	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length
	)

Compute the element-wise absolute value of a 32-bit vector.

a[] and b[] represent the 32-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{32}(\left| b_k \right|) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_abs_sum()

int64_t xs3_vect_s32_abs_sum	(	const int32_t	b[],
		const unsigned	length
	)

Compute the sum of the absolute values of elements of a 32-bit vector.

b[] represents the 32-bit mantissa vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} \sum_{k=0}^{length-1} sat_{32}(\left| b_k \right|) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 64-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Additional Details

Internally the sum accumulates into 8 separate 40-bit accumulators. These accumulators apply symmetric 40-bit saturation logic (with bounds \(\pm (2^{39}-1)\)) with each added element. At the end, the 8 accumulators are summed together into the 64-bit value \(a\) which is returned by this function. No saturation logic is applied at this final step.

Because symmetric 32-bit saturation logic is applied when computing the absolute value, in the corner case where each element is INT32_MIN, each of the 8 accumulators can accumulate \(256\) elements before saturation is possible. Therefore, with \(b\_hr\) bits of headroom, no saturation of intermediate results is possible with fewer than \(2^{11 + b\_hr}\) elements in \(\bar b\).

If the length of \(\bar b\) is greater than \(2^{11 + b\_hr}\), the sum can be computed piece-wise in several calls to this function, with the partial results summed in user code.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: The 64-bit sum \(a\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_add()

headroom_t xs3_vect_s32_add	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Add together two 32-bit vectors.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' = sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' = sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{32}\!\left( b_k' + c_k' \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s32_add_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_add_prepare()

◆ xs3_vect_s32_add_scalar()

headroom_t xs3_vect_s32_add_scalar	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c,
		const unsigned	length,
		const right_shift_t	b_shr
	)

Add a scalar to a 32-bit vector.

a[], b[] represent the 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

c is the scalar \(c\) to be added to each element of \(\bar b\).

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' = sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow sat_{32}\!\left( b_k' + c \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If elements of \(\bar b\) are the mantissas of BFP vector \( \bar{b} \cdot 2^{b\_exp} \), and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s32_add_scalar_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Note that \(c\_shr\) is an output of xs3_vect_s32_add_scalar_prepare(), but is not a parameter to this function. The \(c\_shr\) produced by xs3_vect_s32_add_scalar_prepare() is to be applied by the user, and the result passed as input c.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_add_scalar_prepare()

◆ xs3_vect_s32_argmin()

unsigned xs3_vect_s32_argmin	(	const int32_t	b[],
		const unsigned	length
	)

Obtain the array index of the minimum element of a 32-bit vector.

b[] represents the 32-bit input vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} & a \leftarrow argmin_k\{ b_k \} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: \(a\), the index of the minimum element of vector \(\bar b\). If there is a tie for the minimum value, the lowest tying index is returned.

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_clip()

headroom_t xs3_vect_s32_clip	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const int32_t	lower_bound,
		const int32_t	upper_bound,
		const right_shift_t	b_shr
	)

Clamp the elements of a 32-bit vector to a specified range.

a[] and b[] represent the 32-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

lower_bound and upper_bound are the lower and upper bounds of the clipping range respectively. These bounds are checked for each element of \(\bar b\) only after b_shr is applied.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\) before being compared to the upper and lower bounds.

If \(\bar b\) are the mantissas for a BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the exponent \(a\_exp\) of the output BFP vector \(\bar{a} \cdot 2^{a\_exp}\) is given by \(a\_exp = b\_exp + b\_shr\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow \begin{cases} lower\_bound & b_k' \le lower\_bound \\ upper\_bound & b_k' \ge upper\_bound \\ b_k' & otherwise \end{cases} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	lower_bound	Lower bound of clipping range
[in]	upper_bound	Upper bound of clipping range
[in]	b_shr	Arithmetic right-shift applied to elements of \(\bar b\) prior to clipping

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_convolve_same()

headroom_t xs3_vect_s32_convolve_same	(	int32_t	y[],
		const int32_t	x[],
		const int32_t	b_q30[],
		const unsigned	x_length,
		const unsigned	b_length,
		const pad_mode_e	padding_mode
	)

Convolve a 32-bit vector with a short kernel.

32-bit input vector \(\bar x\) is convolved with a short fixed-point kernel \(\bar b\) to produce 32-bit output vector \(\bar y\). In other words, this function applies the \(K\)th-order FIR filter with coefficients given by \(\bar b\) to the input signal \(\bar x\). The convolution mode is "same" in that the input vector is effectively padded such that the input and output vectors are the same length. The padding behavior is one of those given by pad_mode_e.

The maximum filter order \(K\) supported by this function is \(7\).

y[] and x[] are the output and input vectors \(\bar y\) and \(\bar x\) respectively.

b_q30[] is the vector \(\bar b\) of filter coefficients. The coefficients of \(\bar b\) are encoded in a Q2.30 fixed-point format. The effective value of the \(i\)th coefficient is then \(b_i \cdot 2^{-30}\).

x_length is the length \(N\) of \(\bar x\) and \(\bar y\) in elements.

b_length is the length \(K\) of \(\bar b\) in elements (i.e. the number of filter taps). b_length must be one of \( \{ 1, 3, 5, 7 \} \).

padding_mode is one of the values from the pad_mode_e enumeration. The padding mode indicates the filter input values for filter taps that have extended beyond the bounds of the input vector \(\bar x\). See pad_mode_e for a list of supported padding modes and associated behaviors.

Operation Performed:: \begin{align*} & \tilde{x}_i = \begin{cases} \text{determined by padding mode} & i \lt 0 \\ \text{determined by padding mode} & i \ge N \\ x_i & otherwise \end{cases} \\ & y_k \leftarrow \sum_{l=0}^{K-1} (\tilde{x}_{(k+l-P)} \cdot b_l \cdot 2^{-30} ) \\ & \qquad\text{ for }k\in 0\ ...\ (N-1) \\ & \qquad\text{ where }P = \lfloor K/2 \rfloor \end{align*}

Additional Details

To avoid the possibility of saturating any output elements, \(\bar b\) may be constrained such that \( \sum_{i=0}^{K-1} \left|b_i\right| \leq 2^{30} \).

Note: Unlike xs3_vect_s32_convolve_valid(), this operation cannot be performed safely in-place on x[]

Parameters

[out]	y	Output vector \(\bar y\)
[in]	x	Input vector \(\bar x\)
[in]	b_q30	Filter coefficient vector \(\bar b\)
[in]	x_length	The number of elements \(N\) in vector \(\bar x\)
[in]	b_length	The number of elements \(K\) in \(\bar b\)
[in]	padding_mode	The padding mode to be applied at signal boundaries

Exceptions

ET_LOAD_STORE Raised if x or y or b_q30 is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_convolve_valid()

headroom_t xs3_vect_s32_convolve_valid	(	int32_t	y[],
		const int32_t	x[],
		const int32_t	b_q30[],
		const unsigned	x_length,
		const unsigned	b_length
	)

Convolve a 32-bit vector with a short kernel.

32-bit input vector \(\bar x\) is convolved with a short fixed-point kernel \(\bar b\) to produce 32-bit output vector \(\bar y\). In other words, this function applies the \(K\)th-order FIR filter with coefficients given by \(\bar b\) to the input signal \(\bar x\). The convolution is "valid" in the sense that no output elements are emitted where the filter taps extend beyond the bounds of the input vector, resulting in an output vector \(\bar y\) with fewer elements.

The maximum filter order \(K\) supported by this function is \(7\).

y[] is the output vector \(\bar y\). If input \(\bar x\) has \(N\) elements, and the filter has \(K\) elements, then \(\bar y\) has \(N-2P\) elements, where \(P = \lfloor K / 2 \rfloor\).

x[] is the input vector \(\bar x\) with length \(N\).

b_q30[] is the vector \(\bar b\) of filter coefficients. The coefficients of \(\bar b\) are encoded in a Q2.30 fixed-point format. The effective value of the \(i\)th coefficient is then \(b_i \cdot 2^{-30}\).

x_length is the length \(N\) of \(\bar x\) in elements.

b_length is the length \(K\) of \(\bar b\) in elements (i.e. the number of filter taps). b_length must be one of \( \{ 1, 3, 5, 7 \} \).

Operation Performed:: \begin{align*} & y_k \leftarrow \sum_{l=0}^{K-1} (x_{(k+l)} \cdot b_l \cdot 2^{-30} ) \\ & \qquad\text{ for }k\in 0\ ...\ (N-2P-1) \\ & \qquad\text{ where }P = \lfloor K/2 \rfloor \end{align*}

Additional Details

To avoid the possibility of saturating any output elements, \(\bar b\) may be constrained such that \( \sum_{i=0}^{K-1} \left|b_i\right| \leq 2^{30} \).

This operation can be applied safely in-place on x[].

Parameters

[out]	y	Output vector \(\bar y\)
[in]	x	Input vector \(\bar x\)
[in]	b_q30	Filter coefficient vector \(\bar b\)
[in]	x_length	The number of elements \(N\) in vector \(\bar x\)
[in]	b_length	The number of elements \(K\) in \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if x or y or b_q30 is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_copy()

headroom_t xs3_vect_s32_copy	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length
	)

Copy one 32-bit vector to another.

This function is effectively a constrained version of memcpy.

With the constraints below met, this function should be modestly faster than memcpy.

a[] is the output vector to which elements are copied.

b[] is the input vector from which elements are copied.

a and b each must begin at a word-aligned address.

length is the number of elements to be copied. length must be a multiple of 8.

Operation Performed:: \begin{align*} & a_k \leftarrow b_k \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar a\) and \(\bar b\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_dot()

int64_t xs3_vect_s32_dot	(	const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Compute the inner product between two 32-bit vectors.

b[] and c[] represent the 32-bit mantissa vectors \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address.

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a \leftarrow \sum_{k=0}^{length-1}\left(round( b_k' \cdot c_k' \cdot 2^{-30} ) \right) \\ & \qquad\text{where } a \text{ is returned} \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of the BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c}\cdot 2^{c\_exp}\), then result \(a\) is the 64-bit mantissa of the result \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr + 30\).

If needed, the bit-depth of \(a\) can then be reduced to 32 bits to get a new result \(a' \cdot 2^{a\_exp'}\) where \(a' = a \cdot 2^{-a\_shr}\) and \(a\_exp' = a\_exp + a\_shr\).

The function xs3_vect_s32_dot_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Additional Details

The 30-bit rounding right-shift applied to each of the 64-bit products \(b_k \cdot c_k\) is a feature of the hardware and cannot be avoided. As such, if the input vectors \(\bar b\) and \(\bar c\) together have too much headroom (i.e. \(b\_hr + c\_hr\)), the sum may effectively vanish. To avoid this situation, negative values of b_shr and c_shr may be used (with the stipulation that \(b\_shr \ge -b\_hr\) and \(c\_shr \ge -c\_hr\) if saturation of \(b_k'\) and \(c_k'\) is to be avoided). The less headroom \(b_k'\) and \(c_k'\) have, the greater the precision of the final result.

Internally, each product \((b_k' \cdot c_k' \cdot 2^{-30})\) accumulates into one of eight 40-bit accumulators (which are all used simultaneously) which apply symmetric 40-bit saturation logic (with bounds \(\approx 2^{39}\)) with each value added. The saturating arithmetic employed is not associative and no indication is given if saturation occurs at an intermediate step. To avoid saturation errors, length should be no greater than \(2^{10+b\_hr+c\_hr}\), where \(b\_hr\) and \(c\_hr\) are the headroom of \(\bar b\) and \(\bar c\) respectively.

If the caller's mantissa vectors are longer than that, the full inner product can be found by calling this function multiple times for partial inner products on sub-sequences of the input vectors, and adding the results in user code.

In many situations the caller may have a priori knowledge that saturation is impossible (or very nearly so), in which case this guideline may be disregarded. However, such situations are application-specific and are well beyond the scope of this documentation, and as such are left to the user's discretion.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: The inner product of vectors \(\bar b\) and \(\bar c\), scaled as indicated above.

Exceptions

ET_LOAD_STORE Raised if b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_energy()

int64_t xs3_vect_s32_energy	(	const int32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Calculate the energy (sum of squares of elements) of a 32-bit vector.

b[] represents the 32-bit mantissa vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a \leftarrow \sum_{k=0}^{length-1} round((b_k')^2 \cdot 2^{-30}) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of the BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then floating-point result is \(a \cdot 2^{a\_exp}\), where the 64-bit mantissa \(a\) is returned by this function, and \(a\_exp = 30 + 2 \cdot (b\_exp + b\_shr) \).

The function xs3_vect_s32_energy_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and the input headroom \(b\_hr\).

Additional Details

The 30-bit rounding right-shift applied to each element of the 64-bit products \((b_k')^2\) is a feature of the hardware and cannot be avoided. As such, if the input vector \(\bar b\) has too much headroom (i.e. \(2\cdot b\_hr\)), the sum may effectively vanish. To avoid this situation, negative values of b_shr may be used (with the stipulation that \(b\_shr \ge -b\_hr\) if saturation of \(b_k'\) is to be avoided). The less headroom \(b_k'\) has, the greater the precision of the final result.

Internally, each product \((b_k')^2 \cdot 2^{-30}\) accumulates into one of eight 40-bit accumulators (which are all used simultaneously) which apply symmetric 40-bit saturation logic (with bounds \(\approx 2^{39}\)) with each value added. The saturating arithmetic employed is not associative and no indication is given if saturation occurs at an intermediate step. To avoid saturation errors, length should be no greater than \(2^{10+2\cdot b\_hr}\), where \(b\_hr\) is the headroom of \(\bar b\).

If the caller's mantissa vector is longer than that, the full result can be found by calling this function multiple times for partial results on sub-sequences of the input, and adding the results in user code.

In many situations the caller may have a priori knowledge that saturation is impossible (or very nearly so), in which case this guideline may be disregarded. However, such situations are application-specific and are well beyond the scope of this documentation, and as such are left to the user's discretion.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: 64-bit mantissa of vector \(\bar b\)'s energy

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_headroom()

headroom_t xs3_vect_s32_headroom	(	const int32_t	x[],
		const unsigned	length
	)

Calculate the headroom of a 32-bit vector.

The headroom of an N-bit integer is the number of bits that the integer's value may be left-shifted without any information being lost. Equivalently, it is one less than the number of leading sign bits.

The headroom of an int32_t array is the minimum of the headroom of each of its int32_t elements.

This function efficiently traverses the elements of x[] to determine its headroom.

x[] represents the 32-bit vector \(\bar x\). x[] must begin at a word-aligned address.

length is the number of elements in x[].

Operation Performed:: \begin{align*} min\!\{ HR_{32}\left(x_0\right), HR_{32}\left(x_1\right), ..., HR_{32}\left(x_{length-1}\right) \} \end{align*}

Parameters

[in]	x	Input vector \(\bar x\)
[in]	length	The number of elements in `x[]`

Returns: Headroom of vector \(\bar x\)

Exceptions

ET_LOAD_STORE Raised if x is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_headroom, xs3_vect_complex_s16_headroom, xs3_vect_complex_s32_headroom

◆ xs3_vect_s32_inverse()

headroom_t xs3_vect_s32_inverse	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const unsigned	scale
	)

Compute the inverse of elements of a 32-bit vector.

a[] and b[] represent the 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each vector must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

scale is a scaling parameter used to maximize the precision of the result.

Operation Performed:: \begin{align*} & a_k \leftarrow \lfloor\frac{2^{scale}}{b_k}\rfloor \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = -scale - b\_exp\).

The function xs3_vect_s32_inverse_prepare() can be used to obtain values for \(a\_exp\) and \(scale\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	scale	Scale factor applied to dividend when computing inverse

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_inverse_prepare

◆ xs3_vect_s32_macc()

headroom_t xs3_vect_s32_macc	(	int32_t	acc[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one 32-bit vector element-wise by another, and add the result to an accumulator.

acc[] represents the 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( \tilde{b}_k \cdot \tilde{c}_k \cdot 2^{-30} ) ) \\ & a_k \leftarrow sat_{32}( \tilde{a}_k + v_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(a\_exp + acc\_shr\).

For accumulation to make sense mathematically, \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr + 30 \).

The function xs3_vect_s32_macc_prepare() can be used to obtain values for the output exponent, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Accumulator \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_macc_prepare

◆ xs3_vect_s32_max()

int32_t xs3_vect_s32_max	(	const int32_t	b[],
		const unsigned	length
	)

Find the maximum value in a 32-bit vector.

b[] represents the 32-bit vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow max\{ b_0, b_1, ..., b_{length-1} \} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 32-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: Maximum value from \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_max_elementwise()

headroom_t xs3_vect_s32_max_elementwise	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Get the element-wise maximum of two 32-bit vectors.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[], but not on c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow max(b_k', c_k') \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\).

The function xs3_vect_2vec_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Warning: For correct operation, this function requires at least 1 bit of headroom in each mantissa vector after the shifts have been applied.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_merge_accs()

void xs3_vect_s32_merge_accs	(	int32_t	a[],
		const xs3_split_acc_s32_t	b[],
		const unsigned	length
	)

Merge a vector of split 32-bit accumulators into a vector of int32_t's.

Convert a vector of xs3_split_acc_s32_t into a vector of int32_t. This is useful when a function (e.g. xs3_mat_mul_s8_x_s8_yield_s32) outputs a vector of accumulators in the XS3 VPU's native split 32-bit format, which has the upper half of each accumulator in the first 32 bytes and the lower half in the following 32 bytes.

This function is most efficient (in terms of cycles/accumulator) when length is a multiple of 16.

In any case, length will be rounded up such that a multiple of 16 accumulators will always be merged.

This function can safely merge accumulators in-place.

Parameters

[out]	a	Output vector of int32_t
[in]	b	Input vector of xs3_split_acc_s32_t
[in]	length	Number of accumulators to merge

Exceptions

ET_LOAD_STORE Raised if b or a is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_min()

int32_t xs3_vect_s32_min	(	const int32_t	b[],
		const unsigned	length
	)

Find the minimum value in a 32-bit vector.

b[] represents the 32-bit vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow min\{ b_0, b_1, ..., b_{length-1} \} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 32-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: Minimum value from \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_min_elementwise()

headroom_t xs3_vect_s32_min_elementwise	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Get the element-wise minimum of two 32-bit vectors.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[], but not on c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow min(b_k', c_k') \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\).

The function xs3_vect_2vec_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Warning: For correct operation, this function requires at least 1 bit of headroom in each mantissa vector after the shifts have been applied.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_mul()

headroom_t xs3_vect_s32_mul	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one 32-bit vector element-wise by another.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{32}(round(b_k' \cdot c_k' \cdot 2^{-30})) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr + 30\).

The function xs3_vect_s32_mul_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_nmacc()

headroom_t xs3_vect_s32_nmacc	(	int32_t	acc[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply one 32-bit vector element-wise by another, and subtract the result from an accumulator.

acc[] represents the 32-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the 32-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr, b_shr and c_shr are the signed arithmetic right-shifts applied to input elements \(a_k\), \(b_k\) and \(c_k\).

Operation Performed:: \begin{align*} & \tilde{b}_k \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & \tilde{c}_k \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \tilde{a}_k \leftarrow sat_{32}( a_k \cdot 2^{-acc\_shr} ) \\ & v_k \leftarrow round( sat_{32}( \tilde{b}_k \cdot \tilde{c}_k \cdot 2^{-30} ) ) \\ & a_k \leftarrow sat_{32}( \tilde{a}_k - v_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(a\_exp + acc\_shr\).

For accumulation to make sense mathematically, \(b\_shr\) and \(c\_shr\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + b\_shr + c\_shr + 30 \).

The function xs3_vect_s32_nmacc_prepare() can be used to obtain values for the output exponent, \(acc\_shr\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Accumulator \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_nmacc_prepare

◆ xs3_vect_s32_rect()

headroom_t xs3_vect_s32_rect	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length
	)

Rectify the elements of a 32-bit vector.

a[] and b[] represent the 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

Operation Performed:: \begin{align*} & a_k \leftarrow \begin{cases} b_k & b_k \gt 0 \\ 0 & b_k \leq 0 \end{cases} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_scale()

headroom_t xs3_vect_s32_scale	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const int32_t	c,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Multiply a 32-bit vector by a scalar.

a[] and b[] represent the 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

c is the 32-bit scalar \(c\) by which each element of \(\bar b\) is multiplied.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and to \(c\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c' \leftarrow sat_{32}(\lfloor c \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{32}(round(c' \cdot b_k' \cdot 2^{-30})) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + b\_shr + c\_shr + 30\).

The function xs3_vect_s32_scale_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	c	Scalar to be multiplied by elements of \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(c\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_scale_prepare

◆ xs3_vect_s32_set()

void xs3_vect_s32_set	(	int32_t	a[],
		const int32_t	b,
		const unsigned	length
	)

Set all elements of a 32-bit vector to the specified value.

a[] represents the 32-bit output vector \(\bar a\). a[] must begin at a word-aligned address.

b is the new value to set each element of \(\bar a\) to.

Operation Performed:: \begin{align*} & a_k \leftarrow b \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(b\) is the mantissa of floating-point value \(b \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	New value for the elements of \(\bar a\)
[in]	length	Number of elements in \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_shl()

headroom_t xs3_vect_s32_shl	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const left_shift_t	b_shl
	)

Left-shift the elements of a 32-bit vector by a specified number of bits.

a[] and b[] represent the 32-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in vectors \(\bar a\) and \(\bar b\).

b_shl is the signed arithmetic left-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{32}(\lfloor b_k \cdot 2^{b\_shl} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{b\_shl}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shl	Arithmetic left-shift applied to elements of \(\bar b\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_shr()

headroom_t xs3_vect_s32_shr	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Right-shift the elements of a 32-bit vector by a specified number of bits.

a[] and b[] represent the 32-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in vectors \(\bar a\) and \(\bar b\).

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{-b\_shr}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Arithmetic right-shift applied to elements of \(\bar b\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_split_accs()

void xs3_vect_s32_split_accs	(	xs3_split_acc_s32_t	a[],
		const int32_t	b[],
		const unsigned	length
	)

Split a vector of int32_t's into a vector of xs3_split_acc_s32_t.

Convert a vector of int32_t into a vector of xs3_split_acc_s32_t, the native format for the XS3 VPU's 32-bit accumulators. This is useful when a function (e.g. xs3_mat_mul_s8_x_s8_yield_s32) takes in a vector of accumulators in that native format.

This function is most efficient (in terms of cycles/accumulator) when length is a multiple of 16.

In any case, length will be rounded up such that a multiple of 16 accumulators will always be split.

This function can safely split accumulators in-place.

Parameters

[out]	a	Output vector of xs3_split_acc_s32_t
[in]	b	Input vector of int32_t
[in]	length	Number of accumulators to split

Exceptions

ET_LOAD_STORE Raised if b or a is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_sqrt()

headroom_t xs3_vect_s32_sqrt	(	int32_t	a[],
		const int32_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const unsigned	depth
	)

Compute the square root of elements of a 32-bit vector.

a[] and b[] represent the 32-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each vector must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

depth is the number of most significant bits to calculate of each \(a_k\). For example, a depth value of 8 will only compute the 8 most significant bits (i.e. the most significant byte) of the result, with the remaining 3 bytes as 0. The maximum value for this parameter is XS3_VECT_SQRT_S32_MAX_DEPTH (31). The time cost of this operation is approximately proportional to the number of bits computed.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow \sqrt{ b_k' } \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \\ & \qquad\text{ where } sqrt() \text{ computes the first } depth \text{ bits of the square root.} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = (b\_exp + b\_shr - 30)/2\).

Note that because exponents must be integers, that means \(b\_exp + b\_shr\) must be even.

The function xs3_vect_s32_sqrt_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	depth	Number of bits of each output value to compute

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_sub()

headroom_t xs3_vect_s32_sub	(	int32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Subtract one 32-bit vector from another.

a[], b[] and c[] represent the 32-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' = sat_{32}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' = sat_{32}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{32}\!\left( b_k' - c_k' \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s32_sub_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_sub_prepare

◆ xs3_vect_s32_sum()

int64_t xs3_vect_s32_sum	(	const int32_t	b[],
		const unsigned	length
	)

Sum the elements of a 32-bit vector.

b[] represents the 32-bit mantissa vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow \sum_{k=0}^{length-1} b_k \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 64-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Additional Details

Internally, each element accumulates into one of eight 40-bit accumulators (which are all used simultaneously) which apply symmetric 40-bit saturation logic (with bounds \(\approx 2^{39}\)) with each value added. The saturating arithmetic employed is not associative and no indication is given if saturation occurs at an intermediate step. To avoid the possibility of saturation errors, length should be no greater than \(2^{11+b\_hr}\), where \(b\_hr\) is the headroom of \(\bar b\).

If the caller's mantissa vector is longer than that, the full result can be found by calling this function multiple times for partial results on sub-sequences of the input, and adding the results in user code.

In many situations the caller may have a priori knowledge that saturation is impossible (or very nearly so), in which case this guideline may be disregarded. However, such situations are application-specific and are well beyond the scope of this documentation, and as such are left to the user's discretion.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\)

Returns: 64-bit mantissa of the sum, \(a\).

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_unzip()

void xs3_vect_s32_unzip	(	int32_t	a[],
		int32_t	b[],
		const complex_s32_t	c[],
		const unsigned	length
	)

Deinterleave the real and imaginary parts of a complex 32-bit vector into two separate vectors.

Complex 32-bit input vector \(\bar c\) has its real and imaginary parts (which correspond to the even and odd-indexed elements, if reinterpreted as an int32_t array) split apart to create real 32-bit output vectors \(\bar a\) and \(\bar b\), such that \(\bar{a} = Re\{\bar{c}\}\) and \(\bar{b} = Im\{\bar{c}\}\).

a[] and b[] are the real output vectors \(\bar a\) and \(\bar b\) which receive the real and imaginary parts respectively of \(\bar c\). a and b must each begin at a word-aligned address.

c[] is the complex input vector \(\bar c\). c must begin at a double word-aligned address.

length is the number \(N\) of int32_t elements in \(\bar a\) and \(\bar b\) and the number of complex_s32_t in \(\bar c\).

Operation Performed:: \begin{align*} & a_k = Re\{c_k\} \\ & b_k = Im\{c_k\} \\ & \qquad\text{ for }k\in 0\ ...\ (N-1) \end{align*}

Parameters

[out]	a	Output vector \(\bar a\)
[out]	b	Output vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	The number of elements \(N\) in vectors \(\bar a\), \(\bar b\) and \(\bar c\)

Exceptions

ET_LOAD_STORE	Raised if `a` or `b` is not word-aligned (See Note: Vector Alignment)
ET_LOAD_STORE	Raised if `c` is not double word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s32_zip()

void xs3_vect_s32_zip	(	complex_s32_t	a[],
		const int32_t	b[],
		const int32_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Interleave the elements of two vectors into a single vector.

Elements of 32-bit input vectors \(\bar b\) and \(\bar c\) are interleaved into 32-bit output vector \(\bar a\). Each element of \(\bar b\) has a right-shift of \(b\_shr\) applied, and each element of \(\bar c\) has a right-shift of \(c\_shr\) applied.

Alternatively (and equivalently), this function can be conceived of as taking two real vectors \(\bar b\) and \(\bar c\) and forming a new complex vector \(\bar a\) where \(\bar{a} = \bar{b} + i\cdot\bar{c}\).

If vectors \(\bar b\) and \(\bar c\) each have \(N\) elements, then the resulting \(\bar a\) will have either \(2N\) int32_t elements or (equivalently) \(N\) complex_s32_t elements (and must have space for such).

Each element \(b_k\) of \(\bar b\) will end up as element \(a_{2k}\) of \(\bar a\) (with the bit-shift applied). Each element \(c_k\) will end up as element \(a_{2k+1}\) of \(\bar a\).

a[] is the output vector \(\bar a\).

b[] and c[] are the input vectors \(\bar b\) and \(\bar c\) respectively.

a, b and c must each begin at a double word-aligned (8 byte) address. (see DWORD_ALIGNED).

length is the number \(N\) of int32_t elements in \(\bar b\) and \(\bar c\).

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

c_shr is the signed arithmetic right-shift applied to elements of \(\bar c\).

Operation Performed:: \begin{align*} & Re\{a_{k}\} \leftarrow sat_{32}( b_k \cdot 2^{-b\_shr} ) \\ & Im\{a_{k}\} \leftarrow sat_{32}( c_k \cdot 2^{-c\_shr} ) \\ & \qquad\text{ for }k\in 0\ ...\ (N-1) \end{align*}

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements \(N\) in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Signed arithmetic right-shift applied to elements of \(\bar b\)
[in]	c_shr	Signed arithmetic right-shift applied to elements of \(\bar c\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not double word-aligned (See Note: Vector Alignment)

Enumerations

Functions

Detailed Description

Enumeration Type Documentation

◆ pad_mode_e

Function Documentation

◆ xs3_vect_complex_s32_add()

◆ xs3_vect_complex_s32_add_scalar()

◆ xs3_vect_complex_s32_conj_macc()

◆ xs3_vect_complex_s32_conj_mul()

◆ xs3_vect_complex_s32_conj_nmacc()

◆ xs3_vect_complex_s32_conjugate()

◆ xs3_vect_complex_s32_headroom()

◆ xs3_vect_complex_s32_macc()

◆ xs3_vect_complex_s32_mag()

◆ xs3_vect_complex_s32_mul()

◆ xs3_vect_complex_s32_nmacc()

◆ xs3_vect_complex_s32_real_mul()

◆ xs3_vect_complex_s32_real_scale()

◆ xs3_vect_complex_s32_scale()

◆ xs3_vect_complex_s32_set()

◆ xs3_vect_complex_s32_shl()

◆ xs3_vect_complex_s32_shr()

◆ xs3_vect_complex_s32_squared_mag()

◆ xs3_vect_complex_s32_sub()

◆ xs3_vect_complex_s32_sum()

◆ xs3_vect_complex_s32_tail_reverse()

◆ xs3_vect_s32_abs()

◆ xs3_vect_s32_abs_sum()

◆ xs3_vect_s32_add()

◆ xs3_vect_s32_add_scalar()

◆ xs3_vect_s32_argmin()

◆ xs3_vect_s32_clip()

◆ xs3_vect_s32_convolve_same()

◆ xs3_vect_s32_convolve_valid()

◆ xs3_vect_s32_copy()

◆ xs3_vect_s32_dot()

◆ xs3_vect_s32_energy()

◆ xs3_vect_s32_headroom()

◆ xs3_vect_s32_inverse()

◆ xs3_vect_s32_macc()

◆ xs3_vect_s32_max()

◆ xs3_vect_s32_max_elementwise()

◆ xs3_vect_s32_merge_accs()

◆ xs3_vect_s32_min()

◆ xs3_vect_s32_min_elementwise()

◆ xs3_vect_s32_mul()

◆ xs3_vect_s32_nmacc()

◆ xs3_vect_s32_rect()

◆ xs3_vect_s32_scale()

◆ xs3_vect_s32_set()

◆ xs3_vect_s32_shl()

◆ xs3_vect_s32_shr()

◆ xs3_vect_s32_split_accs()

◆ xs3_vect_s32_sqrt()

◆ xs3_vect_s32_sub()

◆ xs3_vect_s32_sum()

◆ xs3_vect_s32_unzip()

◆ xs3_vect_s32_zip()