Functions
headroom_t	xs3_vect_complex_s16_add (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Add one complex 16-bit vector to another. More...

headroom_t	xs3_vect_complex_s16_add_scalar (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const complex_s16_t c, const unsigned length, const right_shift_t b_shr)
	Add a scalar to a complex 16-bit vector. More...

headroom_t	xs3_vect_complex_s16_conj_mul (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t a_shr)
	Multiply one complex 16-bit vector element-wise by the complex conjugate of another. More...

headroom_t	xs3_vect_complex_s16_headroom (const int16_t b_real[], const int16_t b_imag[], const unsigned length)
	Calculate the headroom of a complex 16-bit array. More...

headroom_t	xs3_vect_complex_s16_mag (int16_t a[], const int16_t b_real[], const int16_t b_imag[], const unsigned length, const right_shift_t b_shr, const int16_t *rot_table, const unsigned table_rows)
	Compute the magnitude of each element of a complex 16-bit vector. More...

headroom_t	xs3_vect_complex_s16_macc (int16_t acc_real[], int16_t acc_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one complex 16-bit vector element-wise by another, and add the result to an accumulator. More...

headroom_t	xs3_vect_complex_s16_nmacc (int16_t acc_real[], int16_t acc_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one complex 16-bit vector element-wise by another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_complex_s16_conj_macc (int16_t acc_real[], int16_t acc_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one complex 16-bit vector element-wise by the complex conjugate of another, and add the result to an accumulator. More...

headroom_t	xs3_vect_complex_s16_conj_nmacc (int16_t acc_real[], int16_t acc_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one complex 16-bit vector element-wise by the complex conjugate of another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_complex_s16_mul (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t a_shr)
	Multiply one complex 16-bit vector element-wise by another. More...

headroom_t	xs3_vect_complex_s16_real_mul (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const unsigned length, const right_shift_t a_shr)
	Multiply a complex 16-bit vector element-wise by a real 16-bit vector. More...

headroom_t	xs3_vect_complex_s16_real_scale (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c, const unsigned length, const right_shift_t a_shr)
	Multiply a complex 16-bit vector by a real scalar. More...

headroom_t	xs3_vect_complex_s16_scale (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real, const int16_t c_imag, const unsigned length, const right_shift_t a_shr)
	Multiply a complex 16-bit vector by a complex 16-bit scalar. More...

void	xs3_vect_complex_s16_set (int16_t a_real[], int16_t a_imag[], const int16_t b_real, const int16_t b_imag, const unsigned length)
	Set each element of a complex 16-bit vector to a specified value. More...

headroom_t	xs3_vect_complex_s16_shl (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const unsigned length, const left_shift_t b_shl)
	Left-shift each element of a complex 16-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_complex_s16_shr (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const unsigned length, const right_shift_t b_shr)
	Right-shift each element of a complex 16-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_complex_s16_squared_mag (int16_t a[], const int16_t b_real[], const int16_t b_imag[], const unsigned length, const right_shift_t a_shr)
	Get the squared magnitudes of elements of a complex 16-bit vector. More...

headroom_t	xs3_vect_complex_s16_sub (int16_t a_real[], int16_t a_imag[], const int16_t b_real[], const int16_t b_imag[], const int16_t c_real[], const int16_t c_imag[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Subtract one complex 16-bit vector from another. More...

complex_s32_t	xs3_vect_complex_s16_sum (const int16_t b_real[], const int16_t b_imag[], const unsigned length)
	Get the sum of elements of a complex 16-bit vector. More...

headroom_t	xs3_vect_s16_abs (int16_t a[], const int16_t b[], const unsigned length)
	Compute the element-wise absolute value of a 16-bit vector. More...

int32_t	xs3_vect_s16_abs_sum (const int16_t b[], const unsigned length)
	Compute the sum of the absolute values of elements of a 16-bit vector. More...

headroom_t	xs3_vect_s16_add (int16_t a[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Add one 16-bit BFP vector to another. More...

headroom_t	xs3_vect_s16_add_scalar (int16_t a[], const int16_t b[], const int16_t c, const unsigned length, const right_shift_t b_shr)
	Add a scalar to a 16-bit vector. More...

unsigned	xs3_vect_s16_argmax (const int16_t b[], const unsigned length)
	Obtain the array index of the maximum element of a 16-bit vector. More...

unsigned	xs3_vect_s16_argmin (const int16_t b[], const unsigned length)
	Obtain the array index of the minimum element of a 16-bit vector. More...

headroom_t	xs3_vect_s16_clip (int16_t a[], const int16_t b[], const unsigned length, const int16_t lower_bound, const int16_t upper_bound, const right_shift_t b_shr)
	Clamp the elements of a 16-bit vector to a specified range. More...

int64_t	xs3_vect_s16_dot (const int16_t b[], const int16_t c[], const unsigned length)
	Compute the inner product of two 16-bit vectors. More...

int32_t	xs3_vect_s16_energy (const int16_t b[], const unsigned length, const right_shift_t b_shr)
	Calculate the energy (sum of squares of elements) of a 16-bit vector. More...

headroom_t	xs3_vect_s16_headroom (const int16_t b[], const unsigned length)
	Calculate the headroom of a 16-bit vector. More...

void	xs3_vect_s16_inverse (int16_t a[], const int16_t b[], const unsigned length, const unsigned scale)
	Compute the inverse of elements of a 16-bit vector. More...

int16_t	xs3_vect_s16_max (const int16_t b[], const unsigned length)
	Find the maximum value in a 16-bit vector. More...

headroom_t	xs3_vect_s16_max_elementwise (int16_t a[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Get the element-wise maximum of two 16-bit vectors. More...

int16_t	xs3_vect_s16_min (const int16_t b[], const unsigned length)
	Find the minimum value in a 16-bit vector. More...

headroom_t	xs3_vect_s16_min_elementwise (int16_t a[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Get the element-wise minimum of two 16-bit vectors. More...

headroom_t	xs3_vect_s16_macc (int16_t acc[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one 16-bit vector element-wise by another, and add the result to an accumulator. More...

headroom_t	xs3_vect_s16_nmacc (int16_t acc[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t bc_sat)
	Multiply one 16-bit vector element-wise by another, and subtract the result from an accumulator. More...

headroom_t	xs3_vect_s16_mul (int16_t a[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t a_shr)
	Multiply two 16-bit vectors together element-wise. More...

headroom_t	xs3_vect_s16_rect (int16_t a[], const int16_t b[], const unsigned length)
	Rectify the elements of a 16-bit vector. More...

headroom_t	xs3_vect_s16_scale (int16_t a[], const int16_t b[], const unsigned length, const int16_t c, const right_shift_t a_shr)
	Multiply a 16-bit vector by a 16-bit scalar. More...

void	xs3_vect_s16_set (int16_t a[], const int16_t b, const unsigned length)
	Set all elements of a 16-bit vector to the specified value. More...

headroom_t	xs3_vect_s16_shl (int16_t a[], const int16_t b[], const unsigned length, const left_shift_t b_shl)
	Left-shift the elements of a 16-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_s16_shr (int16_t a[], const int16_t b[], const unsigned length, const right_shift_t b_shr)
	Right-shift the elements of a 16-bit vector by a specified number of bits. More...

headroom_t	xs3_vect_s16_sqrt (int16_t a[], const int16_t b[], const unsigned length, const right_shift_t b_shr, const unsigned depth)
	Compute the square roots of elements of a 16-bit vector. More...

headroom_t	xs3_vect_s16_sub (int16_t a[], const int16_t b[], const int16_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
	Subtract one 16-bit BFP vector from another. More...

int32_t	xs3_vect_s16_sum (const int16_t b[], const unsigned length)
	Get the sum of elements of a 16-bit vector. More...

Detailed Description

Function Documentation

◆ xs3_vect_complex_s16_add()

headroom_t xs3_vect_complex_s16_add	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Add one complex 16-bit vector to another.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[], b_imag[], c_real[] and c_imag[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} + Re\{c_k'\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} + Im\{c_k'\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 16-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s16_add_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_add_prepare

◆ xs3_vect_complex_s16_add_scalar()

headroom_t xs3_vect_complex_s16_add_scalar	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const complex_s16_t	c,
		const unsigned	length,
		const right_shift_t	b_shr
	)

Add a scalar to a complex 16-bit vector.

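a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each must begin at a word-aligned address. This operation can be performed safely in-place on b_real[] and b_imag[].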

c is the complex scalar \(c\) to be added to each element of \(\bar b\).

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} + Re\{c\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} + Im\{c\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If elements of \(\bar b\) are the complex mantissas of BFP vector \( \bar{b} \cdot 2^{b\_exp}\), and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s16_add_scalar_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Note that \(c\_shr\) is an output of xs3_vect_complex_s16_add_scalar_prepare(), but is not a parameter to this function. The \(c\_shr\) produced by xs3_vect_complex_s16_add_scalar_prepare() is to be applied by the user, and the result passed as input c.

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c	Complex input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_add_scalar_prepare

◆ xs3_vect_complex_s16_conj_macc()

headroom_t xs3_vect_complex_s16_conj_macc	(	int16_t	acc_real[],
		int16_t	acc_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one complex 16-bit vector element-wise by the complex conjugate of another, and add the result to an accumulator.

acc_real[] and acc_imag[] together represent the complex 16-bit accumulator mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is acc_real[k], and each \(Im\{a_k\}\) is acc_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\).

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k^*\) before being added to the accumulator.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} + Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} - Re\{b_k\} \cdot Im\{c_k\} \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & Re\{a_k\} \leftarrow sat_{16}( Re\{\hat{a}_k\} + round( sat_{16}( v_k \cdot 2^{-bc\_sat} ) ) ) \\ & Im\{a_k\} \leftarrow sat_{16}( Im\{\hat{a}_k\} + round( sat_{16}( s_k \cdot 2^{-bc\_sat} ) ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_complex_s16_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc_real	Real part of complex accumulator \(\bar a\)
[in,out]	acc_imag	Imaginary part of complex accumulator \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k^*\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc_real, acc_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_conj_macc_prepare

◆ xs3_vect_complex_s16_conj_mul()

headroom_t xs3_vect_complex_s16_conj_mul	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply one complex 16-bit vector element-wise by the complex conjugate of another.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[], b_imag[], c_real[] and c_imag[].

length is the number of elements in each of the vectors.

a_shr is the unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} + Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} - Re\{b_k\} \cdot Im\{c_k\} \\ & Re\{a_k\} \leftarrow round( sat_{16}( v_k \cdot 2^{-a\_shr} ) ) \\ & Im\{a_k\} \leftarrow round( sat_{16}( s_k \cdot 2^{-a\_shr} ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 16-bit mantissas of BFP vectors \(\bar{b} \cdot 2^{b\_exp}\) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_complex_s16_mul_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results.

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_mul_prepare

◆ xs3_vect_complex_s16_conj_nmacc()

headroom_t xs3_vect_complex_s16_conj_nmacc	(	int16_t	acc_real[],
		int16_t	acc_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one complex 16-bit vector element-wise by the complex conjugate of another, and subtract the result from an accumulator.

acc_real[] and acc_imag[] together represent the complex 16-bit accumulator mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is acc_real[k], and each \(Im\{a_k\}\) is acc_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\).

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k^*\) before being subtracted from the accumulator.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} + Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} - Re\{b_k\} \cdot Im\{c_k\} \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & Re\{a_k\} \leftarrow sat_{16}( Re\{\hat{a}_k\} - round( sat_{16}( v_k \cdot 2^{-bc\_sat} ) ) ) \\ & Im\{a_k\} \leftarrow sat_{16}( Im\{\hat{a}_k\} - round( sat_{16}( s_k \cdot 2^{-bc\_sat} ) ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_complex_s16_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc_real	Real part of complex accumulator \(\bar a\)
[in,out]	acc_imag	Imaginary part of complex accumulator \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k^*\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc_real, acc_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_conj_nmacc_prepare

◆ xs3_vect_complex_s16_headroom()

headroom_t xs3_vect_complex_s16_headroom	(	const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length
	)

Calculate the headroom of a complex 16-bit array.

The headroom of an N-bit integer is the number of bits that the integer's value may be left-shifted without any information being lost. Equivalently, it is one less than the number of leading sign bits.

The headroom of a complex_s16_t struct is the minimum of the headroom of each of its 16-bit fields, re and im.

The headroom of a complex_s16_t array is the minimum of the headroom of each of its complex_s16_t elements.

This function efficiently traverses the elements of \(\bar b\) to determine its headroom.

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\).

length is the number of elements in b_real[] and b_imag[].

Operation Performed:: \begin{align*} min\!\{ HR_{16}\left(b_0\right), HR_{16}\left(b_1\right), ..., HR_{16}\left(b_{length-1}\right) \} \end{align*}

Parameters

[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: Headroom of vector \(\bar b\)

See also: xs3_vect_s16_headroom, xs3_vect_s32_headroom, xs3_vect_complex_s32_headroom

◆ xs3_vect_complex_s16_macc()

headroom_t xs3_vect_complex_s16_macc	(	int16_t	acc_real[],
		int16_t	acc_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one complex 16-bit vector element-wise by another, and add the result to an accumulator.

acc_real[] and acc_imag[] together represent the complex 16-bit accumulator mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is acc_real[k], and each \(Im\{a_k\}\) is acc_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\).

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k\) before being added to the accumulator.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} - Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} + Re\{b_k\} \cdot Im\{c_k\} \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & Re\{a_k\} \leftarrow sat_{16}( Re\{\hat{a}_k\} + round( sat_{16}( v_k \cdot 2^{-bc\_sat} ) ) ) \\ & Im\{a_k\} \leftarrow sat_{16}( Im\{\hat{a}_k\} + round( sat_{16}( s_k \cdot 2^{-bc\_sat} ) ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_complex_s16_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc_real	Real part of complex accumulator \(\bar a\)
[in,out]	acc_imag	Imaginary part of complex accumulator \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc_real, acc_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_macc_prepare

◆ xs3_vect_complex_s16_mag()

headroom_t xs3_vect_complex_s16_mag	(	int16_t	a[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const int16_t *	rot_table,
		const unsigned	table_rows
	)

Compute the magnitude of each element of a complex 16-bit vector.

a[] represents the real 16-bit output mantissa vector \(\bar a\).

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[] or b_imag[].

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

rot_table must point to a pre-computed table of complex vectors used in calculating the magnitudes. table_rows is the number of rows in the table. This library is distributed with a default version of the required rotation table. The following symbols can be used to refer to it in user code:

const extern unsigned rot_table16_rows;

const extern complex_s16_t rot_table16[30][4];

complex_s16_t

A complex number with a 16-bit real part and 16-bit imaginary part.

Definition: xs3_math_types.h:60

Faster computation (with reduced precision) can be achieved by generating a smaller version of the table. A python script is provided to generate this table.

Todo:: Point to documentation page on generating this table.

Operation Performed:: \begin{align*} & v_k \leftarrow b_k \cdot 2^{-b\_shr} \\ & a_k \leftarrow \sqrt { {\left( Re\{v_k\} \right)}^2 + {\left( Im\{v_k\} \right)}^2 } & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the real 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr\).

The function xs3_vect_complex_s16_mag_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Parameters

[out]	a	Real output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	rot_table	Pre-computed rotation table required for calculating magnitudes
[in]	table_rows	Number of rows in `rot_table`

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_mag_prepare

◆ xs3_vect_complex_s16_mul()

headroom_t xs3_vect_complex_s16_mul	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply one complex 16-bit vector element-wise by another.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[], b_imag[], c_real[] and c_imag[].

length is the number of elements in each of the vectors.

a_shr is the unsigned arithmetic right-shift applied to the 32-bit accumulators holding intermediate results.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} - Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} + Re\{b_k\} \cdot Im\{c_k\} \\ & Re\{a_k\} \leftarrow round( sat_{16}( v_k \cdot 2^{-a\_shr} ) ) \\ & Im\{a_k\} \leftarrow round( sat_{16}( s_k \cdot 2^{-a\_shr} ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 16-bit mantissas of BFP vectors \(\bar{b} \cdot 2^{b\_exp}\) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_complex_s16_mul_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results.

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_mul_prepare

◆ xs3_vect_complex_s16_nmacc()

headroom_t xs3_vect_complex_s16_nmacc	(	int16_t	acc_real[],
		int16_t	acc_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one complex 16-bit vector element-wise by another, and subtract the result from an accumulator.

acc_real[] and acc_imag[] together represent the complex 16-bit accumulator mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is acc_real[k], and each \(Im\{a_k\}\) is acc_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\).

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k\) before being subtracted from the accumulator.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c_k\} - Im\{b_k\} \cdot Im\{c_k\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c_k\} + Re\{b_k\} \cdot Im\{c_k\} \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & Re\{a_k\} \leftarrow sat_{16}( Re\{\hat{a}_k\} - round( sat_{16}( v_k \cdot 2^{-bc\_sat} ) ) ) \\ & Im\{a_k\} \leftarrow sat_{16}( Im\{\hat{a}_k\} - round( sat_{16}( s_k \cdot 2^{-bc\_sat} ) ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_complex_s16_nmacc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc_real	Real part of complex accumulator \(\bar a\)
[in,out]	acc_imag	Imaginary part of complex accumulator \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc_real, acc_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_nmacc_prepare

◆ xs3_vect_complex_s16_real_mul()

headroom_t xs3_vect_complex_s16_real_mul	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply a complex 16-bit vector element-wise by a real 16-bit vector.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] represents the real 16-bit input mantissa vector \(\bar c\).

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[], b_imag[] and c_real[].

length is the number of elements in each of the vectors.

a_shr is the unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot c_k \\ & s_k \leftarrow Im\{b_k\} \cdot c_k \\ & Re\{a_k\} \leftarrow round( sat_{16}( v_k \cdot 2^{-a\_shr} ) ) \\ & Im\{a_k\} \leftarrow round( sat_{16}( s_k \cdot 2^{-a\_shr} ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar c\) are the real 16-bit mantissas of BFP vector \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_complex_s16_real_mul_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results.

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real, b_imag or c_real is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_real_mul_prepare

◆ xs3_vect_complex_s16_real_scale()

headroom_t xs3_vect_complex_s16_real_scale	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c,
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply a complex 16-bit vector by a real scalar.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[] and b_imag[].

c is the real 16-bit input mantissa \(c\).

length is the number of elements in each of the vectors.

a_shr is an unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot c \\ & s_k \leftarrow Im\{b_k\} \cdot c \\ & Re\{a_k\} \leftarrow round( sat_{16}( v_k \cdot 2^{-a\_shr} ) ) \\ & Im\{a_k\} \leftarrow round( sat_{16}( s_k \cdot 2^{-a\_shr} ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the real 16-bit mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_complex_s16_real_scale_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c	Real input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results.

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s16_scale()

headroom_t xs3_vect_complex_s16_scale	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real,
		const int16_t	c_imag,
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply a complex 16-bit vector by a complex 16-bit scalar.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[] and b_imag[].

c_real and c_imag are the real and imaginary parts of the complex 16-bit input mantissa \(c\).

length is the number of elements in each of the vectors.

a_shr is the unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & v_k \leftarrow Re\{b_k\} \cdot Re\{c\} - Im\{b_k\} \cdot Im\{c\} \\ & s_k \leftarrow Im\{b_k\} \cdot Re\{c\} + Re\{b_k\} \cdot Im\{c\} \\ & Re\{a_k\} \leftarrow round( sat_{16}( v_k \cdot 2^{-a\_shr} ) ) \\ & Im\{a_k\} \leftarrow round( sat_{16}( s_k \cdot 2^{-a\_shr} ) ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the complex 16-bit mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_complex_s16_scale_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input scalar \(c\)
[in]	c_imag	Imaginary part of complex input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s16_set()

void xs3_vect_complex_s16_set	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real,
		const int16_t	b_imag,
		const unsigned	length
	)

Set each element of a complex 16-bit vector to a specified value.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k]. Each must begin at a word-aligned address.

b_real and b_imag are the real and imaginary parts of the complex 16-bit input mantissa \(b\). Each a_real[k] will be set to b_real. Each a_imag[k] will be set to b_imag.

length is the number of elements in a_real[] and a_imag[].

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow Re\{b\} \\ & Im\{a_k\} \leftarrow Im\{b\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(b\) is the mantissa of floating-point value \(b \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input scalar \(b\)
[in]	b_imag	Imaginary part of complex input scalar \(b\)
[in]	length	Number of elements in vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a_real or a_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s16_shl()

headroom_t xs3_vect_complex_s16_shl	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length,
		const left_shift_t	b_shl
	)

Left-shift each element of a complex 16-bit vector by a specified number of bits.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[] and b_imag[].

length is the number of elements in \(\bar a\) and \(\bar b\).

b_shl is the signed arithmetic left-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow sat_{16}(\lfloor Re\{b_k\} \cdot 2^{b\_shl} \rfloor) \\ & Im\{a_k\} \leftarrow sat_{16}(\lfloor Im\{b_k\} \cdot 2^{b\_shl} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the complex 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{b\_shl}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shl	Left-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s16_shr()

headroom_t xs3_vect_complex_s16_shr	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Right-shift each element of a complex 16-bit vector by a specified number of bits.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[] and b_imag[].

length is the number of elements in \(\bar a\) and \(\bar b\).

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a_k\} \leftarrow sat_{16}(\lfloor Re\{b_k\} \cdot 2^{-b\_shr} \rfloor) \\ & Im\{a_k\} \leftarrow sat_{16}(\lfloor Im\{b_k\} \cdot 2^{-b\_shr} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the complex 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{-b\_shr}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_complex_s16_squared_mag()

headroom_t xs3_vect_complex_s16_squared_mag	(	int16_t	a[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length,
		const right_shift_t	a_shr
	)

Get the squared magnitudes of elements of a complex 16-bit vector.

a[] represents the real 16-bit output mantissa vector \(\bar a\).

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

a_shr is the unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & a_k \leftarrow ((Re\{b_k\})^2 + (Im\{b_k\})^2)\cdot 2^{-a\_shr} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the complex 16-bit mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the real 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = 2 \cdot b\_exp + a\_shr\).

The function xs3_vect_complex_s16_squared_mag_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Parameters

[out]	a	Real output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	a_shr	Right-shift applied to 32-bit intermediate results

Exceptions

ET_LOAD_STORE Raised if a, b_real or b_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_squared_mag_prepare

◆ xs3_vect_complex_s16_sub()

headroom_t xs3_vect_complex_s16_sub	(	int16_t	a_real[],
		int16_t	a_imag[],
		const int16_t	b_real[],
		const int16_t	b_imag[],
		const int16_t	c_real[],
		const int16_t	c_imag[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Subtract one complex 16-bit vector from another.

a_real[] and a_imag[] together represent the complex 16-bit output mantissa vector \(\bar a\). Each \(Re\{a_k\}\) is a_real[k], and each \(Im\{a_k\}\) is a_imag[k].

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\). Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

c_real[] and c_imag[] together represent the complex 16-bit input mantissa vector \(\bar c\). Each \(Re\{c_k\}\) is c_real[k], and each \(Im\{c_k\}\) is c_imag[k].

Each of the input vectors must begin at a word-aligned address. This operation can be performed safely in-place on inputs b_real[], b_imag[], c_real[] and c_imag[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & Re\{a_k\} \leftarrow Re\{b_k'\} - Re\{c_k'\} \\ & Im\{a_k\} \leftarrow Im\{b_k'\} - Im\{c_k'\} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the complex 16-bit mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the complex 16-bit mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_complex_s16_sub_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a_real	Real part of complex output vector \(\bar a\)
[out]	a_imag	Imaginary part of complex output vector \(\bar a\)
[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	c_real	Real part of complex input vector \(\bar c\)
[in]	c_imag	Imaginary part of complex input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a_real, a_imag, b_real, b_imag, c_real or c_imag is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_complex_s16_sub_prepare

◆ xs3_vect_complex_s16_sum()

complex_s32_t xs3_vect_complex_s16_sum	(	const int16_t	b_real[],
		const int16_t	b_imag[],
		const unsigned	length
	)

Get the sum of elements of a complex 16-bit vector.

b_real[] and b_imag[] together represent the complex 16-bit input mantissa vector \(\bar b\), and must both begin at a word-aligned address. Each \(Re\{b_k\}\) is b_real[k], and each \(Im\{b_k\}\) is b_imag[k].

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} & Re\{a\} \leftarrow \sum_{k=0}^{length-1} \left( Re\{b_k\} \right) \\ & Im\{a\} \leftarrow \sum_{k=0}^{length-1} \left( Im\{b_k\} \right) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the complex 32-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b_real	Real part of complex input vector \(\bar b\)
[in]	b_imag	Imaginary part of complex input vector \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\).

Returns: \(a\), the 32-bit complex sum of elements in \(\bar b\).

Exceptions

ET_LOAD_STORE Raised if b_real or b_imag is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_abs()

headroom_t xs3_vect_s16_abs	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length
	)

Compute the element-wise absolute value of a 16-bit vector.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{16}(\left| b_k \right|) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_abs_sum()

int32_t xs3_vect_s16_abs_sum	(	const int16_t	b[],
		const unsigned	length
	)

Compute the sum of the absolute values of elements of a 16-bit vector.

b[] represents the 16-bit vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow \sum_{k=0}^{length-1} \left| b_k \right| \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 32-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: The 32-bit sum \(a\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_add()

headroom_t xs3_vect_s16_add	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Add one 16-bit BFP vector to another.

a[], b[] and c[] represent the 16-bit vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' = sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' = sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{16}\!\left( b_k' + c_k' \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s16_add_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_add_prepare

◆ xs3_vect_s16_add_scalar()

headroom_t xs3_vect_s16_add_scalar	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c,
		const unsigned	length,
		const right_shift_t	b_shr
	)

Add a scalar to a 16-bit vector.

a[], b[] represent the 16-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

c is the scalar \(c\) to be added to each element of \(\bar b\).

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & b_k' = sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow sat_{16}\!\left( b_k' + c \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If elements of \(\bar b\) are the mantissas of BFP vector \( \bar{b} \cdot 2^{b\_exp} \), and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s16_add_scalar_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Note that \(c\_shr\) is an output of xs3_vect_s16_add_scalar_prepare(), but is not a parameter to this function. The \(c\_shr\) produced by xs3_vect_s16_add_scalar_prepare() is to be applied by the user, and the result passed as input c.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_add_scalar_prepare()

◆ xs3_vect_s16_argmax()

unsigned xs3_vect_s16_argmax	(	const int16_t	b[],
		const unsigned	length
	)

Obtain the array index of the maximum element of a 16-bit vector.

b[] represents the 16-bit input vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} & a \leftarrow argmax_k\{ b_k \} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: \(a\), the index of the maximum element of vector \(\bar b\). If there is a tie for the maximum value, the lowest tying index is returned.

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_argmin()

unsigned xs3_vect_s16_argmin	(	const int16_t	b[],
		const unsigned	length
	)

Obtain the array index of the minimum element of a 16-bit vector.

b[] represents the 16-bit input vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} & a \leftarrow argmin_k\{ b_k \} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: \(a\), the index of the minimum element of vector \(\bar b\). If there is a tie for the minimum value, the lowest tying index is returned.

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_clip()

headroom_t xs3_vect_s16_clip	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const int16_t	lower_bound,
		const int16_t	upper_bound,
		const right_shift_t	b_shr
	)

Clamp the elements of a 16-bit vector to a specified range.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

lower_bound and upper_bound are the lower and upper bounds of the clipping range respectively. These bounds are checked for each element of \(\bar b\) only after b_shr is applied.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\) before being compared to the upper and lower bounds.

If \(\bar b\) are the mantissas for a BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the exponent \(a\_exp\) of the output BFP vector \(\bar{a} \cdot 2^{a\_exp}\) is given by \(a\_exp = b\_exp + b\_shr\).

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow \begin{cases} lower\_bound & b_k' \le lower\_bound \\ upper\_bound & b_k' \ge upper\_bound \\ b_k' & otherwise \end{cases} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	lower_bound	Lower bound of clipping range
[in]	upper_bound	Upper bound of clipping range
[in]	b_shr	Arithmetic right-shift applied to elements of \(\bar b\) prior to clipping

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_dot()

int64_t xs3_vect_s16_dot	(	const int16_t	b[],
		const int16_t	c[],
		const unsigned	length
	)

Compute the inner product of two 16-bit vectors.

b[] and c[] represent the 16-bit vectors \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address.

length is the number of elements in each of the vectors.

Operation Performed:: \begin{align*} a \leftarrow \sum_{k=0}^{length-1}\left( b_k \cdot c_k \right) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of the BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c}\cdot 2^{c\_exp}\), then result \(a\) is the mantissa of the result \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp\).

If needed, the bit-depth of \(a\) can then be reduced to 16 or 32 bits to get a new result \(a' \cdot 2^{a\_exp'}\) where \(a' = a \cdot 2^{-a\_shr}\) and \(a\_exp' = a\_exp + a\_shr\).

Todo:: I don't think there are currently any functions in this library to perform this bit-depth reduction in a user-friendly way.

Notes

The sum \(a\) is accumulated simultaneously into 16 48-bit accumulators which are summed together at the final step. So long as length is less than roughly 2 million, no overflow or saturation of the resulting sum is possible.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar b\) and \(\bar c\)

Returns: \(a\), the inner product of vectors \(\bar b\) and \(\bar c\).

Exceptions

ET_LOAD_STORE Raised if b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_energy()

int32_t xs3_vect_s16_energy	(	const int16_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Calculate the energy (sum of squares of elements) of a 16-bit vector.

b[] represents the 16-bit vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\). b_shr should be chosen to avoid the possibility of saturation. See the note below.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a \leftarrow \sum_{k=0}^{length-1} (b_k')^2 \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of the BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then floating-point result is \(a \cdot 2^{a\_exp}\), where the 32-bit mantissa \(a\) is returned by this function, and \(a\_exp = 2 \cdot (b\_exp + b\_shr) \).

Additional Details

If \(\bar b\) has \(b\_hr\) bits of headroom, then each product \((b_k')^2\) can be a maximum of \( 2^{30 - 2 \cdot (b\_hr + b\_shr)}\). So long as length is less than \(2^{1 + 2\cdot (b\_hr + b\_shr)}\), overflow or saturation of the 32-bit sum should not be possible. Each increase of \(b\_shr\) by \(1\) quadruples the number of elements that can be summed without risk of overflow.

If the caller's mantissa vector is longer than that, the full result can be found by calling this function multiple times for partial results on sub-sequences of the input, and adding the results in user code.

In many situations the caller may have a priori knowledge that saturation is impossible (or very nearly so), in which case this guideline may be disregarded. However, such situations are application-specific and are well beyond the scope of this documentation, and as such are left to the user's discretion.

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)

Returns: 32-bit mantissa of vector \(\bar b\)'s energy

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_headroom()

headroom_t xs3_vect_s16_headroom	(	const int16_t	b[],
		const unsigned	length
	)

Calculate the headroom of a 16-bit vector.

The headroom of an N-bit integer is the number of bits that the integer's value may be left-shifted without any information being lost. Equivalently, it is one less than the number of leading sign bits.

The headroom of an int16_t array is the minimum of the headroom of each of its int16_t elements.

This function efficiently traverses the elements of b[] to determine its headroom.

b[] represents the 16-bit vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in b[].

Operation Performed:: \begin{align*} a \leftarrow min\!\{ HR_{16}\left(b_0\right), HR_{16}\left(b_1\right), ..., HR_{16}\left(b_{length-1}\right) \} \end{align*}

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	The number of elements in vector \(\bar b\)

Returns: Headroom of vector \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s32_headroom,
xs3_vect_complex_s16_headroom, xs3_vect_complex_s32_headroom

◆ xs3_vect_s16_inverse()

void xs3_vect_s16_inverse	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const unsigned	scale
	)

Compute the inverse of elements of a 16-bit vector.

a[] and b[] represent the 16-bit mantissa vectors \(\bar a\) and \(\bar b\) respectively. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

scale is a scaling parameter used to maximize the precision of the result.

Operation Performed:: \begin{align*} & a_k \leftarrow \lfloor\frac{2^{scale}}{b_k}\rfloor \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = scale - b\_exp\).

The function xs3_vect_s16_inverse_prepare() can be used to obtain values for \(a\_exp\) and \(scale\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	scale	Scale factor applied to dividend when computing inverse

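Returns: Nothing; this function is declared void. If the headroom of output vector \(\bar a\) is needed, it can be computed with xs3_vect_s16_headroom().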

See also: xs3_vect_s16_inverse_prepare

◆ xs3_vect_s16_macc()

headroom_t xs3_vect_s16_macc	(	int16_t	acc[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one 16-bit vector element-wise by another, and add the result to an accumulator.

acc[] represents the 16-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the 16-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\) prior to accumulation.

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k\) before accumulation.

Operation Performed:: \begin{align*} & v_k \leftarrow round( sat_{16}( b_k \cdot c_k \cdot 2^{-bc\_sat} ) ) \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & a_k \leftarrow sat_{16}( \hat{a}_k + v_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_s16_macc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Accumulator \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_macc_prepare

◆ xs3_vect_s16_max()

int16_t xs3_vect_s16_max	(	const int16_t	b[],
		const unsigned	length
	)

Find the maximum value in a 16-bit vector.

b[] represents the 16-bit vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow max\{ b_0, b_1, ..., b_{length-1} \} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 16-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: Maximum value from \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_max_elementwise()

headroom_t xs3_vect_s16_max_elementwise	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Get the element-wise maximum of two 16-bit vectors.

a[], b[] and c[] represent the 16-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[], but not on c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow max(b_k', c_k') \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\).

The function xs3_vect_2vec_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Warning: For correct operation, this function requires at least 1 bit of headroom in each mantissa vector after the shifts have been applied.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_min()

int16_t xs3_vect_s16_min	(	const int16_t	b[],
		const unsigned	length
	)

Find the minimum value in a 16-bit vector.

b[] represents the 16-bit vector \(\bar b\). It must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow min\{ b_0, b_1, ..., b_{length-1} \} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 16-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: Minimum value from \(\bar b\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_min_elementwise()

headroom_t xs3_vect_s16_min_elementwise	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Get the element-wise minimum of two 16-bit vectors.

a[], b[] and c[] represent the 16-bit mantissa vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[], but not on c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' \leftarrow sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow min(b_k', c_k') \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\).

The function xs3_vect_2vec_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Warning: For correct operation, this function requires at least 1 bit of headroom in each mantissa vector after the shifts have been applied.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_mul()

headroom_t xs3_vect_s16_mul	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	a_shr
	)

Multiply two 16-bit vectors together element-wise.

a[], b[] and c[] represent the 16-bit vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

a_shr is an unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & a_k' \leftarrow b_k \cdot c_k \\ & a_k \leftarrow sat_{16}(round(a_k' \cdot 2^{-a\_shr})) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_s16_mul_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	a_shr	Right-shift applied to 32-bit products

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_nmacc()

headroom_t xs3_vect_s16_nmacc	(	int16_t	acc[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	acc_shr,
		const right_shift_t	bc_sat
	)

Multiply one 16-bit vector element-wise by another, and subtract the result from an accumulator.

acc[] represents the 16-bit accumulator mantissa vector \(\bar a\). Each \(a_k\) is acc[k].

b[] and c[] represent the 16-bit input mantissa vectors \(\bar b\) and \(\bar c\), where each \(b_k\) is b[k] and each \(c_k\) is c[k].

Each of the input vectors must begin at a word-aligned address.

length is the number of elements in each of the vectors.

acc_shr is the signed arithmetic right-shift applied to the accumulators \(a_k\) prior to accumulation.

bc_sat is the unsigned arithmetic right-shift applied to the product of \(b_k\) and \(c_k\) before accumulation.

Operation Performed:: \begin{align*} & v_k \leftarrow round( sat_{16}( b_k \cdot c_k \cdot 2^{-bc\_sat} ) ) \\ & \hat{a}_k \leftarrow sat_{16}( a_k \cdot 2^{-acc\_shr} ) \\ & a_k \leftarrow sat_{16}( \hat{a}_k - v_k ) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If inputs \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), and input \(\bar a\) is the accumulator BFP vector \(\bar{a} \cdot 2^{a\_exp}\), then the output values of \(\bar a\) have the exponent \(2^{a\_exp + acc\_shr}\).

For accumulation to make sense mathematically, \(bc\_sat\) must be chosen such that \( a\_exp + acc\_shr = b\_exp + c\_exp + bc\_sat \).

The function xs3_vect_s16_nmacc_prepare() can be used to obtain values for \(a\_exp\), \(acc\_shr\) and \(bc\_sat\) based on the input exponents \(a\_exp\), \(b\_exp\) and \(c\_exp\) and the input headrooms \(a\_hr\), \(b\_hr\) and \(c\_hr\).

Parameters

[in,out]	acc	Accumulator \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	acc_shr	Signed arithmetic right-shift applied to accumulator elements.
[in]	bc_sat	Unsigned arithmetic right-shift applied to the products of elements \(b_k\) and \(c_k\)

Returns: Headroom of the output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if acc, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_nmacc_prepare

◆ xs3_vect_s16_rect()

headroom_t xs3_vect_s16_rect	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length
	)

Rectify the elements of a 16-bit vector.

Rectification ensures that all outputs are non-negative, changing negative values to 0.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

Each output element a[k] is set to the value of the corresponding input element b[k] if it is positive, and a[k] is set to zero otherwise.

Operation Performed:: \begin{align*} & a_k \leftarrow \begin{cases} b_k & b_k \gt 0 \\ 0 & b_k \leq 0\end{cases} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_scale()

headroom_t xs3_vect_s16_scale	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const int16_t	c,
		const right_shift_t	a_shr
	)

Multiply a 16-bit vector by a 16-bit scalar.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

c is the 16-bit scalar \(c\) by which elements of \(\bar b\) are multiplied.

a_shr is an unsigned arithmetic right-shift applied to the 32-bit accumulators holding the penultimate results.

Operation Performed:: \begin{align*} & a_k' \leftarrow b_k \cdot c \\ & a_k \leftarrow sat_{16}(round(a_k' \cdot 2^{-a\_shr})) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \) and \(c\) is the mantissa of floating-point value \(c \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp + c\_exp + a\_shr\).

The function xs3_vect_s16_scale_prepare() can be used to obtain values for \(a\_exp\) and \(a\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input scalar \(c\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	a_shr	Right-shift applied to 32-bit products

Returns: Headroom of vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_set()

void xs3_vect_s16_set	(	int16_t	a[],
		const int16_t	b,
		const unsigned	length
	)

Set all elements of a 16-bit vector to the specified value.

a[] represents the 16-bit vector \(\bar a\). It must begin at a word-aligned address.

b is the value elements of \(\bar a\) are set to.

length is the number of elements in a[].

Operation Performed:: \begin{align*} & a_k \leftarrow b \\ & \qquad\text{for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(b\) is the mantissa of floating-point value \(b \cdot 2^{b\_exp}\), then the output vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input value \(b\)
[in]	length	Number of elements in vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_shl()

headroom_t xs3_vect_s16_shl	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const left_shift_t	b_shl
	)

Left-shift the elements of a 16-bit vector by a specified number of bits.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in vectors \(\bar a\) and \(\bar b\).

b_shl is the signed arithmetic left-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{16}(\lfloor b_k \cdot 2^{b\_shl} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{b\_shl}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shl	Arithmetic left-shift applied to elements of \(\bar b\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_shr()

headroom_t xs3_vect_s16_shr	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr
	)

Right-shift the elements of a 16-bit vector by a specified number of bits.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in vectors \(\bar a\) and \(\bar b\).

b_shr is the signed arithmetic right-shift applied to each element of \(\bar b\).

Operation Performed:: \begin{align*} & a_k \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of a BFP vector \( \bar{b} \cdot 2^{b\_exp} \), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(\bar{a} = \bar{b} \cdot 2^{-b\_shr}\) and \(a\_exp = b\_exp\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Arithmetic right-shift applied to elements of \(\bar b\)

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_sqrt()

headroom_t xs3_vect_s16_sqrt	(	int16_t	a[],
		const int16_t	b[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const unsigned	depth
	)

Compute the square roots of elements of a 16-bit vector.

a[] and b[] represent the 16-bit vectors \(\bar a\) and \(\bar b\) respectively. Each vector must begin at a word-aligned address. This operation can be performed safely in-place on b[].

length is the number of elements in each of the vectors.

b_shr is the signed arithmetic right-shift applied to elements of \(\bar b\).

depth is the number of most significant bits to calculate of each \(a_k\). For example, a depth value of 8 will only compute the 8 most significant bits (the most significant byte) of the result, with the remaining byte as 0. The maximum value for this parameter is XS3_VECT_SQRT_S16_MAX_DEPTH (31). The time cost of this operation is approximately proportional to the number of bits computed.

Operation Performed:: \begin{align*} & b_k' \leftarrow sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & a_k \leftarrow \begin{cases} \sqrt{ b_k' } & b_k' \ge 0 \\ 0 & otherwise\end{cases} \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \\ & \qquad\text{ where } \sqrt{\cdot} \text{ computes the most significant } depth \text{ bits of the square root.} \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\), where \(a\_exp = (b\_exp + b\_shr - 14)/2\).

Note that because exponents must be integers, that means \(b\_exp + b\_shr\) must be even.

The function xs3_vect_s16_sqrt_prepare() can be used to obtain values for \(a\_exp\) and \(b\_shr\) based on the input exponent \(b\_exp\) and headroom \(b\_hr\).

Notes

This function assumes roots are real. Negative input elements will result in corresponding outputs of 0.

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in vectors \(\bar a\) and \(\bar b\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	depth	Number of bits of each output value to compute

Returns: Headroom of output vector \(\bar a\)

Exceptions

ET_LOAD_STORE Raised if a or b is not word-aligned (See Note: Vector Alignment)

◆ xs3_vect_s16_sub()

headroom_t xs3_vect_s16_sub	(	int16_t	a[],
		const int16_t	b[],
		const int16_t	c[],
		const unsigned	length,
		const right_shift_t	b_shr,
		const right_shift_t	c_shr
	)

Subtract one 16-bit BFP vector from another.

a[], b[] and c[] represent the 16-bit vectors \(\bar a\), \(\bar b\) and \(\bar c\) respectively. Each must begin at a word-aligned address. This operation can be performed safely in-place on b[] or c[].

length is the number of elements in each of the vectors.

b_shr and c_shr are the signed arithmetic right-shifts applied to each element of \(\bar b\) and \(\bar c\) respectively.

Operation Performed:: \begin{align*} & b_k' = sat_{16}(\lfloor b_k \cdot 2^{-b\_shr} \rfloor) \\ & c_k' = sat_{16}(\lfloor c_k \cdot 2^{-c\_shr} \rfloor) \\ & a_k \leftarrow sat_{16}\!\left( b_k' - c_k' \right) \\ & \qquad\text{ for }k\in 0\ ...\ (length-1) \end{align*}

Block Floating-Point

If \(\bar b\) and \(\bar c\) are the mantissas of BFP vectors \( \bar{b} \cdot 2^{b\_exp} \) and \(\bar{c} \cdot 2^{c\_exp}\), then the resulting vector \(\bar a\) are the mantissas of BFP vector \(\bar{a} \cdot 2^{a\_exp}\).

In this case, \(b\_shr\) and \(c\_shr\) must be chosen so that \(a\_exp = b\_exp + b\_shr = c\_exp + c\_shr\). Adding or subtracting mantissas only makes sense if they are associated with the same exponent.

The function xs3_vect_s16_sub_prepare() can be used to obtain values for \(a\_exp\), \(b\_shr\) and \(c\_shr\) based on the input exponents \(b\_exp\) and \(c\_exp\) and the input headrooms \(b\_hr\) and \(c\_hr\).

Parameters

[out]	a	Output vector \(\bar a\)
[in]	b	Input vector \(\bar b\)
[in]	c	Input vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar a\), \(\bar b\) and \(\bar c\)
[in]	b_shr	Right-shift applied to \(\bar b\)
[in]	c_shr	Right-shift applied to \(\bar c\)

Returns: Headroom of the output vector \(\bar a\).

Exceptions

ET_LOAD_STORE Raised if a, b or c is not word-aligned (See Note: Vector Alignment)

See also: xs3_vect_s16_sub_prepare

◆ xs3_vect_s16_sum()

int32_t xs3_vect_s16_sum	(	const int16_t	b[],
		const unsigned	length
	)

Get the sum of elements of a 16-bit vector.

b[] represents the 16-bit vector \(\bar b\). b[] must begin at a word-aligned address.

length is the number of elements in \(\bar b\).

Operation Performed:: \begin{align*} a \leftarrow \sum_{k=0}^{length-1} b_k \end{align*}

Block Floating-Point

If \(\bar b\) are the mantissas of BFP vector \(\bar{b} \cdot 2^{b\_exp}\), then the returned value \(a\) is the 32-bit mantissa of floating-point value \(a \cdot 2^{a\_exp}\), where \(a\_exp = b\_exp\).

Parameters

[in]	b	Input vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

Returns: The 32-bit sum \(a\)

Exceptions

ET_LOAD_STORE Raised if b is not word-aligned (See Note: Vector Alignment)

Functions

Detailed Description

Function Documentation

◆ xs3_vect_complex_s16_add()

◆ xs3_vect_complex_s16_add_scalar()

◆ xs3_vect_complex_s16_conj_macc()

◆ xs3_vect_complex_s16_conj_mul()

◆ xs3_vect_complex_s16_conj_nmacc()

◆ xs3_vect_complex_s16_headroom()

◆ xs3_vect_complex_s16_macc()

◆ xs3_vect_complex_s16_mag()

◆ xs3_vect_complex_s16_mul()

◆ xs3_vect_complex_s16_nmacc()

◆ xs3_vect_complex_s16_real_mul()

◆ xs3_vect_complex_s16_real_scale()

◆ xs3_vect_complex_s16_scale()

◆ xs3_vect_complex_s16_set()

◆ xs3_vect_complex_s16_shl()

◆ xs3_vect_complex_s16_shr()

◆ xs3_vect_complex_s16_squared_mag()

◆ xs3_vect_complex_s16_sub()

◆ xs3_vect_complex_s16_sum()

◆ xs3_vect_s16_abs()

◆ xs3_vect_s16_abs_sum()

◆ xs3_vect_s16_add()

◆ xs3_vect_s16_add_scalar()

◆ xs3_vect_s16_argmax()

◆ xs3_vect_s16_argmin()

◆ xs3_vect_s16_clip()

◆ xs3_vect_s16_dot()

◆ xs3_vect_s16_energy()

◆ xs3_vect_s16_headroom()

◆ xs3_vect_s16_inverse()

◆ xs3_vect_s16_macc()

◆ xs3_vect_s16_max()

◆ xs3_vect_s16_max_elementwise()

◆ xs3_vect_s16_min()

◆ xs3_vect_s16_min_elementwise()

◆ xs3_vect_s16_mul()

◆ xs3_vect_s16_nmacc()

◆ xs3_vect_s16_rect()

◆ xs3_vect_s16_scale()

◆ xs3_vect_s16_set()

◆ xs3_vect_s16_shl()

◆ xs3_vect_s16_shr()

◆ xs3_vect_s16_sqrt()

◆ xs3_vect_s16_sub()

◆ xs3_vect_s16_sum()