Macros
#define	xs3_vect_complex_s32_add_prepare xs3_vect_s32_add_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_complex_s32_add()`. More...

#define	xs3_vect_complex_s32_add_scalar_prepare xs3_vect_s32_add_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_complex_s32_add_scalar()`. More...

#define	xs3_vect_complex_s32_conj_mul_prepare xs3_vect_complex_s32_mul_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_complex_s32_conj_mul()`. More...

#define	xs3_vect_complex_s32_nmacc_prepare xs3_vect_complex_s32_macc_prepare
	Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_nmacc(). More...

#define	xs3_vect_complex_s32_conj_macc_prepare xs3_vect_complex_s32_macc_prepare
	Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_conj_macc(). More...

#define	xs3_vect_complex_s32_conj_nmacc_prepare xs3_vect_complex_s32_macc_prepare
	Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_conj_nmacc(). More...

#define	xs3_vect_complex_s32_real_scale_prepare xs3_vect_s32_mul_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_complex_s32_real_scale()`. More...

#define	xs3_vect_complex_s32_sub_prepare xs3_vect_s32_add_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_complex_s32_sub()`. More...

#define	xs3_vect_s32_add_scalar_prepare xs3_vect_s32_add_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_s32_add_scalar()`. More...

#define	xs3_vect_s32_nmacc_prepare xs3_vect_s32_macc_prepare
	Obtain the output exponent and shifts required for a call to xs3_vect_s32_nmacc(). More...

#define	xs3_vect_s32_scale_prepare xs3_vect_s32_mul_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_s32_scale()`. More...

#define	xs3_vect_s32_sub_prepare xs3_vect_s32_add_prepare
	Obtain the output exponent and shifts required for a call to `xs3_vect_s32_sub()`. More...

Functions
void	xs3_vect_complex_s32_macc_prepare (exponent_t *new_acc_exp, right_shift_t *acc_shr, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t acc_exp, const exponent_t b_exp, const exponent_t c_exp, const exponent_t acc_hr, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and shifts needed by xs3_vect_complex_s32_macc(). More...

void	xs3_vect_complex_s32_mag_prepare (exponent_t *a_exp, right_shift_t *b_shr, const exponent_t b_exp, const headroom_t b_hr)
	Obtain the output exponent and input shift used by xs3_vect_complex_s32_mag() and xs3_vect_complex_s16_mag(). More...

void	xs3_vect_complex_s32_mul_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and input shifts used by xs3_vect_complex_s32_mul() and xs3_vect_complex_s32_conj_mul(). More...

void	xs3_vect_complex_s32_real_mul_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and input shifts used by xs3_vect_complex_s32_real_mul(). More...

void	xs3_vect_complex_s32_scale_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and input shifts used by xs3_vect_complex_s32_scale(). More...

void	xs3_vect_complex_s32_squared_mag_prepare (exponent_t *a_exp, right_shift_t *b_shr, const exponent_t b_exp, const headroom_t b_hr)
	Obtain the output exponent and input shift used by xs3_vect_complex_s32_squared_mag(). More...

void	xs3_vect_complex_s32_sum_prepare (exponent_t *a_exp, right_shift_t *b_shr, const exponent_t b_exp, const headroom_t b_hr, const unsigned length)
	Obtain the output exponent and input shift used by xs3_vect_complex_s32_sum(). More...

void	xs3_vect_s32_add_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and input shifts to add or subtract two 16- or 32-bit BFP vectors. More...

void	xs3_vect_s32_clip_prepare (exponent_t *a_exp, right_shift_t *b_shr, int32_t *lower_bound, int32_t *upper_bound, const exponent_t b_exp, const exponent_t bound_exp, const headroom_t b_hr)
	Obtain the output exponent, input shift and modified bounds used by xs3_vect_s32_clip(). More...

void	xs3_vect_s32_dot_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr, const unsigned length)
	Obtain the output exponent and input shift used by xs3_vect_s32_dot(). More...

void	xs3_vect_s32_energy_prepare (exponent_t *a_exp, right_shift_t *b_shr, const unsigned length, const exponent_t b_exp, const headroom_t b_hr)
	Obtain the output exponent and input shift used by xs3_vect_s32_energy(). More...

void	xs3_vect_s32_inverse_prepare (exponent_t *a_exp, unsigned *scale, const int32_t b[], const exponent_t b_exp, const unsigned length)
	Obtain the output exponent and scale used by xs3_vect_s32_inverse(). More...

void	xs3_vect_s32_macc_prepare (exponent_t *new_acc_exp, right_shift_t *acc_shr, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t acc_exp, const exponent_t b_exp, const exponent_t c_exp, const headroom_t acc_hr, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and shifts needed by xs3_vect_s32_macc(). More...

void	xs3_vect_s32_mul_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
	Obtain the output exponent and input shifts used by xs3_vect_s32_mul(). More...

void	xs3_vect_s32_sqrt_prepare (exponent_t *a_exp, right_shift_t *b_shr, const exponent_t b_exp, const right_shift_t b_hr)
	Obtain the output exponent and shift parameter used by xs3_vect_s32_sqrt(). More...

void	xs3_vect_2vec_prepare (exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr, const headroom_t extra_operand_hr)
	Obtain the output exponent and input shifts required to perform a binary add-like operation. More...

Detailed Description

Macro Definition Documentation

◆ xs3_vect_complex_s32_add_prepare

#define xs3_vect_complex_s32_add_prepare xs3_vect_s32_add_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_add().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_add() is identical to that for xs3_vect_s32_add().

This macro is provided as a convenience to developers and to make the code more coherent.

See also: xs3_vect_s32_add_prepare()

◆ xs3_vect_complex_s32_add_scalar_prepare

#define xs3_vect_complex_s32_add_scalar_prepare xs3_vect_s32_add_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_add_scalar().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_add_scalar() is identical to that for xs3_vect_s32_add().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_add_prepare()

◆ xs3_vect_complex_s32_conj_macc_prepare

#define xs3_vect_complex_s32_conj_macc_prepare xs3_vect_complex_s32_macc_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_conj_macc().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_conj_macc() is identical to that for xs3_vect_complex_s32_macc_prepare().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_complex_s32_macc_prepare(), xs3_vect_complex_s32_conj_macc()

◆ xs3_vect_complex_s32_conj_mul_prepare

#define xs3_vect_complex_s32_conj_mul_prepare xs3_vect_complex_s32_mul_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_conj_mul().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_conj_mul() is identical to that for xs3_vect_complex_s32_mul().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_complex_s32_mul_prepare()

◆ xs3_vect_complex_s32_conj_nmacc_prepare

#define xs3_vect_complex_s32_conj_nmacc_prepare xs3_vect_complex_s32_macc_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_conj_nmacc().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_conj_nmacc() is identical to that for xs3_vect_complex_s32_macc_prepare().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_complex_s32_macc_prepare(), xs3_vect_complex_s32_conj_nmacc()

◆ xs3_vect_complex_s32_nmacc_prepare

#define xs3_vect_complex_s32_nmacc_prepare xs3_vect_complex_s32_macc_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_nmacc().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_nmacc() is identical to that for xs3_vect_complex_s32_macc_prepare().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_complex_s32_macc_prepare(), xs3_vect_complex_s32_nmacc()

◆ xs3_vect_complex_s32_real_scale_prepare

#define xs3_vect_complex_s32_real_scale_prepare xs3_vect_s32_mul_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_real_scale().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_real_scale() is identical to that for xs3_vect_s32_mul().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_mul_prepare()

◆ xs3_vect_complex_s32_sub_prepare

#define xs3_vect_complex_s32_sub_prepare xs3_vect_s32_add_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_complex_s32_sub().

The logic for computing the shifts and exponents of xs3_vect_complex_s32_sub() is identical to that for xs3_vect_s32_add().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_add_prepare()

◆ xs3_vect_s32_add_scalar_prepare

#define xs3_vect_s32_add_scalar_prepare xs3_vect_s32_add_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_s32_add_scalar().

The logic for computing the shifts and exponents of xs3_vect_s32_add_scalar() is identical to that for xs3_vect_s32_add().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_add_prepare()

◆ xs3_vect_s32_nmacc_prepare

#define xs3_vect_s32_nmacc_prepare xs3_vect_s32_macc_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_s32_nmacc().

The logic for computing the shifts and exponents of xs3_vect_s32_nmacc() is identical to that for xs3_vect_s32_macc_prepare().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_macc_prepare(), xs3_vect_s32_nmacc()

◆ xs3_vect_s32_scale_prepare

#define xs3_vect_s32_scale_prepare xs3_vect_s32_mul_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_s32_scale().

The logic for computing the shifts and exponents of xs3_vect_s32_scale() is identical to that for xs3_vect_s32_mul().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_mul_prepare()

◆ xs3_vect_s32_sub_prepare

#define xs3_vect_s32_sub_prepare xs3_vect_s32_add_prepare

Obtain the output exponent and shifts required for a call to xs3_vect_s32_sub().

The logic for computing the shifts and exponents of xs3_vect_s32_sub() is identical to that for xs3_vect_s32_add().

This macro is provided as a convenience to developers and to make the code more readable.

See also: xs3_vect_s32_add_prepare()

Function Documentation

◆ xs3_vect_2vec_prepare()

void xs3_vect_2vec_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr,
		const headroom_t	extra_operand_hr
	)

Obtain the output exponent and input shifts required to perform a binary add-like operation.

This function computes the output exponent and input shifts required for BFP operations which take two vectors as input, where the operation is "add-like".

Here, "add-like" operations are loosely defined as those which require input vectors to share an exponent before their mantissas can be meaningfully used to perform that operation.

For example, consider adding \( 3 \cdot 2^{x} + 4 \cdot 2^{y} \). If \(x = y\), then the mantissas can be added directly to get a meaningful result \( (3+4) \cdot 2^{x} \). If \(x \ne y\) however, adding the mantissas together is meaningless. Before the mantissas can be added in this case, one or both of the input mantissas must be shifted so that the representations correspond to the same exponent. Likewise, similar logic applies to binary comparisons.

This is in contrast to a "multiply-like" operation, which does not have this same requirement (e.g. \(a \cdot 2^x \cdot b \cdot 2^y = ab \cdot 2^{x+y}\), regardless of whether \(x=y\)).

For a general operation like:

\( \bar{a} \cdot 2^{a\_exp} = \bar{b}\cdot 2^{b\_exp} \oplus \bar{c}\cdot 2^{c\_exp} \)

\(\bar b\) and \(\bar c\) are the input mantissa vectors with exponents \(b\_exp\) and \(c\_exp\), which are shared by each element of their respective vectors. \(\bar a\) is the output mantissa vector with exponent \(a\_exp\). Two additional properties, \(b\_hr\) and \(c\_hr\), which are the headroom of mantissa vectors \(\bar b\) and \(\bar c\) respectively, are required by this function.

In addition to \(a\_exp\), this function computes \(b\_shr\) and \(c\_shr\), signed arithmetic right-shifts applied to the mantissa vectors \(\bar b\) and \(\bar c\) so that the add-like \(\oplus\) operation can be applied.

This function chooses \(a\_exp\) to be the minimum exponent which can be used to express both \(\bar B\) and \(\bar C\) without saturation of their mantissas, and which leaves both \(\bar b\) and \(\bar c\) with at least extra_operand_hr bits of headroom. The shifts \(b\_shr\) and \(c\_shr\) are derived from \(a\_exp\) using \(b\_exp\) and \(c\_exp\).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

If \(b\_hr\) or \(c\_hr\) are unknown, they can be calculated using the appropriate headroom function (e.g. xs3_vect_complex_s16_headroom() for complex 16-bit vectors) or the value 0 can always be safely used (but may result in reduced precision).

Parameters

[out]	a_exp	Output exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift to be applied to elements of \(\bar b\). Used by the function which computes the output mantissas \(\bar a\)
[out]	c_shr	Signed arithmetic right-shift to be applied to elements of \(\bar c\). Used by the function which computes the output mantissas \(\bar a\)
[in]	b_exp	Exponent of BFP vector \(\bar b\)
[in]	c_exp	Exponent of BFP vector \(\bar c\)
[in]	b_hr	Headroom of BFP vector \(\bar b\)
[in]	c_hr	Headroom of BFP vector \(\bar c\)
[in]	extra_operand_hr	The minimum amount of headroom that will be left in the mantissa vectors following the arithmetic right-shift, as required by some operations.

◆ xs3_vect_complex_s32_macc_prepare()

void xs3_vect_complex_s32_macc_prepare	(	exponent_t *	new_acc_exp,
		right_shift_t *	acc_shr,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	acc_exp,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const exponent_t	acc_hr,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and shifts needed by xs3_vect_complex_s32_macc().

This function is used in conjunction with xs3_vect_complex_s32_macc() to perform an element-wise multiply-accumulate of complex 32-bit BFP vectors.

This function computes new_acc_exp, acc_shr, b_shr and c_shr, which are selected to maximize precision in the resulting accumulator vector without causing saturation of final or intermediate values. Normally the caller will pass these outputs to their corresponding inputs of xs3_vect_complex_s32_macc().

acc_exp is the exponent associated with the accumulator mantissa vector \(\bar a\) prior to the operation, whereas new_acc_exp is the exponent corresponding to the updated accumulator vector.

b_exp and c_exp are the exponents associated with the complex input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

acc_hr, b_hr and c_hr are the headrooms of \(\bar a\), \(\bar b\) and \(\bar c\) respectively. If the headroom of any of these vectors is unknown, it can be obtained by calling xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the acc_shr, b_shr and c_shr produced by this function can be adjusted according to the following:

// Presumed to be set somewhere
exponent_t acc_exp, b_exp, c_exp;
headroom_t acc_hr, b_hr, c_hr;
exponent_t desired_exp;
 
...
 
// Call prepare
right_shift_t acc_shr, b_shr, c_shr;
xs3_vect_complex_s32_macc_prepare(&acc_exp, &acc_shr, &b_shr, &c_shr, 
                                  acc_exp, b_exp, c_exp,
                                  acc_hr, b_hr, c_hr);
 
// Modify results
right_shift_t mant_shr = desired_exp - acc_exp;
acc_exp += mant_shr;
acc_shr += mant_shr;
b_shr  += mant_shr;
c_shr  += mant_shr;
 
// acc_shr, b_shr and c_shr may now be used in a call to xs3_vect_complex_s32_macc() 

When applying the above adjustment, the following conditions should be maintained:

acc_shr > -acc_hr (Shifting any further left may cause saturation)
b_shr >= -b_hr (Shifting any further left may cause saturation)
c_shr >= -c_hr (Shifting any further left may cause saturation)

It is up to the user to ensure any such modification does not result in saturation or unacceptable loss of precision.

Parameters

[out]	new_acc_exp	Exponent associated with output mantissa vector \(\bar a\) (after macc)
[out]	acc_shr	Signed arithmetic right-shift used for \(\bar a\) in xs3_vect_complex_s32_macc()
[out]	b_shr	Signed arithmetic right-shift used for \(\bar b\) in xs3_vect_complex_s32_macc()
[out]	c_shr	Signed arithmetic right-shift used for \(\bar c\) in xs3_vect_complex_s32_macc()
[in]	acc_exp	Exponent associated with input mantissa vector \(\bar a\) (before macc)
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	c_exp	Exponent associated with input mantissa vector \(\bar c\)
[in]	acc_hr	Headroom of input mantissa vector \(\bar a\) (before macc)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	c_hr	Headroom of input mantissa vector \(\bar c\)

See also: xs3_vect_complex_s32_macc

◆ xs3_vect_complex_s32_mag_prepare()

void xs3_vect_complex_s32_mag_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		const exponent_t	b_exp,
		const headroom_t	b_hr
	)

Obtain the output exponent and input shift used by xs3_vect_complex_s32_mag() and xs3_vect_complex_s16_mag().

This function is used in conjunction with xs3_vect_complex_s32_mag() to compute the magnitude of each element of a complex 32-bit BFP vector.

This function computes a_exp and b_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and is chosen to maximize precision when elements of \(\bar a\) are computed. The a_exp chosen by this function is derived from the exponent and headroom associated with the input vector.

b_shr is the shift parameter required by xs3_vect_complex_s32_mag() to achieve the chosen output exponent a_exp.

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

b_hr is the headroom of \(\bar b\). If the headroom of \(\bar b\) is unknown it can be calculated using xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori

right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);

When applying the above adjustment, the following condition should be maintained:

b_hr + b_shr >= 0

Using larger values than strictly necessary for b_shr may result in unnecessary underflows or loss of precision.

Parameters

[out]	a_exp	Output exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_mag()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)

See also: xs3_vect_complex_s32_mag()

◆ xs3_vect_complex_s32_mul_prepare()

void xs3_vect_complex_s32_mul_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and input shifts used by xs3_vect_complex_s32_mul() and xs3_vect_complex_s32_conj_mul().

This function is used in conjunction with xs3_vect_complex_s32_mul() to perform a complex element-wise multiplication of two complex 32-bit BFP vectors.

This function computes a_exp, b_shr and c_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and must be chosen to be large enough to avoid overflow when elements of \(\bar a\) are computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vectors.

b_shr and c_shr are the shift parameters required by xs3_vect_complex_s32_mul() to achieve the chosen output exponent a_exp.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

b_hr and c_hr are the headroom of \(\bar b\) and \(\bar c\) respectively. If the headroom of \(\bar b\) or \(\bar c\) is unknown, they can be obtained by calling xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

Using the outputs of this function, an output mantissa which would otherwise be INT32_MIN will instead saturate to -INT32_MAX. This is due to the symmetric saturation logic employed by the VPU and is a hardware feature. This is a corner case which is usually unlikely and results in 1 LSb of error when it occurs.

Parameters

[out]	a_exp	Exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_mul()
[out]	c_shr	Signed arithmetic right-shift for \(\bar c\) used by xs3_vect_complex_s32_mul()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	c_exp	Exponent associated with input mantissa vector \(\bar c\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	c_hr	Headroom of input mantissa vector \(\bar c\)

See also: xs3_vect_complex_s32_conj_mul, xs3_vect_complex_s32_mul

◆ xs3_vect_complex_s32_real_mul_prepare()

void xs3_vect_complex_s32_real_mul_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and input shifts used by xs3_vect_complex_s32_real_mul().

This function is used in conjunction with xs3_vect_complex_s32_real_mul() to perform the element-wise multiplication of a complex 32-bit BFP vector by a real 32-bit BFP vector.

This function computes a_exp, b_shr and c_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and must be chosen to be large enough to avoid overflow when elements of \(\bar a\) are computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vectors.

b_shr and c_shr are the shift parameters required by xs3_vect_complex_s32_real_mul() to achieve the chosen output exponent a_exp.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

b_hr and c_hr are the headroom of \(\bar b\) and \(\bar c\) respectively. If the headroom of \(\bar b\) or \(\bar c\) is unknown, they can be obtained by calling xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

Using the outputs of this function, an output mantissa which would otherwise be INT32_MIN will instead saturate to -INT32_MAX. This is due to the symmetric saturation logic employed by the VPU and is a hardware feature. This is a corner case which is usually unlikely and results in 1 LSb of error when it occurs.

Parameters

[out]	a_exp	Output exponent associated with \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_real_mul()
[out]	c_shr	Signed arithmetic right-shift for \(\bar c\) used by xs3_vect_complex_s32_real_mul()
[in]	b_exp	Exponent associated with \(\bar b\)
[in]	c_exp	Exponent associated with \(\bar c\)
[in]	b_hr	Headroom of mantissa vector \(\bar b\)
[in]	c_hr	Headroom of mantissa vector \(\bar c\)

See also: xs3_vect_complex_s32_real_mul

◆ xs3_vect_complex_s32_scale_prepare()

void xs3_vect_complex_s32_scale_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and input shifts used by xs3_vect_complex_s32_scale().

This function is used in conjunction with xs3_vect_complex_s32_scale() to perform a complex multiplication of a complex 32-bit BFP vector by a complex 32-bit scalar.

This function computes a_exp, b_shr and c_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and must be chosen to be large enough to avoid overflow when elements of \(\bar a\) are computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vectors.

b_shr and c_shr are the shift parameters required by xs3_vect_complex_s32_scale() to achieve the chosen output exponent a_exp.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

b_hr and c_hr are the headroom of \(\bar b\) and \(\bar c\) respectively. If the headroom of \(\bar b\) or \(\bar c\) is unknown, they can be obtained by calling xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

Using the outputs of this function, an output mantissa which would otherwise be INT32_MIN will instead saturate to -INT32_MAX. This is due to the symmetric saturation logic employed by the VPU and is a hardware feature. This is a corner case which is usually unlikely and results in 1 LSb of error when it occurs.

Parameters

[out]	a_exp	Exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_scale()
[out]	c_shr	Signed arithmetic right-shift for \(\bar c\) used by xs3_vect_complex_s32_scale()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	c_exp	Exponent associated with input mantissa vector \(\bar c\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	c_hr	Headroom of input mantissa vector \(\bar c\)

See also: xs3_vect_complex_s32_scale

◆ xs3_vect_complex_s32_squared_mag_prepare()

void xs3_vect_complex_s32_squared_mag_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		const exponent_t	b_exp,
		const headroom_t	b_hr
	)

Obtain the output exponent and input shift used by xs3_vect_complex_s32_squared_mag().

This function is used in conjunction with xs3_vect_complex_s32_squared_mag() to compute the squared magnitude of each element of a complex 32-bit BFP vector.

This function computes a_exp and b_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and is chosen to maximize precision when elements of \(\bar a\) are computed. The a_exp chosen by this function is derived from the exponent and headroom associated with the input vector.

b_shr is the shift parameter required by xs3_vect_complex_s32_squared_mag() to achieve the chosen output exponent a_exp.

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

b_hr is the headroom of \(\bar b\). If the headroom of \(\bar b\) is unknown it can be calculated using xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori

right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);

When applying the above adjustment, the following condition should be maintained:

b_hr + b_shr >= 0

Using larger values than strictly necessary for b_shr may result in unnecessary underflows or loss of precision.

Parameters

[out]	a_exp	Output exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_squared_mag()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)

See also: xs3_vect_complex_s32_squared_mag()

◆ xs3_vect_complex_s32_sum_prepare()

void xs3_vect_complex_s32_sum_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		const exponent_t	b_exp,
		const headroom_t	b_hr,
		const unsigned	length
	)

Obtain the output exponent and input shift used by xs3_vect_complex_s32_sum().

This function is used in conjunction with xs3_vect_complex_s32_sum() to compute the sum of elements of a complex 32-bit BFP vector.

This function computes a_exp and b_shr.

a_exp is the exponent associated with the 64-bit mantissa \(a\) returned by xs3_vect_complex_s32_sum(), and must be chosen to be large enough to avoid saturation when \(a\) is computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vector.

b_shr is the shift parameter required by xs3_vect_complex_s32_sum() to achieve the chosen output exponent a_exp.

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

b_hr is the headroom of \(\bar b\). If the headroom of \(\bar b\) is unknown it can be calculated using xs3_vect_complex_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

length is the number of elements in the input mantissa vector \(\bar b\).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori

right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);

When applying the above adjustment, the following condition should be maintained:

b_hr + b_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Parameters

[out]	a_exp	Exponent associated with output mantissa \(a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_complex_s32_sum()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	length	Number of elements in \(\bar b\)

See also: xs3_vect_complex_s32_sum

◆ xs3_vect_s32_add_prepare()

void xs3_vect_s32_add_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and input shifts to add or subtract two 16- or 32-bit BFP vectors.

The block floating-point functions in this library which add or subtract vectors are of the general form:

\( \bar{a} \cdot 2^{a\_exp} = \bar{b}\cdot 2^{b\_exp} \pm \bar{c}\cdot 2^{c\_exp} \)

\(\bar b\) and \(\bar c\) are the input mantissa vectors with exponents \(b\_exp\) and \(c\_exp\), which are shared by each element of their respective vectors. \(\bar a\) is the output mantissa vector with exponent \(a\_exp\). Two additional properties, \(b\_hr\) and \(c\_hr\), which are the headroom of mantissa vectors \(\bar b\) and \(\bar c\) respectively, are required by this function.

In order to avoid any overflows in the output mantissas, the output exponent \(a\_exp\) must be chosen such that the largest (in the sense of absolute value) possible output mantissa will fit into the allotted space (e.g. 32 bits for xs3_vect_s32_add()). Once \(a\_exp\) is chosen, the input bit-shifts \(b\_shr\) and \(c\_shr\) are calculated to achieve that resulting exponent.

This function chooses \(a\_exp\) to be the minimum exponent known to avoid overflows, given the input exponents ( \(b\_exp\) and \(c\_exp\)) and input headroom ( \(b\_hr\) and \(c\_hr\)).

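This function is used to calculate the output exponent and input bit-shifts for each of the following functions: xs3_vect_s16_add(), xs3_vect_s32_add(), xs3_vect_s16_sub(), xs3_vect_s32_sub(), xs3_vect_complex_s16_add(), xs3_vect_complex_s32_add(), xs3_vect_complex_s16_sub(), xs3_vect_complex_s32_sub().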

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

If \(b\_hr\) or \(c\_hr\) are unknown, they can be calculated using the appropriate headroom function (e.g. xs3_vect_complex_s16_headroom() for complex 16-bit vectors) or the value 0 can always be safely used (but may result in reduced precision).

Parameters

[out]	a_exp	Output exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift to be applied to elements of \(\bar b\). Used by the function which computes the output mantissas \(\bar a\)
[out]	c_shr	Signed arithmetic right-shift to be applied to elements of \(\bar c\). Used by the function which computes the output mantissas \(\bar a\)
[in]	b_exp	Exponent of BFP vector \(\bar b\)
[in]	c_exp	Exponent of BFP vector \(\bar c\)
[in]	b_hr	Headroom of BFP vector \(\bar b\)
[in]	c_hr	Headroom of BFP vector \(\bar c\)

See also: xs3_vect_s16_add, xs3_vect_s32_add, xs3_vect_s16_sub, xs3_vect_s32_sub, xs3_vect_complex_s16_add, xs3_vect_complex_s32_add, xs3_vect_complex_s16_sub, xs3_vect_complex_s32_sub

◆ xs3_vect_s32_clip_prepare()

void xs3_vect_s32_clip_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		int32_t *	lower_bound,
		int32_t *	upper_bound,
		const exponent_t	b_exp,
		const exponent_t	bound_exp,
		const headroom_t	b_hr
	)

Obtain the output exponent, input shift and modified bounds used by xs3_vect_s32_clip().

This function is used in conjunction with xs3_vect_s32_clip() to bound the elements of a 32-bit BFP vector to a specified range.

This function computes a_exp, b_shr, lower_bound and upper_bound.

a_exp is the exponent associated with the 32-bit mantissa vector \(\bar a\) computed by xs3_vect_s32_clip().

b_shr is the shift parameter required by xs3_vect_s32_clip() to achieve the output exponent a_exp.

lower_bound and upper_bound are the 32-bit mantissas which indicate the lower and upper clipping bounds respectively. The values are modified by this function, and the resulting values should be passed along to xs3_vect_s32_clip().

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

bound_exp is the exponent associated with the bound mantissas lower_bound and upper_bound.

b_hr is the headroom of \(\bar b\). If unknown, it can be obtained using xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Parameters

[out]	a_exp	Exponent associated with output mantissa vector \(\bar a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_s32_clip()
[in,out]	lower_bound	Lower bound of clipping range
[in,out]	upper_bound	Upper bound of clipping range
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	bound_exp	Exponent associated with clipping bounds `lower_bound` and `upper_bound`
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)

See also: xs3_vect_s32_clip

◆ xs3_vect_s32_dot_prepare()

void xs3_vect_s32_dot_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr,
		const unsigned	length
	)

Obtain the output exponent and input shifts used by xs3_vect_s32_dot().

This function is used in conjunction with xs3_vect_s32_dot() to compute the inner product of two 32-bit BFP vectors.

This function computes a_exp, b_shr and c_shr.

a_exp is the exponent associated with the 64-bit mantissa \(a\) returned by xs3_vect_s32_dot(), and must be chosen to be large enough to avoid saturation when \(a\) is computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vectors.

b_shr and c_shr are the shift parameters required by xs3_vect_s32_dot() to achieve the chosen output exponent a_exp.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

b_hr and c_hr are the headroom of \(\bar b\) and \(\bar c\) respectively. If either is unknown, they can be obtained using xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

length is the number of elements in the input mantissa vectors \(\bar b\) and \(\bar c\).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr or c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Parameters

[out]	a_exp	Exponent associated with output mantissa \(a\)
[out]	b_shr	Signed arithmetic right-shift for \(\bar b\) used by xs3_vect_s32_dot()
[out]	c_shr	Signed arithmetic right-shift for \(\bar c\) used by xs3_vect_s32_dot()
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	c_exp	Exponent associated with input mantissa vector \(\bar c\)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	c_hr	Headroom of input mantissa vector \(\bar c\)
[in]	length	Number of elements in vectors \(\bar b\) and \(\bar c\)

See also: xs3_vect_s32_dot

◆ xs3_vect_s32_energy_prepare()

void xs3_vect_s32_energy_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		const unsigned	length,
		const exponent_t	b_exp,
		const headroom_t	b_hr
	)

Obtain the output exponent and input shift used by xs3_vect_s32_energy().

This function is used in conjunction with xs3_vect_s32_energy() to compute the inner product of a 32-bit BFP vector with itself.

This function computes a_exp and b_shr.

a_exp is the exponent associated with the 64-bit mantissa \(a\) returned by xs3_vect_s32_energy(), and must be chosen to be large enough to avoid saturation when \(a\) is computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponent and headroom associated with the input vector.

b_shr is the shift parameter required by xs3_vect_s32_energy() to achieve the chosen output exponent a_exp.

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

b_hr is the headroom of \(\bar b\). If it is unknown, it can be obtained using xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

length is the number of elements in the input mantissa vector \(\bar b\).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori

right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);

When applying the above adjustment, the following condition should be maintained:

b_hr + b_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Parameters

[out]	a_exp	Exponent of outputs of xs3_vect_s32_energy()
[out]	b_shr	Right-shift to be applied to elements of \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\)
[in]	b_exp	Exponent of vector \(\bar b\)
[in]	b_hr	Headroom of vector \(\bar b\)

See also: xs3_vect_s32_energy

◆ xs3_vect_s32_inverse_prepare()

void xs3_vect_s32_inverse_prepare	(	exponent_t *	a_exp,
		unsigned *	scale,
		const int32_t	b[],
		const exponent_t	b_exp,
		const unsigned	length
	)

Obtain the output exponent and scale used by xs3_vect_s32_inverse().

This function is used in conjunction with xs3_vect_s32_inverse() to compute the inverse of elements of a 32-bit BFP vector.

This function computes a_exp and scale.

a_exp is the exponent associated with output mantissa vector \(\bar a\), and must be chosen to avoid overflow in the smallest element of the input vector, which when inverted becomes the largest output element. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation. The a_exp chosen by this function is derived from the exponent and smallest element of the input vector.

scale is a scaling parameter used by xs3_vect_s32_inverse() to achieve the chosen output exponent.

b[] is the input mantissa vector \(\bar b\).

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

length is the number of elements in \(\bar b\).

Todo: In lib_dsp, the inverse function has a floor, which prevents tiny values from completely dominating the output behavior. Perhaps I should include that?

Parameters

[out]	a_exp	Exponent of output vector \(\bar a\)
[out]	scale	Scale factor to be applied when computing inverse
[in]	b	Input vector \(\bar b\)
[in]	b_exp	Exponent of \(\bar b\)
[in]	length	Number of elements in vector \(\bar b\)

See also: xs3_vect_s32_inverse

◆ xs3_vect_s32_macc_prepare()

void xs3_vect_s32_macc_prepare	(	exponent_t *	new_acc_exp,
		right_shift_t *	acc_shr,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	acc_exp,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	acc_hr,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and shifts needed by xs3_vect_s32_macc().

This function is used in conjunction with xs3_vect_s32_macc() to perform an element-wise multiply-accumulate of 32-bit BFP vectors.

This function computes new_acc_exp, acc_shr, b_shr and c_shr, which are selected to maximize precision in the resulting accumulator vector without causing saturation of final or intermediate values. Normally the caller will pass these outputs to their corresponding inputs of xs3_vect_s32_macc().

acc_exp is the exponent associated with the accumulator mantissa vector \(\bar a\) prior to the operation, whereas new_acc_exp is the exponent corresponding to the updated accumulator vector.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

acc_hr, b_hr and c_hr are the headrooms of \(\bar a\), \(\bar b\) and \(\bar c\) respectively. If the headroom of any of these vectors is unknown, it can be obtained by calling xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the acc_shr, b_shr and c_shr produced by this function can be adjusted according to the following:

// Presumed to be set somewhere
exponent_t acc_exp, b_exp, c_exp;
headroom_t acc_hr, b_hr, c_hr;
exponent_t desired_exp;
 
...
 
// Call prepare
right_shift_t acc_shr, b_shr, c_shr;
xs3_vect_s32_macc_prepare(&acc_exp, &acc_shr, &b_shr, &c_shr, 
                          acc_exp, b_exp, c_exp,
                          acc_hr, b_hr, c_hr);
 
// Modify results
right_shift_t mant_shr = desired_exp - acc_exp;
acc_exp += mant_shr;
acc_shr += mant_shr;
b_shr  += mant_shr;
c_shr  += mant_shr;
 
// acc_shr, b_shr and c_shr may now be used in a call to xs3_vect_s32_macc() 

When applying the above adjustment, the following conditions should be maintained:

acc_shr > -acc_hr (Shifting any further left may cause saturation)
b_shr >= -b_hr (Shifting any further left may cause saturation)
c_shr >= -c_hr (Shifting any further left may cause saturation)

It is up to the user to ensure any such modification does not result in saturation or unacceptable loss of precision.

Parameters

[out]	new_acc_exp	Exponent associated with output mantissa vector \(\bar a\) (after macc)
[out]	acc_shr	Signed arithmetic right-shift used for \(\bar a\) in xs3_vect_s32_macc()
[out]	b_shr	Signed arithmetic right-shift used for \(\bar b\) in xs3_vect_s32_macc()
[out]	c_shr	Signed arithmetic right-shift used for \(\bar c\) in xs3_vect_s32_macc()
[in]	acc_exp	Exponent associated with input mantissa vector \(\bar a\) (before macc)
[in]	b_exp	Exponent associated with input mantissa vector \(\bar b\)
[in]	c_exp	Exponent associated with input mantissa vector \(\bar c\)
[in]	acc_hr	Headroom of input mantissa vector \(\bar a\) (before macc)
[in]	b_hr	Headroom of input mantissa vector \(\bar b\)
[in]	c_hr	Headroom of input mantissa vector \(\bar c\)

See also: xs3_vect_s32_macc

◆ xs3_vect_s32_mul_prepare()

void xs3_vect_s32_mul_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		right_shift_t *	c_shr,
		const exponent_t	b_exp,
		const exponent_t	c_exp,
		const headroom_t	b_hr,
		const headroom_t	c_hr
	)

Obtain the output exponent and input shifts used by xs3_vect_s32_mul().

This function is used in conjunction with xs3_vect_s32_mul() to perform an element-wise multiplication of two 32-bit BFP vectors.

This function computes a_exp, b_shr, c_shr.

a_exp is the exponent associated with mantissa vector \(\bar a\), and must be chosen to be large enough to avoid overflow when elements of \(\bar a\) are computed. To maximize precision, this function chooses a_exp to be the smallest exponent known to avoid saturation (see exception below). The a_exp chosen by this function is derived from the exponents and headrooms associated with the input vectors.

b_shr and c_shr are the shift parameters required by xs3_vect_s32_mul() to achieve the chosen output exponent a_exp.

b_exp and c_exp are the exponents associated with the input mantissa vectors \(\bar b\) and \(\bar c\) respectively.

b_hr and c_hr are the headroom of \(\bar b\) and \(\bar c\) respectively. If the headroom of \(\bar b\) or \(\bar c\) is unknown, they can be obtained by calling xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr and c_shr produced by this function can be adjusted according to the following:

exponent_t desired_exp = ...; // Value known a priori
right_shift_t new_b_shr = b_shr + (desired_exp - a_exp);
right_shift_t new_c_shr = c_shr + (desired_exp - a_exp);

When applying the above adjustment, the following conditions should be maintained:

b_hr + b_shr >= 0
c_hr + c_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr and c_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Notes

Using the outputs of this function, an output mantissa which would otherwise be INT32_MIN will instead saturate to -INT32_MAX. This is due to the symmetric saturation logic employed by the VPU and is a hardware feature. This is a corner case which is usually unlikely and results in 1 LSb of error when it occurs.

Parameters

[out]	a_exp	Exponent of output elements of xs3_vect_s32_mul()
[out]	b_shr	Right-shift to be applied to elements of \(\bar b\)
[out]	c_shr	Right-shift to be applied to elements of \(\bar c\)
[in]	b_exp	Exponent of \(\bar b\)
[in]	c_exp	Exponent of \(\bar c\)
[in]	b_hr	Headroom of \(\bar b\)
[in]	c_hr	Headroom of \(\bar c\)

See also: xs3_vect_s32_mul

◆ xs3_vect_s32_sqrt_prepare()

void xs3_vect_s32_sqrt_prepare	(	exponent_t *	a_exp,
		right_shift_t *	b_shr,
		const exponent_t	b_exp,
		const right_shift_t	b_hr
	)

Obtain the output exponent and shift parameter used by xs3_vect_s32_sqrt().

This function is used in conjunction with xs3_vect_s32_sqrt() to compute the square root of elements of a 32-bit BFP vector.

This function computes a_exp and b_shr.

a_exp is the exponent associated with output mantissa vector \(\bar a\), and should be chosen to maximize the precision of the results. To that end, this function chooses a_exp to be the smallest exponent known to avoid saturation of the resulting mantissa vector \(\bar a\). It is derived from the exponent and headroom of the input BFP vector.

b_shr is the shift parameter required by xs3_vect_s32_sqrt() to achieve the chosen output exponent a_exp.

b_exp is the exponent associated with the input mantissa vector \(\bar b\).

b_hr is the headroom of \(\bar b\). If it is unknown, it can be obtained using xs3_vect_s32_headroom(). Alternatively, the value 0 can always be safely used (but may result in reduced precision).

Adjusting Output Exponents

If a specific output exponent desired_exp is needed for the result (e.g. for emulating fixed-point arithmetic), the b_shr produced by this function can be adjusted according to the following:

exponent_t a_exp;
right_shift_t b_shr;
xs3_vect_s32_sqrt_prepare(&a_exp, &b_shr, b_exp, b_hr);
exponent_t desired_exp = ...; // Value known a priori
b_shr = b_shr + (desired_exp - a_exp);
a_exp = desired_exp;

When applying the above adjustment, the following condition should be maintained:

b_hr + b_shr >= 0

Be aware that using smaller values than strictly necessary for b_shr can result in saturation, and using larger values may result in unnecessary underflows or loss of precision.

Also, if a larger exponent is used than necessary, a larger depth parameter (see xs3_vect_s32_sqrt()) will be required to achieve the same precision, as the results are computed bit by bit, starting with the most significant bit.

Parameters

[out]	a_exp	Exponent of outputs of xs3_vect_s32_sqrt()
[out]	b_shr	Right-shift to be applied to elements of \(\bar b\)
[in]	b_exp	Exponent of vector \(\bar b\)
[in]	b_hr	Headroom of vector \(\bar b\)

See also: xs3_vect_s32_sqrt

Macros

Functions

Detailed Description

Macro Definition Documentation

◆ xs3_vect_complex_s32_add_prepare

◆ xs3_vect_complex_s32_add_scalar_prepare

◆ xs3_vect_complex_s32_conj_macc_prepare

◆ xs3_vect_complex_s32_conj_mul_prepare

◆ xs3_vect_complex_s32_conj_nmacc_prepare

◆ xs3_vect_complex_s32_nmacc_prepare

◆ xs3_vect_complex_s32_real_scale_prepare

◆ xs3_vect_complex_s32_sub_prepare

◆ xs3_vect_s32_add_scalar_prepare

◆ xs3_vect_s32_nmacc_prepare

◆ xs3_vect_s32_scale_prepare

◆ xs3_vect_s32_sub_prepare

Function Documentation

◆ xs3_vect_2vec_prepare()

◆ xs3_vect_complex_s32_macc_prepare()

◆ xs3_vect_complex_s32_mag_prepare()

◆ xs3_vect_complex_s32_mul_prepare()

◆ xs3_vect_complex_s32_real_mul_prepare()

◆ xs3_vect_complex_s32_scale_prepare()

◆ xs3_vect_complex_s32_squared_mag_prepare()

◆ xs3_vect_complex_s32_sum_prepare()

◆ xs3_vect_s32_add_prepare()

◆ xs3_vect_s32_clip_prepare()

◆ xs3_vect_s32_dot_prepare()

◆ xs3_vect_s32_energy_prepare()

◆ xs3_vect_s32_inverse_prepare()

◆ xs3_vect_s32_macc_prepare()

◆ xs3_vect_s32_mul_prepare()

◆ xs3_vect_s32_sqrt_prepare()