XCORE SDK
XCORE Software Development Kit
xs3_vect_s32.h
1 // Copyright 2020-2021 XMOS LIMITED.
2 // This Software is subject to the terms of the XMOS Public Licence: Version 1.
3 
4 #pragma once
5 
6 #include "xs3_math_conf.h"
7 #include "xs3_math_types.h"
8 #include "xs3_util.h"
9 
10 #ifdef __XC__
11 extern "C" {
12 #endif
13 
14 
32 #define XS3_VECT_SQRT_S32_MAX_DEPTH (31)
33 
34 
65 C_API
66 headroom_t xs3_vect_s32_copy(
67  int32_t a[],
68  const int32_t b[],
69  const unsigned length);
70 
71 
102 C_API
103 headroom_t xs3_vect_s32_abs(
104  int32_t a[],
105  const int32_t b[],
106  const unsigned length);
107 
108 
153 C_API
154 int64_t xs3_vect_s32_abs_sum(
155  const int32_t b[],
156  const unsigned length);
157 
158 
208 C_API
209 headroom_t xs3_vect_s32_add(
210  int32_t a[],
211  const int32_t b[],
212  const int32_t c[],
213  const unsigned length,
214  const right_shift_t b_shr,
215  const right_shift_t c_shr);
216 
217 
298 C_API
299 void xs3_vect_s32_add_prepare(
300  exponent_t* a_exp,
301  right_shift_t* b_shr,
302  right_shift_t* c_shr,
303  const exponent_t b_exp,
304  const exponent_t c_exp,
305  const headroom_t b_hr,
306  const headroom_t c_hr);
307 
308 
364 C_API
365 headroom_t xs3_vect_s32_add_scalar(
366  int32_t a[],
367  const int32_t b[],
368  const int32_t c,
369  const unsigned length,
370  const right_shift_t b_shr);
371 
372 
385 #define xs3_vect_s32_add_scalar_prepare xs3_vect_s32_add_prepare
386 
387 
408 C_API
409 unsigned xs3_vect_s32_argmax(
410  const int32_t b[],
411  const unsigned length);
412 
413 
436 C_API
437 unsigned xs3_vect_s32_argmin(
438  const int32_t b[],
439  const unsigned length);
440 
441 
488 C_API
489 headroom_t xs3_vect_s32_clip(
490  int32_t a[],
491  const int32_t b[],
492  const unsigned length,
493  const int32_t lower_bound,
494  const int32_t upper_bound,
495  const right_shift_t b_shr);
496 
532 C_API
533 void xs3_vect_s32_clip_prepare(
534  exponent_t* a_exp,
535  right_shift_t* b_shr,
536  int32_t* lower_bound,
537  int32_t* upper_bound,
538  const exponent_t b_exp,
539  const exponent_t bound_exp,
540  const headroom_t b_hr);
541 
542 
613 C_API
614 int64_t xs3_vect_s32_dot(
615  const int32_t b[],
616  const int32_t c[],
617  const unsigned length,
618  const right_shift_t b_shr,
619  const right_shift_t c_shr);
620 
621 
678 C_API
679 void xs3_vect_s32_dot_prepare(
680  exponent_t* a_exp,
681  right_shift_t* b_shr,
682  right_shift_t* c_shr,
683  const exponent_t b_exp,
684  const exponent_t c_exp,
685  const headroom_t b_hr,
686  const headroom_t c_hr,
687  const unsigned length);
688 
689 
749 C_API
750 int64_t xs3_vect_s32_energy(
751  const int32_t b[],
752  const unsigned length,
753  const right_shift_t b_shr);
754 
755 
805 C_API
806 void xs3_vect_s32_energy_prepare(
807  exponent_t* a_exp,
808  right_shift_t* b_shr,
809  const unsigned length,
810  const exponent_t b_exp,
811  const headroom_t b_hr);
812 
813 
846 C_API
847 headroom_t xs3_vect_s32_headroom(
848  const int32_t x[],
849  const unsigned length);
850 
851 
889 C_API
890 headroom_t xs3_vect_s32_inverse(
891  int32_t a[],
892  const int32_t b[],
893  const unsigned length,
894  const unsigned scale);
895 
896 
931 C_API
932 void xs3_vect_s32_inverse_prepare(
933  exponent_t* a_exp,
934  unsigned* scale,
935  const int32_t b[],
936  const exponent_t b_exp,
937  const unsigned length);
938 
939 
969 C_API
970 int32_t xs3_vect_s32_max(
971  const int32_t b[],
972  const unsigned length);
973 
974 
1026 C_API
1027 headroom_t xs3_vect_s32_max_elementwise(
1028  int32_t a[],
1029  const int32_t b[],
1030  const int32_t c[],
1031  const unsigned length,
1032  const right_shift_t b_shr,
1033  const right_shift_t c_shr);
1034 
1035 
1063 C_API
1064 int32_t xs3_vect_s32_min(
1065  const int32_t b[],
1066  const unsigned length);
1067 
1068 
1120 C_API
1121 headroom_t xs3_vect_s32_min_elementwise(
1122  int32_t a[],
1123  const int32_t b[],
1124  const int32_t c[],
1125  const unsigned length,
1126  const right_shift_t b_shr,
1127  const right_shift_t c_shr);
1128 
1129 
1174 C_API
1175 headroom_t xs3_vect_s32_mul(
1176  int32_t a[],
1177  const int32_t b[],
1178  const int32_t c[],
1179  const unsigned length,
1180  const right_shift_t b_shr,
1181  const right_shift_t c_shr);
1183 
1184 
1243 C_API
1244 headroom_t xs3_vect_s32_macc(
1245  int32_t acc[],
1246  const int32_t b[],
1247  const int32_t c[],
1248  const unsigned length,
1249  const right_shift_t acc_shr,
1250  const right_shift_t b_shr,
1251  const right_shift_t c_shr);
1252 
1253 
1313 C_API
1314 headroom_t xs3_vect_s32_nmacc(
1315  int32_t acc[],
1316  const int32_t b[],
1317  const int32_t c[],
1318  const unsigned length,
1319  const right_shift_t acc_shr,
1320  const right_shift_t b_shr,
1321  const right_shift_t c_shr);
1322 
1323 
1402 C_API
1403 void xs3_vect_s32_macc_prepare(
1404  exponent_t* new_acc_exp,
1405  right_shift_t* acc_shr,
1406  right_shift_t* b_shr,
1407  right_shift_t* c_shr,
1408  const exponent_t acc_exp,
1409  const exponent_t b_exp,
1410  const exponent_t c_exp,
1411  const headroom_t acc_hr,
1412  const headroom_t b_hr,
1413  const headroom_t c_hr);
1414 
1415 
1428 #define xs3_vect_s32_nmacc_prepare xs3_vect_s32_macc_prepare
1429 
1430 
1494 C_API
1495 void xs3_vect_s32_mul_prepare(
1496  exponent_t* a_exp,
1497  right_shift_t* b_shr,
1498  right_shift_t* c_shr,
1499  const exponent_t b_exp,
1500  const exponent_t c_exp,
1501  const headroom_t b_hr,
1502  const headroom_t c_hr);
1503 
1504 
1539 C_API
1540 headroom_t xs3_vect_s32_rect(
1541  int32_t a[],
1542  const int32_t b[],
1543  const unsigned length);
1544 
1545 
1591 C_API
1592 headroom_t xs3_vect_s32_scale(
1593  int32_t a[],
1594  const int32_t b[],
1595  const unsigned length,
1596  const int32_t c,
1597  const right_shift_t b_shr,
1598  const right_shift_t c_shr);
1599 
1600 
1613 #define xs3_vect_s32_scale_prepare xs3_vect_s32_mul_prepare
1614 
1615 
1616 
1644 C_API
1645 void xs3_vect_s32_set(
1646  int32_t a[],
1647  const int32_t b,
1648  const unsigned length);
1649 
1650 
1685 C_API
1686 headroom_t xs3_vect_s32_shl(
1687  int32_t a[],
1688  const int32_t b[],
1689  const unsigned length,
1690  const left_shift_t b_shl);
1691 
1692 
1727 C_API
1728 headroom_t xs3_vect_s32_shr(
1729  int32_t a[],
1730  const int32_t b[],
1731  const unsigned length,
1732  const right_shift_t b_shr);
1733 
1734 
1781 C_API
1782 headroom_t xs3_vect_s32_sqrt(
1783  int32_t a[],
1784  const int32_t b[],
1785  const unsigned length,
1786  const right_shift_t b_shr,
1787  const unsigned depth);
1788 
1789 
1844 C_API
1845 void xs3_vect_s32_sqrt_prepare(
1846  exponent_t* a_exp,
1847  right_shift_t* b_shr,
1848  const exponent_t b_exp,
1849  const right_shift_t b_hr);
1850 
1851 
1902 C_API
1903 headroom_t xs3_vect_s32_sub(
1904  int32_t a[],
1905  const int32_t b[],
1906  const int32_t c[],
1907  const unsigned length,
1908  const right_shift_t b_shr,
1909  const right_shift_t c_shr);
1910 
1911 
1924 #define xs3_vect_s32_sub_prepare xs3_vect_s32_add_prepare
1925 
1973 C_API
1974 int64_t xs3_vect_s32_sum(
1975  const int32_t b[],
1976  const unsigned length);
1977 
1978 
2028 C_API
2029 void xs3_vect_s32_zip(
2030  complex_s32_t a[],
2031  const int32_t b[],
2032  const int32_t c[],
2033  const unsigned length,
2034  const right_shift_t b_shr,
2035  const right_shift_t c_shr);
2036 
2037 
2072 C_API
2073 void xs3_vect_s32_unzip(
2074  int32_t a[],
2075  int32_t b[],
2076  const complex_s32_t c[],
2077  const unsigned length);
2078 
2079 
2133 C_API
2134 headroom_t xs3_vect_s32_convolve_valid(
2135  int32_t y[],
2136  const int32_t x[],
2137  const int32_t b_q30[],
2138  const unsigned x_length,
2139  const unsigned b_length);
2140 
2141 
2149 typedef enum {
2169  PAD_MODE_REFLECT = (INT32_MAX-0),
2170 
2190  PAD_MODE_EXTEND = (INT32_MAX-1),
2191 
2211  PAD_MODE_ZERO = 0,
2212 } pad_mode_e;
2213 
2214 
2274 C_API
2275 headroom_t xs3_vect_s32_convolve_same(
2276  int32_t y[],
2277  const int32_t x[],
2278  const int32_t b_q30[],
2279  const unsigned x_length,
2280  const unsigned b_length,
2281  const pad_mode_e padding_mode);
2282 
2283 
2306 C_API
2307 void xs3_vect_s32_merge_accs(
2308  int32_t a[],
2309  const xs3_split_acc_s32_t b[],
2310  const unsigned length);
2311 
2312 
2334 C_API
2335 void xs3_vect_s32_split_accs(
2336  xs3_split_acc_s32_t a[],
2337  const int32_t b[],
2338  const unsigned length);
2339 
2435 C_API
2436 void xs3_vect_2vec_prepare(
2437  exponent_t* a_exp,
2438  right_shift_t* b_shr,
2439  right_shift_t* c_shr,
2440  const exponent_t b_exp,
2441  const exponent_t c_exp,
2442  const headroom_t b_hr,
2443  const headroom_t c_hr,
2444  const headroom_t extra_operand_hr);
2445 
2446 
2447 
2448 #ifdef __XC__
2449 } //extern "C"
2450 #endif
2451 
int exponent_t
An exponent.
Definition: xs3_math_types.h:76
int left_shift_t
A leftwards arithmetic bit-shift.
Definition: xs3_math_types.h:110
int right_shift_t
A rightwards arithmetic bit-shift.
Definition: xs3_math_types.h:98
unsigned headroom_t
Headroom of some integer or integer array.
Definition: xs3_math_types.h:86
void xs3_vect_s32_unzip(int32_t a[], int32_t b[], const complex_s32_t c[], const unsigned length)
Deinterleave the real and imaginary parts of a complex 32-bit vector into two separate vectors.
Definition: xs3_vect_zip.c:30
void xs3_vect_s32_zip(complex_s32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Interleave the elements of two vectors into a single vector.
Definition: xs3_vect_zip.c:13
headroom_t xs3_vect_s32_shl(int32_t a[], const int32_t b[], const unsigned length, const left_shift_t b_shl)
Left-shift the elements of a 32-bit vector by a specified number of bits.
Definition: xs3_vect_shl.c:28
headroom_t xs3_vect_s32_macc(int32_t acc[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
Multiply one 32-bit vector element-wise by another, and add the result to an accumulator.
Definition: xs3_vect_macc.c:54
headroom_t xs3_vect_s32_clip(int32_t a[], const int32_t b[], const unsigned length, const int32_t lower_bound, const int32_t upper_bound, const right_shift_t b_shr)
Clamp the elements of a 32-bit vector to a specified range.
Definition: xs3_vect_abs_clip_rect.c:60
headroom_t xs3_vect_s32_scale(int32_t a[], const int32_t b[], const unsigned length, const int32_t c, const right_shift_t b_shr, const right_shift_t c_shr)
Multiply a 32-bit vector by a scalar.
Definition: xs3_vect_mul.c:71
void xs3_vect_s32_split_accs(xs3_split_acc_s32_t a[], const int32_t b[], const unsigned length)
Split a vector of int32_t's into a vector of xs3_split_acc_s32_t.
Definition: xs3_misc.c:41
headroom_t xs3_vect_s32_nmacc(int32_t acc[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t acc_shr, const right_shift_t b_shr, const right_shift_t c_shr)
Multiply one 32-bit vector element-wise by another, and subtract the result from an accumulator.
Definition: xs3_vect_macc.c:75
headroom_t xs3_vect_s32_headroom(const int32_t x[], const unsigned length)
Calculate the headroom of a 32-bit vector.
Definition: xs3_vect_headroom.c:54
headroom_t xs3_vect_s32_inverse(int32_t a[], const int32_t b[], const unsigned length, const unsigned scale)
Compute the inverse of elements of a 32-bit vector.
Definition: xs3_vect_inverse.c:32
headroom_t xs3_vect_s32_add_scalar(int32_t a[], const int32_t b[], const int32_t c, const unsigned length, const right_shift_t b_shr)
Add a scalar to a 32-bit vector.
Definition: xs3_vect.c:206
headroom_t xs3_vect_s32_mul(int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Multiply one 32-bit vector element-wise by another.
Definition: xs3_vect_mul.c:34
int32_t xs3_vect_s32_max(const int32_t b[], const unsigned length)
Find the maximum value in a 32-bit vector.
Definition: xs3_vect_stats.c:42
headroom_t xs3_vect_s32_rect(int32_t a[], const int32_t b[], const unsigned length)
Rectify the elements of a 32-bit vector.
Definition: xs3_vect_abs_clip_rect.c:91
int64_t xs3_vect_s32_abs_sum(const int32_t b[], const unsigned length)
Compute the sum of the absolute values of elements of a 32-bit vector.
Definition: xs3_vect_stats.c:196
headroom_t xs3_vect_s32_shr(int32_t a[], const int32_t b[], const unsigned length, const right_shift_t b_shr)
Right-shift the elements of a 32-bit vector by a specified number of bits.
Definition: xs3_vect.c:194
int64_t xs3_vect_s32_dot(const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Compute the inner product between two 32-bit vectors.
Definition: xs3_vect_dot.c:35
headroom_t xs3_vect_s32_sqrt(int32_t a[], const int32_t b[], const unsigned length, const right_shift_t b_shr, const unsigned depth)
Compute the square root of elements of a 32-bit vector.
Definition: xs3_vect_sqrt.c:53
headroom_t xs3_vect_s32_abs(int32_t a[], const int32_t b[], const unsigned length)
Compute the element-wise absolute value of a 32-bit vector.
Definition: xs3_vect_abs_clip_rect.c:27
headroom_t xs3_vect_s32_sub(int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Subtract one 32-bit vector from another.
Definition: xs3_vect_add_sub.c:74
unsigned xs3_vect_s32_argmin(const int32_t b[], const unsigned length)
Obtain the array index of the minimum element of a 32-bit vector.
Definition: xs3_vect_stats.c:161
headroom_t xs3_vect_s32_convolve_same(int32_t y[], const int32_t x[], const int32_t b_q30[], const unsigned x_length, const unsigned b_length, const pad_mode_e padding_mode)
Convolve a 32-bit vector with a short kernel.
Definition: xs3_convolve.c:39
int64_t xs3_vect_s32_energy(const int32_t b[], const unsigned length, const right_shift_t b_shr)
Calculate the energy (sum of squares of elements) of a 32-bit vector.
Definition: xs3_vect_stats.c:237
void xs3_vect_s32_set(int32_t a[], const int32_t b, const unsigned length)
Set all elements of a 32-bit vector to the specified value.
Definition: xs3_vect_set.c:24
int32_t xs3_vect_s32_min(const int32_t b[], const unsigned length)
Find the minimum value in a 32-bit vector.
Definition: xs3_vect_stats.c:97
headroom_t xs3_vect_s32_copy(int32_t a[], const int32_t b[], const unsigned length)
Copy one 32-bit vector to another.
Definition: xs3_vect_copy.c:24
headroom_t xs3_vect_s32_max_elementwise(int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Get the element-wise maximum of two 32-bit vectors.
Definition: xs3_vect_stats.c:54
headroom_t xs3_vect_s32_min_elementwise(int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Get the element-wise minimum of two 32-bit vectors.
Definition: xs3_vect_stats.c:109
headroom_t xs3_vect_s32_add(int32_t a[], const int32_t b[], const int32_t c[], const unsigned length, const right_shift_t b_shr, const right_shift_t c_shr)
Add together two 32-bit vectors.
Definition: xs3_vect_add_sub.c:32
headroom_t xs3_vect_s32_convolve_valid(int32_t y[], const int32_t x[], const int32_t b_q30[], const unsigned x_length, const unsigned b_length)
Convolve a 32-bit vector with a short kernel.
Definition: xs3_vect_convolve.c:14
void xs3_vect_s32_merge_accs(int32_t a[], const xs3_split_acc_s32_t b[], const unsigned length)
Merge a vector of split 32-bit accumulators into a vector of int32_t's.
Definition: xs3_misc.c:15
pad_mode_e
Supported padding modes for convolutions in "same" mode.
Definition: xs3_vect_s32.h:2149
int64_t xs3_vect_s32_sum(const int32_t b[], const unsigned length)
Sum the elements of a 32-bit vector.
Definition: xs3_vect_sum.c:29
@ PAD_MODE_REFLECT
Definition: xs3_vect_s32.h:2169
@ PAD_MODE_ZERO
Definition: xs3_vect_s32.h:2211
@ PAD_MODE_EXTEND
Definition: xs3_vect_s32.h:2190
void xs3_vect_s32_energy_prepare(exponent_t *a_exp, right_shift_t *b_shr, const unsigned length, const exponent_t b_exp, const headroom_t b_hr)
Obtain the output exponent and input shift used by xs3_vect_s32_energy().
Definition: xs3_prepare.c:587
void xs3_vect_s32_add_prepare(exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
Obtain the output exponent and input shifts to add or subtract two 16- or 32-bit BFP vectors.
Definition: xs3_prepare.c:415
void xs3_vect_s32_dot_prepare(exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr, const unsigned length)
Obtain the output exponent and input shift used by xs3_vect_s32_dot().
Definition: xs3_prepare.c:439
void xs3_vect_s32_mul_prepare(exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr)
Obtain the output exponent and input shifts used by xs3_vect_s32_mul().
Definition: xs3_prepare.c:205
void xs3_vect_s32_inverse_prepare(exponent_t *a_exp, unsigned *scale, const int32_t b[], const exponent_t b_exp, const unsigned length)
Obtain the output exponent and scale used by xs3_vect_s32_inverse().
Definition: xs3_prepare.c:536
void xs3_vect_s32_macc_prepare(exponent_t *new_acc_exp, right_shift_t *acc_shr, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t acc_exp, const exponent_t b_exp, const exponent_t c_exp, const headroom_t acc_hr, const headroom_t b_hr, const headroom_t c_hr)
Obtain the output exponent and shifts needed by xs3_vect_s32_macc().
Definition: xs3_prepare.c:152
void xs3_vect_s32_sqrt_prepare(exponent_t *a_exp, right_shift_t *b_shr, const exponent_t b_exp, const right_shift_t b_hr)
Obtain the output exponent and shift parameter used by xs3_vect_s32_sqrt().
Definition: xs3_prepare.c:512
void xs3_vect_2vec_prepare(exponent_t *a_exp, right_shift_t *b_shr, right_shift_t *c_shr, const exponent_t b_exp, const exponent_t c_exp, const headroom_t b_hr, const headroom_t c_hr, const headroom_t extra_operand_hr)
Obtain the output exponent and input shifts required to perform a binary add-like operation.
Definition: xs3_prepare.c:709
void xs3_vect_s32_clip_prepare(exponent_t *a_exp, right_shift_t *b_shr, int32_t *lower_bound, int32_t *upper_bound, const exponent_t b_exp, const exponent_t bound_exp, const headroom_t b_hr)
Obtain the output exponent, input shift and modified bounds used by xs3_vect_s32_clip().
Definition: xs3_prepare.c:651
complex_s32_t
A complex number with a 32-bit real part and 32-bit imaginary part.
Definition: xs3_math_types.h:49
xs3_split_acc_s32_t
Holds a set of sixteen 32-bit accumulators in the XS3 VPU's internal format.
Definition: xs3_math_types.h:429