// Copyright (c) 2015, XMOS Ltd, All rights reserved
/* WARNING - This is an autogenerated file - it is recommended to edit
 * the templates that it was generated from. Please see the /gen directory
 * of this module for details
 */

/* 
 * This function handles UART port RX (Autogenerated for unrolling)
 */

/* Use the application's RX configuration header when the build signals that
 * one exists; otherwise fall back to a default of 8 channels.
 */
#ifdef __multi_uart_rx_conf_h_exists__
#include "multi_uart_rx_conf.h"
#else
#define MUART_RX_CHAN_COUNT  8
#endif

/* Number of stack words claimed by the entsp at function entry; incoming
 * stack arguments are addressed as sp[STACK_SIZE+n].
 */
#define STACK_SIZE  12
/* Byte stride used to step from one channel's entry to the next in the
 * uart rx info table (the channel index is multiplied by this value).
 */
#define CHAN_STRUCT_SIZE (7*4)
/* Number of channels in use; it selects which unrolled channel section
 * branches back to rx_bit_ep instead of continuing to the next section.
 */
// TODO this needs to come from the configuration header
#define CHAN_COUNT MUART_RX_CHAN_COUNT

/* function prototype:
 * extern "C" {
 *  void uart_rx_loop_8(in buffered port:32 pUart, e_uart_rx_chan_state state[],
 *                      int tick_count[], int bit_count[], int uart_word[],
 *                      streaming chanend cUART, unsigned rx_char_slots[],
 *                      unsigned fourBitConfig[],
 *                      multi_uart_rx_info_t rx_slot_info[],
 *                      unsigned startBitLookup[]);
 * }
 */
 
/* Export the resource-usage symbols for uart_rx_loop_8 and give them values:
 * STACK_SIZE stack words, and 0 for channel ends, timers and threads.
 * NOTE(review): presumably consumed by the XMOS tools for resource checking
 * of callers - confirm against the tools documentation.
 */
.globl uart_rx_loop_8.nstackwords
.globl uart_rx_loop_8.maxthreads
.globl uart_rx_loop_8.maxtimers
.globl uart_rx_loop_8.maxchanends

.linkset uart_rx_loop_8.nstackwords, STACK_SIZE
.linkset uart_rx_loop_8.maxchanends, 0
.linkset uart_rx_loop_8.maxtimers, 0
.linkset uart_rx_loop_8.maxthreads, 0

/* Export the function symbol itself */
.globl uart_rx_loop_8

/* Code goes in the text section. The .cc_top opens a block named
 * uart_rx_loop_8.func that is tied to the uart_rx_loop_8 symbol; the matching
 * close is not in the visible part of the file.
 */
.text
.cc_top uart_rx_loop_8.func, uart_rx_loop_8

/*
 * uart_rx_loop_8 - entry point of the unrolled multi-UART receive loop.
 *
 * Arguments in registers on entry: r0 = port, r1 = state[], r2 = tick_count[],
 * r3 = bit_count[]. The remaining six arguments are read from the stack at
 * sp[STACK_SIZE+1] .. sp[STACK_SIZE+6] (see the *_ARG offsets below).
 *
 * The prologue saves r4-r10, builds the sample mask in r7, loads the
 * uart_word pointer into r4, stores r1-r3 on the stack, and sets the event
 * vector of the cUART channel end to finish_uart_rx (a label that is not in
 * the visible part of this file). rx_bit_ep is the top of the receive loop:
 * it reads one word from the port into r6 and falls through into the
 * per-channel processing sections.
 */
uart_rx_loop_8:
    entsp STACK_SIZE

    /* stack usage:
     * sp[1]  => callee save r4
     * sp[2]  => callee save r5
     * sp[3]  => callee save r6
     * sp[4]  => callee save r7
     * sp[5]  => callee save r8
     * sp[6]  => callee save r9
     * sp[7]  => callee save r10
     * sp[8]  => state pointer
     * sp[9]  => tick_count pointer
     * sp[10] => bit_count pointer
     * sp[11] => potential start indicator
     * sp[STACK_SIZE+1] => uart_word pointer
     * sp[STACK_SIZE+2] => cUART channel
     * sp[STACK_SIZE+3] => rx_char buffer pointer
     * sp[STACK_SIZE+4] => fourBit config table
     * sp[STACK_SIZE+5] => uart rx info table
     * sp[STACK_SIZE+6] => startBit lookup table
     */
    /* offsets of the stack-passed arguments, relative to sp[STACK_SIZE] */
#define UART_WORD_POINTER_ARG 1
#define CUART_ARG   2
#define RX_CHAR_POS_ARG  3
#define FOURBIT_CONFIG_ARG 4
#define UART_RX_INFO_ARG 5
#define START_BIT_CONFIG_ARG 6
    /* callee save */
	stw r4, sp[1]	
	stw r5, sp[2]
	stw r6, sp[3]
	stw r7, sp[4]
	stw r8, sp[5]
	stw r9, sp[6]
	stw r10, sp[7]
	
	/* register usage 
	 * r0  => port
	 * r1  => state pointer
	 * r2  => tick_count pointer
	 * r3  => bit_count pointer
	 * r4  => uart_word pointer
	 * r5  => UART channel index (loaded with channel_num in each section)
	 * r6  => port_val
	 * r7  => mask
	 * r8  => fourBit / bit
	 * r9  => scratch
	 * r10 => scratch
	 * r11 => scratch / uart_tx_channel access
	 *        NOTE(review): "uart_tx_channel" looks like a leftover from a
	 *        TX template; in the visible code r11 is only scratch - confirm
	 */
	
	/* clear any events */
	clre
	
	/* set mask: r7 = 0x01010101 (bit 0 of every byte) */
	ldc r11, 0x0101
	shl r7, r11, 16
	or r7, r7, r11
	
	/* load uart_word pointer */
        ldw r4, sp[STACK_SIZE+UART_WORD_POINTER_ARG]
	
	/* store root pointers */
	stw r1, sp[8]
	stw r2, sp[9]
	stw r3, sp[10]
	
	/* setup pause target: events on the cUART channel end vector to
	 * finish_uart_rx, and events are enabled on that resource */
	ldap r11, finish_uart_rx
        ldw r10, sp[STACK_SIZE+CUART_ARG]
	setv res[r10], r11
	eeu res[r10]
	
rx_bit_ep:
    /* check for pause command on the channel: status register bit 0 is set
     * and then immediately cleared again.
     * NOTE(review): presumably this opens a one-instruction window in which
     * a pending channel event is taken - confirm against the XS1 manual */
    setsr 0x1
    clrsr 0x1
    
    /* get data: read one word from the port into port_val (r6) */
	in r6,res[r0]
	
	
/*
 * Unrolled receive section for UART channel 0.
 *
 * The generic names used in the body (process_loop, idle_process, ...) are
 * re-pointed at per-channel labels by the macros below, so the same body
 * text is repeated once per channel. next_channel is the label the paths
 * through this section branch to when they are done: rx_bit_ep (fetch a new
 * port word) when CHAN_COUNT == 1, otherwise process_loop_1.
 *
 * Registers used from the function prologue: r1 state[], r2 tick_count[],
 * r3 bit_count[], r4 uart_word[], r6 port_val, r7 mask (0x01010101).
 * r5 and r8-r11 are overwritten here.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 0
#define process_loop process_loop_0
#define idle_process idle_process_0
#define data_process data_process_0
#define immediate_start_bit immediate_start_bit_0
#define invalid_start invalid_start_0
#define dp_calc_tick dp_calc_tick_0
#if CHAN_COUNT == 1
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_1
#endif

process_loop:                                   
    /* r5 = this channel's index; it indexes every per-channel array below */
	ldc r5, channel_num
	
	/* get fourBits: mask port_val (r6) with r7, compress the surviving
	 * bits with crc32, then use the result as an index into this
	 * channel's fourBit lookup table */
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup
    ldw r11, r11[r5] // r11 = table address held in the config entry for this channel
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tick_count is <= 4 (same as < 5); if so go and take a sample */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     */
    
    /* r11 = address of this channel's entry in the uart rx info table
     * (table base + channel index * CHAN_STRUCT_SIZE) */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE + START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* the lookup is indexed by the low 4 bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: a lookup result of all ones
     * (mkmsk 32) is treated as "no valid start bit" */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state (1, i.e. not idle). r8 is 0 here because the eq
     * above was false, so the constant is loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* nothing is stored to state[], so the channel stays idle;
     * shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* get potential start bit indicator: fourBits >> 4, parked in sp[11]
     * until the end-of-word check further down */
    shr r9, r8, 4 	    // shift fourBit right 4 & store in r9
    stw r9, sp[11]      // store r9 in "potential start indicator" on stack
    
    /* get bit from fourBits - tick_count still in r10; the sample taken
     * is bit (tick_count - 1) of fourBits */
    sub r10, r10, 1     // decrement tick_count (register copy only)
    shr r8, r8, r10 	// shift right
    ldc r9, 1
    and r8, r8, r9 		// now got the bit
    
    /* store the bit in uart_word: uart_word = (uart_word << 1) | bit */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 still holds 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
check_bit_count_0:
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
store_data_0:
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* signal the other end of cUART: the token sent is this channel's
     * index (r5), not the received word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample */
    
    /* r11 = address of this channel's entry in the uart rx info table */
    ldw r11, sp[STACK_SIZE + UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value saved in sp[11] by data_process) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change ?!
    // (the store below is commented out, so state[] keeps its current non-zero value)
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* more bits of the word remain: schedule the next sample point */
    
    // get struct pointer for UART channel
    ldw r11, sp[STACK_SIZE + UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] (the stored value, not the decremented register copy) */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit, and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* when this is the last channel, loop back for a new port word;
     * otherwise execution falls through into the next channel's section */
    #if CHAN_COUNT == 1
    bu rx_bit_ep
    #endif
/*
 * Unrolled receive section for UART channel 1.
 *
 * The generic names used in the body (process_loop, idle_process, ...) are
 * re-pointed at per-channel labels by the macros below, so the same body
 * text is repeated once per channel. next_channel is the label the paths
 * through this section branch to when they are done: rx_bit_ep (fetch a new
 * port word) when CHAN_COUNT == 2, otherwise process_loop_2.
 *
 * Registers used from the function prologue: r1 state[], r2 tick_count[],
 * r3 bit_count[], r4 uart_word[], r6 port_val (shifted right by one in
 * every preceding channel section), r7 mask (0x01010101).
 * r5 and r8-r11 are overwritten here.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 1
#define process_loop process_loop_1
#define idle_process idle_process_1
#define data_process data_process_1
#define immediate_start_bit immediate_start_bit_1
#define invalid_start invalid_start_1
#define dp_calc_tick dp_calc_tick_1
#if CHAN_COUNT == 2
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_2
#endif

process_loop:                                   
    /* r5 = this channel's index; it indexes every per-channel array below */
	ldc r5, channel_num
	
	/* get fourBits: mask port_val (r6) with r7, compress the surviving
	 * bits with crc32, then use the result as an index into this
	 * channel's fourBit lookup table */
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup
    ldw r11, r11[r5] // r11 = table address held in the config entry for this channel
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tick_count is <= 4 (same as < 5); if so go and take a sample */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     */
    
    /* r11 = address of this channel's entry in the uart rx info table
     * (table base + channel index * CHAN_STRUCT_SIZE) */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* the lookup is indexed by the low 4 bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: a lookup result of all ones
     * (mkmsk 32) is treated as "no valid start bit" */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state (1, i.e. not idle). r8 is 0 here because the eq
     * above was false, so the constant is loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* nothing is stored to state[], so the channel stays idle;
     * shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* get potential start bit indicator: fourBits >> 4, parked in sp[11]
     * until the end-of-word check further down */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample taken
     * is bit (tick_count - 1) of fourBits */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: uart_word = (uart_word << 1) | bit */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 still holds 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* signal the other end of cUART: the token sent is this channel's
     * index (r5), not the received word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample */
    
    /* r11 = address of this channel's entry in the uart rx info table */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value saved in sp[11] by data_process) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change ?!
    // (the store below is commented out, so state[] keeps its current non-zero value)
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* more bits of the word remain: schedule the next sample point */
    
    // get struct pointer for UART channel
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] (the stored value, not the decremented register copy) */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit, and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* when this is the last channel, loop back for a new port word;
     * otherwise execution falls through into the next channel's section */
    #if CHAN_COUNT == 2
    bu rx_bit_ep
    #endif
/*
 * Unrolled receive section for UART channel 2.
 *
 * The generic names used in the body (process_loop, idle_process, ...) are
 * re-pointed at per-channel labels by the macros below, so the same body
 * text is repeated once per channel. next_channel is the label the paths
 * through this section branch to when they are done: rx_bit_ep (fetch a new
 * port word) when CHAN_COUNT == 3, otherwise process_loop_3.
 *
 * Registers used from the function prologue: r1 state[], r2 tick_count[],
 * r3 bit_count[], r4 uart_word[], r6 port_val (shifted right by one in
 * every preceding channel section), r7 mask (0x01010101).
 * r5 and r8-r11 are overwritten here.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 2
#define process_loop process_loop_2
#define idle_process idle_process_2
#define data_process data_process_2
#define immediate_start_bit immediate_start_bit_2
#define invalid_start invalid_start_2
#define dp_calc_tick dp_calc_tick_2
#if CHAN_COUNT == 3
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_3
#endif

process_loop:                                   
    /* r5 = this channel's index; it indexes every per-channel array below */
	ldc r5, channel_num
	
	/* get fourBits: mask port_val (r6) with r7, compress the surviving
	 * bits with crc32, then use the result as an index into this
	 * channel's fourBit lookup table */
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup
    ldw r11, r11[r5] // r11 = table address held in the config entry for this channel
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tick_count is <= 4 (same as < 5); if so go and take a sample */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     */
    
    /* r11 = address of this channel's entry in the uart rx info table
     * (table base + channel index * CHAN_STRUCT_SIZE) */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* the lookup is indexed by the low 4 bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: a lookup result of all ones
     * (mkmsk 32) is treated as "no valid start bit" */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state (1, i.e. not idle). r8 is 0 here because the eq
     * above was false, so the constant is loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* nothing is stored to state[], so the channel stays idle;
     * shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* get potential start bit indicator: fourBits >> 4, parked in sp[11]
     * until the end-of-word check further down */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample taken
     * is bit (tick_count - 1) of fourBits */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: uart_word = (uart_word << 1) | bit */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 still holds 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* signal the other end of cUART: the token sent is this channel's
     * index (r5), not the received word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample */
    
    /* r11 = address of this channel's entry in the uart rx info table */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value saved in sp[11] by data_process) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change ?!
    // (the store below is commented out, so state[] keeps its current non-zero value)
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* more bits of the word remain: schedule the next sample point */
    
    // get struct pointer for UART channel
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] (the stored value, not the decremented register copy) */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit, and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* when this is the last channel, loop back for a new port word;
     * otherwise execution falls through into the next channel's section */
    #if CHAN_COUNT == 3
    bu rx_bit_ep
    #endif
/*
 * Unrolled receive section for UART channel 3.
 *
 * The generic names used in the body (process_loop, idle_process, ...) are
 * re-pointed at per-channel labels by the macros below, so the same body
 * text is repeated once per channel. next_channel is the label the paths
 * through this section branch to when they are done: rx_bit_ep (fetch a new
 * port word) when CHAN_COUNT == 4, otherwise process_loop_4.
 *
 * Registers used from the function prologue: r1 state[], r2 tick_count[],
 * r3 bit_count[], r4 uart_word[], r6 port_val (shifted right by one in
 * every preceding channel section), r7 mask (0x01010101).
 * r5 and r8-r11 are overwritten here.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 3
#define process_loop process_loop_3
#define idle_process idle_process_3
#define data_process data_process_3
#define immediate_start_bit immediate_start_bit_3
#define invalid_start invalid_start_3
#define dp_calc_tick dp_calc_tick_3
#if CHAN_COUNT == 4
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_4
#endif

process_loop:                                   
    /* r5 = this channel's index; it indexes every per-channel array below */
	ldc r5, channel_num
	
	/* get fourBits: mask port_val (r6) with r7, compress the surviving
	 * bits with crc32, then use the result as an index into this
	 * channel's fourBit lookup table */
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup
    ldw r11, r11[r5] // r11 = table address held in the config entry for this channel
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tick_count is <= 4 (same as < 5); if so go and take a sample */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     */
    
    /* r11 = address of this channel's entry in the uart rx info table
     * (table base + channel index * CHAN_STRUCT_SIZE) */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* the lookup is indexed by the low 4 bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: a lookup result of all ones
     * (mkmsk 32) is treated as "no valid start bit" */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state (1, i.e. not idle). r8 is 0 here because the eq
     * above was false, so the constant is loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* nothing is stored to state[], so the channel stays idle;
     * shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* get potential start bit indicator: fourBits >> 4, parked in sp[11]
     * until the end-of-word check further down */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample taken
     * is bit (tick_count - 1) of fourBits */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: uart_word = (uart_word << 1) | bit */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 still holds 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* signal the other end of cUART: the token sent is this channel's
     * index (r5), not the received word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample */
    
    /* r11 = address of this channel's entry in the uart rx info table */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value saved in sp[11] by data_process) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change ?!
    // (the store below is commented out, so state[] keeps its current non-zero value)
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* more bits of the word remain: schedule the next sample point */
    
    // get struct pointer for UART channel
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] (the stored value, not the decremented register copy) */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit, and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* when this is the last channel, loop back for a new port word;
     * otherwise execution falls through into the next channel's section */
    #if CHAN_COUNT == 4
    bu rx_bit_ep
    #endif
/*
 * Unrolled receive section for UART channel 4.
 *
 * The generic names used in the body (process_loop, idle_process, ...) are
 * re-pointed at per-channel labels by the macros below, so the same body
 * text is repeated once per channel. next_channel is the label the paths
 * through this section branch to when they are done: rx_bit_ep (fetch a new
 * port word) when CHAN_COUNT == 5, otherwise process_loop_5.
 *
 * Registers used from the function prologue: r1 state[], r2 tick_count[],
 * r3 bit_count[], r4 uart_word[], r6 port_val (shifted right by one in
 * every preceding channel section), r7 mask (0x01010101).
 * r5 and r8-r11 are overwritten here.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 4
#define process_loop process_loop_4
#define idle_process idle_process_4
#define data_process data_process_4
#define immediate_start_bit immediate_start_bit_4
#define invalid_start invalid_start_4
#define dp_calc_tick dp_calc_tick_4
#if CHAN_COUNT == 5
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_5
#endif

process_loop:                                   
    /* r5 = this channel's index; it indexes every per-channel array below */
	ldc r5, channel_num
	
	/* get fourBits: mask port_val (r6) with r7, compress the surviving
	 * bits with crc32, then use the result as an index into this
	 * channel's fourBit lookup table */
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup    
    ldw r11, r11[r5] // r11 = table address held in the config entry for this channel
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tick_count is <= 4 (same as < 5); if so go and take a sample */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     */
    
    /* r11 = address of this channel's entry in the uart rx info table
     * (table base + channel index * CHAN_STRUCT_SIZE) */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* the lookup is indexed by the low 4 bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: a lookup result of all ones
     * (mkmsk 32) is treated as "no valid start bit" */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state (1, i.e. not idle). r8 is 0 here because the eq
     * above was false, so the constant is loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* nothing is stored to state[], so the channel stays idle;
     * shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* get potential start bit indicator: fourBits >> 4, parked in sp[11]
     * until the end-of-word check further down */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample taken
     * is bit (tick_count - 1) of fourBits */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: uart_word = (uart_word << 1) | bit */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 still holds 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* signal the other end of cUART: the token sent is this channel's
     * index (r5), not the received word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample */
    
    /* r11 = address of this channel's entry in the uart rx info table */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count: r9 = use_sample + clocks_per_bit */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value saved in sp[11] by data_process) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change ?!
    // (the store below is commented out, so state[] keeps its current non-zero value)
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* more bits of the word remain: schedule the next sample point */
    
    // get struct pointer for UART channel
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] (the stored value, not the decremented register copy) */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit, and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* when this is the last channel, loop back for a new port word;
     * otherwise execution falls through into the next channel's section */
    #if CHAN_COUNT == 5
    bu rx_bit_ep
    #endif
/* Channel 5 label set-up: drop the previous channel's definitions of the
 * generic template labels, then map each generic label onto its _5 name so
 * the unrolled template body that follows assembles with unique labels.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 5
#define process_loop process_loop_5
#define idle_process idle_process_5
#define data_process data_process_5
#define immediate_start_bit immediate_start_bit_5
#define invalid_start invalid_start_5
#define dp_calc_tick dp_calc_tick_5
/* next_channel is where channel 5 branches once it is done: rx_bit_ep when
 * CHAN_COUNT == 6 (channel 5 is then the last one), otherwise the entry
 * point of channel 6.
 */
#if CHAN_COUNT == 6
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_6
#endif

process_loop:                                   
    /* Entry point for channel 5 (channel_num). Looks up this channel's
     * fourBits value from the port value in r6, then dispatches:
     *   state[r5] == 0     -> idle_process
     *   tick_count[r5] < 5 -> data_process
     *   otherwise          -> tick_count[r5] -= 4, go to next_channel
     * Register use as seen here: r1 = state[], r2 = tick_count[],
     * r5 = channel index, r6 = port value, r7 = bit mask.
     * r8 (fourBits) is live on entry to idle_process and data_process;
     * r10 (tick_count) is live on entry to data_process.
     */
    /* initialise channel count */              
	ldc r5, channel_num
	
	/* get fourBits */                          
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    /* two-level lookup: per-channel table pointer, then entry indexed by the compressed bits */
    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup    
    ldw r11, r11[r5]
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tickcount is  =< 4 (same as < 5) - signed compare */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     *
     * On entry r8 holds the fourBits lookup result and r5 the channel index.
     * On a valid start: state[r5] = 1, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - startBitLookup entry.
     * Note that bit_count[r5] is rewritten with word_len before the validity
     * check, so it is updated even when the start is rejected.
     * Clobbers r8, r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* index the table with the low four bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: an all-ones table entry marks an invalid start */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state - r8 is 0 here (the eq result was false), so 1 must be loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* no valid start bit: state and tick_count are left untouched */
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* Samples one bit for channel r5. On entry r8 holds the fourBits lookup
     * result and r10 the current tick_count (both set up in process_loop).
     * The bit is shifted into uart_word[r5] and bit_count[r5] is decremented.
     * While bits remain, control goes to dp_calc_tick. When the count reaches
     * zero the word is stored, the channel index is sent as a token, and the
     * channel either goes idle or restarts via immediate_start_bit.
     * Clobbers r8, r9, r10, r11 and stack slot sp[11].
     */
    /* get potential start bit indicator: everything above the low four bits of fourBits */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample is bit (tick_count - 1) */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: shift left by one (r9 == 1) and OR the new bit in */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 is still 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* notify over the channel end: the token sent is the channel index (r5), not the data word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample
     *
     * Reached from data_process when the value saved in sp[11] is non-zero.
     * Re-arms channel r5 for the next word without passing through idle:
     * bit_count[r5] = word_len, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - sp[11].
     * Clobbers r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value data_process saved in sp[11]) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change: process_loop only reaches data_process when state[r5] is non-zero,
    // so the store below is left disabled
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* Reached from data_process while bit_count[r5] is still non-zero.
     * Schedules the next sample point for channel r5:
     * tick_count[r5] = clocks_per_bit - 4 + tick_count[r5].
     * Clobbers r9, r10, r11. Falls through into channel 6's code unless
     * CHAN_COUNT == 6, in which case it leaves via rx_bit_ep.
     */
    
    // get struct pointer for UART channel
    // r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit (word 2 of the channel struct), and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* explicit exit only when channel 5 is the last configured channel; otherwise fall through */
    #if CHAN_COUNT == 6
    bu rx_bit_ep
    #endif
/* Channel 6 label set-up: drop the previous channel's definitions of the
 * generic template labels, then map each generic label onto its _6 name so
 * the unrolled template body that follows assembles with unique labels.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 6
#define process_loop process_loop_6
#define idle_process idle_process_6
#define data_process data_process_6
#define immediate_start_bit immediate_start_bit_6
#define invalid_start invalid_start_6
#define dp_calc_tick dp_calc_tick_6
/* next_channel is where channel 6 branches once it is done: rx_bit_ep when
 * CHAN_COUNT == 7 (channel 6 is then the last one), otherwise the entry
 * point of channel 7.
 */
#if CHAN_COUNT == 7
#define next_channel rx_bit_ep
#else
#define next_channel process_loop_7
#endif

process_loop:                                   
    /* Entry point for channel 6 (channel_num). Looks up this channel's
     * fourBits value from the port value in r6, then dispatches:
     *   state[r5] == 0     -> idle_process
     *   tick_count[r5] < 5 -> data_process
     *   otherwise          -> tick_count[r5] -= 4, go to next_channel
     * Register use as seen here: r1 = state[], r2 = tick_count[],
     * r5 = channel index, r6 = port value, r7 = bit mask.
     * r8 (fourBits) is live on entry to idle_process and data_process;
     * r10 (tick_count) is live on entry to data_process.
     */
    /* initialise channel count */              
	ldc r5, channel_num
	
	/* get fourBits */                          
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    /* two-level lookup: per-channel table pointer, then entry indexed by the compressed bits */
    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup    
    ldw r11, r11[r5]
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tickcount is  =< 4 (same as < 5) - signed compare */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     *
     * On entry r8 holds the fourBits lookup result and r5 the channel index.
     * On a valid start: state[r5] = 1, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - startBitLookup entry.
     * Note that bit_count[r5] is rewritten with word_len before the validity
     * check, so it is updated even when the start is rejected.
     * Clobbers r8, r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* index the table with the low four bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: an all-ones table entry marks an invalid start */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state - r8 is 0 here (the eq result was false), so 1 must be loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* no valid start bit: state and tick_count are left untouched */
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* Samples one bit for channel r5. On entry r8 holds the fourBits lookup
     * result and r10 the current tick_count (both set up in process_loop).
     * The bit is shifted into uart_word[r5] and bit_count[r5] is decremented.
     * While bits remain, control goes to dp_calc_tick. When the count reaches
     * zero the word is stored, the channel index is sent as a token, and the
     * channel either goes idle or restarts via immediate_start_bit.
     * Clobbers r8, r9, r10, r11 and stack slot sp[11].
     */
    /* get potential start bit indicator: everything above the low four bits of fourBits */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample is bit (tick_count - 1) */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: shift left by one (r9 == 1) and OR the new bit in */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 is still 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* notify over the channel end: the token sent is the channel index (r5), not the data word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample
     *
     * Reached from data_process when the value saved in sp[11] is non-zero.
     * Re-arms channel r5 for the next word without passing through idle:
     * bit_count[r5] = word_len, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - sp[11].
     * Clobbers r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value data_process saved in sp[11]) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change: process_loop only reaches data_process when state[r5] is non-zero,
    // so the store below is left disabled
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* Reached from data_process while bit_count[r5] is still non-zero.
     * Schedules the next sample point for channel r5:
     * tick_count[r5] = clocks_per_bit - 4 + tick_count[r5].
     * Clobbers r9, r10, r11. Falls through into channel 7's code unless
     * CHAN_COUNT == 7, in which case it leaves via rx_bit_ep.
     */
    
    // get struct pointer for UART channel
    // r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit (word 2 of the channel struct), and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* explicit exit only when channel 6 is the last configured channel; otherwise fall through */
    #if CHAN_COUNT == 7
    bu rx_bit_ep
    #endif
/* Channel 7 label set-up: drop the previous channel's definitions of the
 * generic template labels, then map each generic label onto its _7 name so
 * the unrolled template body that follows assembles with unique labels.
 */
#undef process_loop	
#undef idle_process
#undef data_process
#undef invalid_start
#undef immediate_start_bit
#undef dp_calc_tick
#undef next_channel
#undef channel_num
#define channel_num 7
#define process_loop process_loop_7
#define idle_process idle_process_7
#define data_process data_process_7
#define immediate_start_bit immediate_start_bit_7
#define invalid_start invalid_start_7
#define dp_calc_tick dp_calc_tick_7
/* Channel 7 is the last unrolled channel (there is no process_loop_8), so it
 * always leaves via rx_bit_ep whatever CHAN_COUNT is. The former
 * "#if CHAN_COUNT == 8 / #else" selected the same target in both arms.
 */
#define next_channel rx_bit_ep

process_loop:                                   
    /* Entry point for channel 7 (channel_num). Looks up this channel's
     * fourBits value from the port value in r6, then dispatches:
     *   state[r5] == 0     -> idle_process
     *   tick_count[r5] < 5 -> data_process
     *   otherwise          -> tick_count[r5] -= 4, go to next_channel
     * Register use as seen here: r1 = state[], r2 = tick_count[],
     * r5 = channel index, r6 = port value, r7 = bit mask.
     * r8 (fourBits) is live on entry to idle_process and data_process;
     * r10 (tick_count) is live on entry to data_process.
     */
    /* initialise channel count */              
	ldc r5, channel_num
	
	/* get fourBits */                          
	and r8, r6, r7 // mask off bits             
    mkmsk r11, 4                                
    crc32 r8, r11, r11 // compress bits         

    /* two-level lookup: per-channel table pointer, then entry indexed by the compressed bits */
    ldw r11, sp[STACK_SIZE+FOURBIT_CONFIG_ARG] // do lookup    
    ldw r11, r11[r5]
    ldw r8, r11[r8] // get fourBits             

	/* check if state is idle (0x0) */
	ldw r11, r1[r5]
	bf r11, idle_process
    
    /* check if tickcount is  =< 4 (same as < 5) - signed compare */
	ldc r11, 5
	ldw r10, r2[r5]
	lss r11, r10, r11
	bt r11, data_process // WARN - we assume here that we only have two states, adding more states is costly

	/* decrement tick_count by 4 and store it back */
	sub r10, r10, 4 
	stw r10, r2[r5]
	
	/* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
	
idle_process:
    /* This state detects the start of a start bit by looking for a change in state in the line
     * sampling points are then calculated from this point
     *
     * On entry r8 holds the fourBits lookup result and r5 the channel index.
     * On a valid start: state[r5] = 1, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - startBitLookup entry.
     * Note that bit_count[r5] is rewritten with word_len before the validity
     * check, so it is updated even when the start is rejected.
     * Clobbers r8, r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* lookup TC value and check for valid start */
    ldw r10, sp[STACK_SIZE+START_BIT_CONFIG_ARG]
    ldw r10, r10[r5] // get address for the startBitLookup we will use
    
    /* index the table with the low four bits of fourBits */
    ldc r11, 0xf
    and r8, r8, r11
    ldw r10, r10[r8]
    /* check if we have invalid SB: an all-ones table entry marks an invalid start */
    mkmsk r8, 32
    eq r8, r8, r10
    bt r8, invalid_start
    
    /* store new state - r8 is 0 here (the eq result was false), so 1 must be loaded explicitly */
    ldc r8, 1
    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
invalid_start:    
    /* no valid start bit: state and tick_count are left untouched */
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    bu next_channel
    
data_process:
    /* Samples one bit for channel r5. On entry r8 holds the fourBits lookup
     * result and r10 the current tick_count (both set up in process_loop).
     * The bit is shifted into uart_word[r5] and bit_count[r5] is decremented.
     * While bits remain, control goes to dp_calc_tick. When the count reaches
     * zero the word is stored, the channel index is sent as a token, and the
     * channel either goes idle or restarts via immediate_start_bit.
     * Clobbers r8, r9, r10, r11 and stack slot sp[11].
     */
    /* get potential start bit indicator: everything above the low four bits of fourBits */
    shr r9, r8, 4
    stw r9, sp[11]
    
    /* get bit from fourBits - tick_count still in r10; the sample is bit (tick_count - 1) */
    sub r10, r10, 1
    shr r8, r8, r10
    ldc r9, 1
    and r8, r8, r9 // now got the bit
    
    /* store the bit in uart_word: shift left by one (r9 == 1) and OR the new bit in */
    ldw r11, r4[r5]
    shl r11, r11, r9
    or r11, r11, r8
    stw r11, r4[r5]
    
    /* decrement bit_count (r9 is still 1) */
    ldw r10, r3[r5]
    sub r10, r10, r9
    stw r10, r3[r5]
    
    /* if bit_count == 0 then change state and get new tick count else calculate new tick count */
    bt r10, dp_calc_tick
    
    /* store rx'd data in memory - means we don't block */
    ldw r9, sp[STACK_SIZE+RX_CHAR_POS_ARG]
    stw r11, r9[r5] // write data into array
    
    /* notify over the channel end: the token sent is the channel index (r5), not the data word */
    ldw r9, sp[STACK_SIZE+CUART_ARG]
    outt res[r9], r5 // using outt means we won't block for up to 8 tokens
    
    /* check flag for a start bit in this set of samples */
    ldw r10, sp[11]    
    bt r10, immediate_start_bit
    
    // set state to idle - r10 is 0
    stw r10, r1[r5] 
    
    /* shift port_val and advance to next channel */
    shr r6, r6, 1
    bu next_channel

immediate_start_bit:
    /* this handles a start bit transition detected in the stop bit sample
     *
     * Reached from data_process when the value saved in sp[11] is non-zero.
     * Re-arms channel r5 for the next word without passing through idle:
     * bit_count[r5] = word_len, uart_word[r5] = 0 and
     * tick_count[r5] = use_sample + clocks_per_bit - sp[11].
     * Clobbers r9, r10, r11.
     */
    
    /* initial tc: r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE */
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r10, r5, r9 // get struct pointer for channel
    add r11, r11, r10
    
    /* initialise bit count */
    ldw r9, r11[1] // word_len
    stw r9, r3[r5]
    
    /* clocks_per_bit */
    ldw r10, r11[2]
    
    /* use_sample */
    ldw r9, r11[4]
    
    /* calculate tick_count */
    add r9, r9, r10
    
    /* get number of bits swallowed (the value data_process saved in sp[11]) */
    ldw r10, sp[11]
    
    /* store new state*/
    // state doesn't change: process_loop only reaches data_process when state[r5] is non-zero,
    // so the store below is left disabled
//    ldc r8, 1
//    stw r8, r1[r5] 
    
    /* sub TC value for swallowed bits */
    sub r9, r9, r10
    
    /* zero uart_word entry */
    ldc r10, 0
    stw r10, r4[r5]
    
    /* store tick_count */
    stw r9, r2[r5]
    
    /* shift port_val, check loop and increment or quit loop */
    shr r6, r6, 1
    bu next_channel
    
dp_calc_tick:
    /* Reached from data_process while bit_count[r5] is still non-zero.
     * Schedules the next sample point for channel r5:
     * tick_count[r5] = clocks_per_bit - 4 + tick_count[r5].
     * Clobbers r9, r10, r11. Always leaves via next_channel.
     */
    
    // get struct pointer for UART channel
    // r11 = (UART_RX_INFO_ARG stack argument) + r5 * CHAN_STRUCT_SIZE
    ldw r11, sp[STACK_SIZE+UART_RX_INFO_ARG]
    ldc r9, CHAN_STRUCT_SIZE
    mul r9, r5, r9
    add r11, r11, r9
    
    /* load tick_count[i] */
    ldw r10, r2[r5]
    
    /* get clocks_per_bit (word 2 of the channel struct), and correct for bits we already swallowed */
    ldw r11, r11[2]
    // new tick_count = cpb - (4-tc) = cpb - 4 + tc
    sub r11, r11, 4
    add r11, r11, r10
    stw r11, r2[r5]
    
    /* shift port_val, and advance to next channel */
    shr r6, r6, 1
    
    /* Channel 7 is the last unrolled channel, so there is nothing to fall
     * through into: always branch out. next_channel is rx_bit_ep for this
     * channel, so the former "#if CHAN_COUNT == 8 / bu rx_bit_ep" only
     * emitted a second, unreachable copy of this branch.
     */
    bu next_channel

finish_uart_rx:
    /* Function exit path: drains one word from the cUART channel end,
     * clears events, restores the callee-saved registers r4-r10 from
     * sp[1]..sp[7] and returns.
     */
    
    /* read pause trigger out of channel - the value is discarded, r10 is restored below */
    ldw r10, sp[STACK_SIZE+CUART_ARG]
    in r10, res[r10]
    
    /* clear all events */
    clre
    
    /* reload stored reg values */
    ldw r4, sp[1]	
	ldw r5, sp[2]
	ldw r6, sp[3]
	ldw r7, sp[4]
	ldw r8, sp[5]
	ldw r9, sp[6]
	ldw r10, sp[7]
	
	/* return, releasing the STACK_SIZE-word frame */
	retsp STACK_SIZE

.cc_bottom uart_rx_loop_8.func
