// Copyright (c) 2016, XMOS Ltd, All rights reserved

/**
xCORE-200 devices contain support for zip and unzip instructions, which
facilitate the efficient use of multibit ports as multiple single
bit ports.

This example shows how to send and receive four 1-bit streams of data.
However, instead of using four 1-bit ports, a single 4-bit port will be used,
treating each pin of the port as if it were a 1-bit port.

Two 4-bit ports are defined, an output and an input port. The output port
will be connected to the input port using the loopbacks in the simulation
environment. We will use the zip instruction to format the data correctly,
then send it to the output port. We will then read it back from the input port and
use unzip to recreate the original data. It will then be automatically checked and
displayed on the console.

**/


#include <xs1.h>
#include <stdio.h>
#include <xclib.h>

// 4-bit output port, buffered with a 32-bit transfer width. main() writes the
// zipped stream words to this port.
out buffered port:32 p_4bit_out = XS1_PORT_4A;
// 4-bit input port, buffered with a 32-bit transfer width. main() reads the
// looped-back data from this port.
in buffered port:32 p_4bit_in = XS1_PORT_4C;
// 1-bit output port. NOTE(review): not referenced by any code shown in this
// file; presumably reserved for driving the bit clock out on a pin - confirm.
out port p_bbit_clock = XS1_PORT_1A;

// Clock block that main() configures and attaches to both 4-bit ports.
clock bit_clock = XS1_CLKBLK_1;

// Define SW_LOOPBACK to make main() overwrite the received words with the
// zipped output words after the port transfers, so the zip/unzip round trip
// is checked independently of what was read from the input port.
//#define SW_LOOPBACK
// When defined, main() prints the original, zipped and unzipped words.
#define DEBUG_PRINT

// Interleave 4 words (one per serial stream) in place so they can be sent
// through a single 4-bit port, one stream per port pin:
//   word 0 -> bit0 of the 4-bit port
//   word 1 -> bit1 of the 4-bit port
//   word 2 -> bit2 of the 4-bit port
//   word 3 -> bit3 of the 4-bit port
//
// outputs: on entry the 4 stream words; on return the 4 zipped words.
static inline void zip_4_streams_for_4_bit_port(unsigned int outputs[4]) {
    // Stage 1 - zip x, y, s with s = 0: interleave in packs of 1 bit.
    // MSB of x comes first. Streams are paired as (0,1) and (2,3).
    unsigned long long pair_01 = zip(outputs[0], outputs[1], 0);
    unsigned long long pair_23 = zip(outputs[2], outputs[3], 0);

    // Split both 64-bit results so that matching halves can be combined.
    unsigned int pair_01_lo = (unsigned int) pair_01;
    unsigned int pair_01_hi = (unsigned int) (pair_01 >> 32);
    unsigned int pair_23_lo = (unsigned int) pair_23;
    unsigned int pair_23_hi = (unsigned int) (pair_23 >> 32);

    // Stage 2 - s = 1: interleave in packs of two bits, low halves with low
    // halves and high halves with high halves.
    unsigned long long zipped_lo = zip(pair_01_lo, pair_23_lo, 1);
    unsigned long long zipped_hi = zip(pair_01_hi, pair_23_hi, 1);

    // Write the four result words back, lowest word first.
    outputs[0] = (unsigned int) zipped_lo;
    outputs[1] = (unsigned int) (zipped_lo >> 32);
    outputs[2] = (unsigned int) zipped_hi;
    outputs[3] = (unsigned int) (zipped_hi >> 32);

    // The MSB of the original word 0 is now the MSB of outputs[3].
}






// De-interleave 4 words received from a 4-bit port back into 4 words, one per
// serial stream. The steps mirror zip_4_streams_for_4_bit_port in reverse
// order: first undo the 2-bit interleave, then the 1-bit interleave.
//
// inputs: on entry the 4 zipped words; on return the 4 stream words.
static inline void unzip_from_4_bit_port_to_4_streams(unsigned int inputs[4]) {
    unsigned int pair_01_lo, pair_23_lo, pair_01_hi, pair_23_hi;

    // Rebuild the two 64-bit values that the 2-bit zip stage produced.
    unsigned long long zipped_lo = ((unsigned long long) inputs[1] << 32) | inputs[0];
    unsigned long long zipped_hi = ((unsigned long long) inputs[3] << 32) | inputs[2];

    // Undo the interleave in packs of 2 bits.
    {pair_01_lo, pair_23_lo} = unzip(zipped_lo, 1);
    {pair_01_hi, pair_23_hi} = unzip(zipped_hi, 1);

    // Rejoin the halves of each stream pair and undo the interleave in packs
    // of 1 bit, which yields the individual stream words.
    {inputs[0], inputs[1]} = unzip(((unsigned long long) pair_01_hi << 32) | pair_01_lo, 0);
    {inputs[2], inputs[3]} = unzip(((unsigned long long) pair_23_hi << 32) | pair_23_lo, 0);
}

// Loopback demonstration: zips four 32-bit stream words, clocks them out of
// p_4bit_out, reads them back on p_4bit_in, unzips the received words and
// compares them with the original data. The result (and, with DEBUG_PRINT,
// the data at every stage) is printed to the console.
//
// Returns 0 regardless of the check result; pass/fail is only reported via
// the printed message.
#pragma unsafe arrays // Disable array range check for better performance
int main() {

  unsigned int outputs[4] = {0x87654321, 0x18765432, 0x21876543, 0x32187654};
  unsigned int inputs[4];

  unsigned expected_data[4];

  unsigned int tmp;

  // port time captured from / applied to the ports for alignment
  unsigned int t;

  // generate expected data (outputs[] is overwritten in place by the zip below)
  for(int i=0; i<4; i++) {
      expected_data[i] = outputs[i];
  }

  // setup clock
  configure_clock_rate(bit_clock, 100, 8); // 12.5 MHz
  // Note: This proves that a 12.288 MHz bbit_clock works. That's 4 I2S stereo channels at 192kHz
  // 25MHz is too fast : configure_clock_rate(bit_clock, 100, 4); // 25 MHz

  // clock both 4-bit ports from the same clock block
  configure_out_port(p_4bit_out, bit_clock, 0);
  configure_in_port(p_4bit_in, bit_clock);
  //configure_clock_out

  start_clock(bit_clock);

#ifdef DEBUG_PRINT
  printf("\noutput data:\n");
  for(int i=0; i<4; i++) {
      printf("%0.8x\n", outputs[i]);
  }
#endif

  // zip output data
  zip_4_streams_for_4_bit_port(outputs);

#ifdef DEBUG_PRINT
  printf("\nzipped output data:\n");

  for(int i=0; i<4; i++) {
      printf("%0.8x\n", outputs[i]);
  }
#endif


  // align port timing: output 0 and capture the port time of that output in t
  p_4bit_out <: 0 @ t;
  t += 100;
  // output 0 at t
  p_4bit_out @ t <: 0;
  // To align input with output, set the time of the next input on p_4bit_in.
  // NOTE(review): the +15 offset is not explained in this file - confirm
  // against the port timing before changing it.
  asm volatile("setpt res[%0], %1"::"r"(p_4bit_in),"r"(t+15));


  // MSB of channel 0 is MSB of outputs[3]!
  // See zip_4_streams_for_4_bit_port for details

  // Formats like I2S transmit MSB first. bitrev is needed because port outputs LSB first
  // The words are therefore sent from outputs[3] down to outputs[0], and each
  // received word is bit-reversed back before being stored at the same index.
  // The statement order below is deliberately left unrolled and unchanged.
  p_4bit_out <: bitrev(outputs[3]);
  p_4bit_in :> tmp;
  inputs[3] = bitrev(tmp);

  p_4bit_out <: bitrev(outputs[2]);
  p_4bit_in :> tmp;
  inputs[2] = bitrev(tmp);

  p_4bit_out <: bitrev(outputs[1]);
  p_4bit_in :> tmp;
  inputs[1] = bitrev(tmp);

  p_4bit_out <: bitrev(outputs[0]);
  p_4bit_in :> tmp;
  inputs[0] = bitrev(tmp);

#ifdef SW_LOOPBACK
  // Replace whatever was read from the port with the zipped output words.
  for(int i=0; i<4; i++) {
      inputs[i] = outputs[i];
  }
#endif

  // unzip input data
  unzip_from_4_bit_port_to_4_streams(inputs);

#ifdef DEBUG_PRINT
  printf("\nunzipped input data:\n");
  for(int i=0; i<4; i++) {
      printf("%0.8x\n", inputs[i]);
  }
#endif

  // check the data
  unsigned errors=0;
  for(int i=0; i<4; i++) {
      if(inputs[i] != expected_data[i]) {
          printf("ERROR in input data %d. Expected %x, Received %x\n", i, expected_data[i], inputs[i]);
          errors++;
      }
  }
  if(errors==0) {
      printf("Loopback data check PASS :)\n");
  } else {
      // errors is unsigned, so it is printed with %u
      printf("Loopback data check FAIL, %u Errors\n",errors);
  }

  return 0;
}
