AU SPI question (slave mode)

I’ve got SPI communication working perfectly well in the following configuration:

Master: Seeed Wio Terminal (PlatformIO using the Seeed Arduino SPI library)
Slave: AU + Br using the SPI peripheral component with the following acf:

pin sck A5
pin sdi A6
pin sdo A9
pin cs A2

Now master->slave communication works perfectly well at 24 MHz, but slave->master communication is garbled if the SPI clock is above 12 MHz.

This gives me about 700 KB/sec in both directions simultaneously.

Question: does anyone know if the speed limitation is in the AU, or is it more likely that the WIO can’t keep up?

Lucid code (github):

// Top-level design: bridges the USB serial console and the SPI peripheral.
//   console -> echo fifo    -> console   (local echo of typed characters)
//   console -> forward fifo -> SPI       (bytes offered to the SPI controller)
//   SPI     -> echo fifo    -> console   (bytes received from the SPI controller)
module alchitry_top (
    input clk,              // 100MHz clock
    input rst_n,            // reset button (active low)
    output led[8],          // 8 user controllable LEDs
    input usb_rx,           // USB->Serial input
    output usb_tx,           // USB->Serial output
    input sck,     // spi clock
    input sdi,     // spi mosi
    output sdo,    // spi miso
    input cs        // spi slave select
) {
    /*
    SPI Mode 0 = CPOL 0 CPHA 0
    SPI Mode 1 = CPOL 0 CPHA 1
    SPI Mode 2 = CPOL 1 CPHA 0
    SPI Mode 3 = CPOL 1 CPHA 1
    */    
    
    sig rst                 // reset signal
    
    .clk(clk) {
        // The reset conditioner is used to synchronize the reset signal to the FPGA
        // clock. This ensures the entire FPGA comes out of reset at the same time.
        reset_conditioner reset_cond
        .rst(rst) {
            // echo: bytes waiting to be printed on the console (from console and from SPI)
            fifo echo (#WIDTH(8), #ENTRIES(256))
            // forward: console bytes waiting to be sent over SPI
            fifo forward (#WIDTH(8), #ENTRIES(256))
            uart_rx rx (#BAUD(1000000), #CLK_FREQ(100000000))
            uart_tx tx (#BAUD(1000000), #CLK_FREQ(100000000))   
            // SPI mode 0; sdo and data_in are connected in the always block
            spi_peripheral spi (#CPOL(0), #CPHA(0), .cs(cs), .sck(sck), .sdi(sdi))
        }
        dff have_char[1]    // 1 once next_char has been loaded for the upcoming SPI byte
        dff next_char[8]    // byte presented to the SPI peripheral on data_in
    }
    
    always {
        reset_cond.in = ~rst_n  // input raw inverted reset signal
        rst = reset_cond.out    // conditioned reset
        
        // connect usb
        tx.block = 0
        rx.rx = usb_rx
        usb_tx = tx.tx
        
        // connect spi
        spi.data_in = 8h00      // default only; overridden further down by next_char.q
        sdo = spi.sdo
        
        // init echo fifo (defaults: no write, no read this cycle)
        echo.din = 0
        echo.wput = 0
        echo.rget = 0
        
        // init forward fifo (defaults: no write, no read this cycle)
        forward.din =0
        forward.wput = 0
        forward.rget = 0
        
        // leds: fifo status on 0-3, raw SPI select/clock on 7 and 6
        led[0] = echo.empty
        led[1] = echo.full
        led[2] = forward.empty
        led[3] = forward.full
        led[7] = cs
        led[6] = sck
        led[5] = 0
        led[4] = 0
        
        
        
        // echo received characters (spi or console) to console:
        // pop one byte from the echo fifo whenever the UART transmitter is idle
        if (! echo.empty && !tx.busy ) {
            echo.rget = 1
            tx.data = echo.dout 
            tx.new_data = 1
        } else {
            tx.data = 0
            tx.new_data = 0
        }
        
        // store input from console in spi forward buffer
        // (the byte is dropped entirely when echo is full, and only echoed
        // when forward is full)
        if (rx.new_data && !echo.full) {
            echo.din = rx.data
            echo.wput = 1
            if (! forward.full)  {
                forward.din = rx.data
                forward.wput = 1
            }
        } 
        
        // SPI data has to be ready to send before SPI transfer starts
        // Fetch the next byte as soon as the previous one has been consumed;
        // 8hff is used as filler when nothing is queued.
        // NOTE(review): assumes forward.dout already shows the head entry in the
        // cycle rget is asserted - confirm against the fifo component.
        if (!have_char.q) {
            if (!forward.empty) {
                forward.rget = 1
                next_char.d = forward.dout
            } else {
                next_char.d = 8hff
            }
            have_char.d = 1
        }
        spi.data_in = next_char.q
        
        // store received spi input in console echo buffer
        // NOTE(review): this block comes last, so if spi.done and rx.new_data are
        // high in the same cycle the echo fifo write carries the SPI byte and the
        // console byte is not echoed (it is still forwarded) - confirm this is acceptable.
        if (spi.done) {
            have_char.d = 0 // need next char to send
            if (!echo.full) {
                echo.din = spi.data_out
                echo.wput = 1
            }    
        }
    }    
}

Wio code (github):

#include <Arduino.h>

#include <SPI.h>

#include "debug.h"
#include <tft_functions.h>
#include <battery.h>
#include "buttons.h"

// SPI pin aliases, kept for documentation: they record which breakout pin each
// signal is on. The code shown here refers to PIN_SPI_SS directly.
const auto spi_ss = PIN_SPI_SS;     // (50ul) = BCM24 = pin 18 on wio breakout
const auto spi_mosi = PIN_SPI_MOSI; // (48ul) = BCM10 = pin 19 on wio breakout
const auto spi_miso = PIN_SPI_MISO; // (47ul) = BCM9  = pin 21 on wio breakout
const auto spi_sck = PIN_SPI_SCK;   // (49ul) = BCM11 = pin 23 on wio breakout

// Test pattern sent to the SPI peripheral on every pass through loop()
const char *tqbf = "the quick brown fox jumps over the lazy dog\r\n";

// local function declarations here:
static void setup_debug();
static void setup_spi();
static void print_hex(uint8_t x);
static void wait_btn();

// Arduino entry point, run once at start-up: brings up the debug serial port,
// buttons, display and battery monitor, then the SPI bus last.
void setup()
{
  // debug serial port (does nothing unless DEBUG is defined)
  setup_debug();
  // buttons
  init_buttons();
  // initialize tft
  init_tft();
  tft_clear();
  // initialize battery
  init_battery();
  printBatteryStats();
  // initialize spi (waits for a button press before returning)
  setup_spi();
}

// Transfer buffers used by loop(): txbuf holds the bytes to send, rxbuf the
// bytes clocked in from the peripheral during the same transfer.
static unsigned char txbuf[64];
static unsigned char rxbuf[64];

// One test cycle: sends the test string to the SPI peripheral byte by byte
// while capturing whatever the peripheral returns, dumps both buffers when
// DEBUG is defined, then waits for a button press before the next cycle.
void loop()
{

  // 0xff is the idle filler on both sides; the dump below skips it on the TFT
  memset(txbuf, 0xff, sizeof(txbuf));
  memset(rxbuf, 0xff, sizeof(rxbuf));
  size_t count = strlen(tqbf);
  // Never copy or transfer more than the buffers hold, even if the test
  // string is later made longer than txbuf/rxbuf.
  if (count > sizeof(txbuf))
  {
    count = sizeof(txbuf);
  }
  memcpy(txbuf, tqbf, count);

  tft_println("Start SPI");
  // 24 MHz transmit is OK, but receive is max 12 MHz
  SPI.beginTransaction(SPISettings((int)12000000, MSBFIRST, (uint8_t)SPI_MODE0));
  // TXBUF != NULL => write and read simultaneously
  // TXBUF == NULL => read only
  // SPI.transfer(txbuf, rxbuf, count, false);
  // SPI.waitForTransfer();
  // Chip select is driven by hand and stays low for the whole buffer
  digitalWrite(PIN_SPI_SS, LOW);
  for (size_t i = 0; i < count; i++)
  {
    // full duplex: each call sends one byte and returns the byte received
    rxbuf[i] = SPI.transfer(txbuf[i]);
  }
  digitalWrite(PIN_SPI_SS, HIGH);
  SPI.endTransaction();
  tft_println("SPI complete");
#ifdef DEBUG
  // received bytes: hex on the serial port, printable payload on the TFT
  for (size_t i = 0; i < count; i++)
  {
    print_hex(rxbuf[i]);
    if (rxbuf[i] != 0xff && rxbuf[i] != 0x00)
    {
      tft_print(String((char)rxbuf[i]));
    }
  }
  Serial.println();
  // transmitted bytes, for comparison
  for (size_t i = 0; i < count; i++)
  {
    print_hex(txbuf[i]);
  }
  Serial.println();
#endif
  wait_btn();
  tft_clear();
}

// Writes one byte to the serial port as exactly two hex digits.
static void print_hex(uint8_t x)
{
  // Serial.print(x, HEX) emits a single digit for values up to 0x0F,
  // so pad those with a leading zero.
  const bool single_digit = (x <= 0x0F);
  if (single_digit)
  {
    Serial.print('0');
  }
  Serial.print(x, HEX);
}

// Opens the debug serial port when DEBUG is defined; otherwise does nothing.
static void setup_debug()
{
#ifdef DEBUG
  Serial.begin(115200);
  // Give the host up to 250 polls (1 ms apart) to open the port, then carry on
  // regardless so the sketch also runs without a terminal attached.
  for (int remaining = 250; !Serial && remaining > 0; remaining--)
  {
    delay(1);
  }
  // extra settling time before the first output
  delay(100);
#endif
}

// Prepares the SPI bus: configures the chip-select pin as an output held high
// (deselected), starts the SPI library, then waits for a button press.
static void setup_spi()
{
  tft_println("SPI setup");
  // chip select is driven manually by loop(); keep it high until a transfer starts
  pinMode(PIN_SPI_SS, OUTPUT);
  digitalWrite(PIN_SPI_SS, HIGH);
  SPI.begin();
  // Bit order, mode and clock are passed per transfer via SPISettings in
  // loop(), so the older global setters below are left disabled.
  /*
  SPI.setBitOrder(MSBFIRST);
  SPI.setDataMode(SPI_MODE0);
  SPI.setClockDivider(1);
  */
  tft_println("SPI initialised");
  wait_btn();
  tft_clear();
}

// Blocks until WIO_KEY_B has gone through one press-and-release cycle,
// polling the pin once per millisecond.
// (presumably the key is active low, i.e. LOW means pressed - confirm against the board)
static void wait_btn()
{
  // phase 1: stay here for as long as the pin reads HIGH
  for (;;)
  {
    if (digitalRead(WIO_KEY_B) != HIGH)
    {
      break;
    }
    delay(1);
  }
  // phase 2: stay here for as long as the pin reads LOW
  for (;;)
  {
    if (digitalRead(WIO_KEY_B) != LOW)
    {
      break;
    }
    delay(1);
  }
}

Taking a look at the spi_peripheral module, there might be some sync issues with it.

Try this version and let me know if it works.

// SPI peripheral (slave), 8 bits per byte, most significant bit first.
// cs, sdi and sck are sampled with clk, and SCK edges are detected by comparing
// two consecutive registered samples, so all logic runs in the clk domain.
//   done     : high for one clk cycle after the 8th bit of a byte was sampled
//   data_out : received byte, complete in the cycle done is high
//   data_in  : byte to send; captured while cs is high and again in the cycle
//              the 8th bit is sampled (one cycle before done is visible)
module spi_peripheral #(
    // clock polarity, 0 = inactive low, 1 = inactive high
    CPOL = 0 : CPOL == 0 || CPOL == 1,
    
    // clock phase, 0 = valid on leading edge, 1 = valid on trailing edge
    CPHA = 0 : CPHA == 0 || CPHA == 1
)(
    input clk,          // clock
    input rst,          // reset
    input cs,           // SPI chip select
    input sdi,          // SPI data in
    output sdo,         // SPI data out
    input sck,          // SPI SCK
    output done,        // transfer done
    input data_in[8],   // data to send
    output data_out[8]  // data received
) {
    
    .clk(clk) {
        .rst(rst) {
            dff bit_ct[3]    // bit counter (index of the bit currently being transferred)
            dff data[8]      // byte being sent (copied from data_in)
        }
        dff sdi_reg[2]       // input buffer (2-stage shift register)
        dff sdo_reg          // output buffer
        dff sck_reg[3]       // sck buffer (bits 2 and 1 are compared for edges)
        dff cs_reg[2]        // cs buffer (2-stage shift register)
        dff data_out_reg[8]  // data_out buffer
        dff done_reg         // done buffer
    }
    
    always {
        // connect to buffer output
        sdo = sdo_reg.q
        done = done_reg.q
        data_out = data_out_reg.q
        
        // read in buffered inputs (newest sample enters at bit 0)
        cs_reg.d = c{cs_reg.q[0], cs}
        sdi_reg.d = c{sdi_reg.q[0], sdi}
        sck_reg.d = c{sck_reg.q[1:0], sck} // save old sck
        
        done_reg.d = 0 // default to not done
        
        if (cs_reg.q[$width(cs_reg.q)-1]) { // not selected
            bit_ct.d = 3b111        // reset counter
            data.d = data_in        // copy in data for next byte
            sdo_reg.d = data_in[7]  // write first bit out
        } else {
            // When CPOL and CPHA are different, we read on the falling edge.
            // When they are the same we read on the rising edge.
            // Therefore we can use XOR to check that and invert the
            // edge detector. If you XOR with 1, the bit is flipped.
            
            // sck_reg.q[2] is the older sample, sck_reg.q[1] the newer one,
            // so b01 is a rising edge and b10 a falling edge.
            if (sck_reg.q[2:1] == (b01 ^ 2x{CPOL^CPHA})) { // reading edge
                
                // read in the bit
                data_out_reg.d[bit_ct.q] = sdi_reg.q[$width(sdi_reg.q)-1]
                
                // decrement the bit counter (7 down to 0, wraps back to 7)
                bit_ct.d = bit_ct.q - 1
                
                // if we read the last bit
                if (bit_ct.q == b0) {
                    done_reg.d = 1   // signal we are done
                    data.d = data_in // copy new data in
                }
            } else if (sck_reg.q[2:1] == (b10 ^ 2x{CPOL^CPHA})) { // writing edge
                // write a bit out
                sdo_reg.d = data.q[bit_ct.q]
            }
        }
    }
}

24MHz is probably a bit high for the way this module is written. I’ll have to look into rewriting it better but you could always just boost the Au’s clock to 200MHz using the clock wizard as a quick workaround.

You could also just try this version that moves the write to the reading edge. This will likely work well with 24MHz since the synchronizer delays everything by a few clock cycles.

// SPI peripheral (slave), 8 bits per byte, most significant bit first.
// Variant that updates sdo on the reading edge instead of waiting for the
// opposite SCK edge: once a bit has been sampled, the next outgoing bit is
// driven immediately.
//   done     : high for one clk cycle after the 8th bit of a byte was sampled
//   data_out : received byte, complete in the cycle done is high
//   data_in  : byte to send; captured while cs is high and again in the cycle
//              the 8th bit is sampled (one cycle before done is visible)
module spi_peripheral #(
    // clock polarity, 0 = inactive low, 1 = inactive high
    CPOL = 0 : CPOL == 0 || CPOL == 1,
    
    // clock phase, 0 = valid on leading edge, 1 = valid on trailing edge
    CPHA = 0 : CPHA == 0 || CPHA == 1
)(
    input clk,          // clock
    input rst,          // reset
    input cs,           // SPI chip select
    input sdi,          // SPI data in
    output sdo,         // SPI data out
    input sck,          // SPI SCK
    output done,        // transfer done
    input data_in[8],   // data to send
    output data_out[8]  // data received
) {
    
    .clk(clk) {
        .rst(rst) {
            dff bit_ct[3]    // bit counter (index of the bit currently being transferred)
            dff data[8]      // byte being sent (copied from data_in)
        }
        dff sdi_reg[2]       // input buffer (2-stage shift register)
        dff sdo_reg          // output buffer
        dff sck_reg[3]       // sck buffer (bits 2 and 1 are compared for edges)
        dff cs_reg[2]        // cs buffer (2-stage shift register)
        dff data_out_reg[8]  // data_out buffer
        dff done_reg         // done buffer
    }
    
    always {
        // connect to buffer output
        sdo = sdo_reg.q
        done = done_reg.q
        data_out = data_out_reg.q
        
        // read in buffered inputs (newest sample enters at bit 0)
        cs_reg.d = c{cs_reg.q[0], cs}
        sdi_reg.d = c{sdi_reg.q[0], sdi}
        sck_reg.d = c{sck_reg.q[1:0], sck} // save old sck
        
        done_reg.d = 0 // default to not done
        
        if (cs_reg.q[$width(cs_reg.q)-1]) { // not selected
            bit_ct.d = 3b111        // reset counter
            data.d = data_in        // copy in data for next byte
            sdo_reg.d = data_in[7]  // write first bit out
        } else {
            // When CPOL and CPHA are different, we read on the falling edge.
            // When they are the same we read on the rising edge.
            // Therefore we can use XOR to check that and invert the
            // edge detector. If you XOR with 1, the bit is flipped.
            
            // sck_reg.q[2] is the older sample, sck_reg.q[1] the newer one,
            // so b01 is a rising edge and b10 a falling edge.
            if (sck_reg.q[2:1] == (b01 ^ 2x{CPOL^CPHA})) { // reading edge
                
                // read in the bit
                data_out_reg.d[bit_ct.q] = sdi_reg.q[$width(sdi_reg.q)-1]
                
                // index of the bit that follows the one just sampled
                sig next_bit[3] = bit_ct.q - 1
                // decrement the bit counter
                bit_ct.d = next_bit
                
                // if we read the last bit
                if (bit_ct.q == b0) {
                    done_reg.d = 1   // signal we are done
                    data.d = data_in // copy new data in
                    sdo_reg.d = data_in[7] // first bit of the next byte
                } else {
                    // write a bit out
                    sdo_reg.d = data.q[next_bit]
                }
            }
        }
    }
}

Thanks for looking into it!

Meanwhile I had already changed the spi_peripheral because it had a problem that it always prepended 2 empty bytes to the transmission.

I have yet to look at your changes, but this is what I ended up with:

/******************************************************************************

  The MIT License (MIT)

  Copyright (c) 2024 Alchitry

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

  *****************************************************************************/

// SPI peripheral (slave), 8 bits per byte, most significant bit first.
// Variant with single-register input buffers that sets sdo to high impedance
// while not selected.
//   done     : held high for as long as the peripheral is not selected, and
//              high for one clk cycle after the 8th bit of a byte was sampled
//   data_out : received byte, complete in the cycle done pulses during a transfer
//   data_in  : byte to send; captured in any selected cycle in which done is high
// NOTE(review): because done stays high while cs is high, a parent that treats
// done as "byte received" sees it continuously between transfers - confirm the
// parent design accounts for this.
module spi_peripheral #(
   // clock polarity, 0 = inactive low, 1 = inactive high
   CPOL = 0 : CPOL == 0 || CPOL == 1,
   
   // clock phase, 0 = valid on leading edge, 1 = valid on trailing edge
   CPHA = 0 : CPHA == 0 || CPHA == 1
)(
   input clk,          // clock
   input rst,          // reset
   input cs,           // SPI chip select
   input sdi,          // SPI data in
   output sdo,         // SPI data out
   input sck,          // SPI SCK
   output done,        // transfer done
   input data_in[8],   // data to send
   output data_out[8]  // data received
) {
   
   .clk(clk) {
       .rst(rst) {
           dff bit_ct[3]    // bit counter (index of the bit currently being transferred)
           dff data[8]      // byte being sent (copied from data_in)
       }
       dff sdi_reg          // input buffer (single register)
       dff sdo_reg          // output buffer
       dff sck_reg[2]       // sck buffer (bit 1 = older sample, bit 0 = newer sample)
       dff cs_reg           // cs buffer (single register)
       dff data_out_reg[8]  // data_out buffer
       dff done_reg         // done buffer
   }
   
   always {
       // connect to buffer output
       sdo = sdo_reg.q
       done = done_reg.q
       data_out = data_out_reg.q
       
       // read in buffered inputs
       cs_reg.d = cs
       sdi_reg.d = sdi
       sck_reg.d = c{sck_reg.q[0], sck} // save old sck
       
       
       if (cs_reg.q) { // not selected
           bit_ct.d = 3b111        // reset counter
           // NOTE(review): a high-impedance value is stored in a dff here -
           // confirm the tools turn this into a tri-state driver on the pin
           sdo_reg.d = 1bz         // high impedance
           done_reg.d = 1          // makes the first selected cycle load data_in
       } else {
           if (done_reg.q) {
                data.d = data_in        // copy in new data byte
           }
           done_reg.d = 0
           // When CPOL and CPHA are different, we read on the falling edge.
           // When they are the same we read on the rising edge.
           // Therefore we can use XOR to check that and invert the
           // edge detector. If you XOR with 1, the bit is flipped.
           
           if (sck_reg.q == (b01 ^ 2x{CPOL^CPHA})) { // reading edge
               
               // read in the bit
               data_out_reg.d[bit_ct.q] = sdi_reg.q
               
               // decrement the bit counter (7 down to 0, wraps back to 7)
               bit_ct.d = bit_ct.q - 1
               
               // if we read the last bit
               if (bit_ct.q == b0) {
                   done_reg.d = 1   // signal we are done
               }
           } else if (sck_reg.q == (b10 ^ 2x{CPOL^CPHA})) { // writing edge
               // write a bit out
               // NOTE(review): sdo is only driven from here, so when CPOL == CPHA
               // the first edge after select is a reading edge and bit 7 does not
               // appear to be driven before it is sampled - confirm
               sdo_reg.d = data.q[bit_ct.q]
           }
       }
   }
}

I will compare my version with your changed one and give the 200 MHz clock a try.

Thanks again!

The first change (using 3 samples of the sck) does not change anything.
The other one also makes no difference.

Both work well at 12 MHz, but with the initial characters problem.

But then I have no idea if the problem is on the AU side or on the WIO side, I don’t have a logic analyzer…

I’ll see if I can get that 200 MHz clock working.

I just spent a little time improving it.

Here’s my new version.

// SPI peripheral (slave), 8 bits per byte, most significant bit first.
// Variant that updates sdo on the reading edge and loads the next byte to
// send in the cycle where done is high.
//   done     : high for one clk cycle after the 8th bit of a byte was sampled
//   data_out : received byte, complete in the cycle done is high
//   data_in  : byte to send; captured while cs is high and in the cycle done
//              is high, so the parent must present the next byte in that cycle
module spi_peripheral #(
    // clock polarity, 0 = inactive low, 1 = inactive high
    CPOL = 0 : CPOL == 0 || CPOL == 1,
    
    // clock phase, 0 = valid on leading edge, 1 = valid on trailing edge
    CPHA = 0 : CPHA == 0 || CPHA == 1
)(
    input clk,          // clock
    input rst,          // reset
    input cs,           // SPI chip select
    input sdi,          // SPI data in
    output sdo,         // SPI data out
    input sck,          // SPI SCK
    output done,        // transfer done
    input data_in[8],   // data to send
    output data_out[8]  // data received
) {
    
    .clk(clk) {
        .rst(rst) {
            dff bit_ct[3]    // bit counter (index of the bit currently being transferred)
            dff data[8]      // byte being sent (copied from data_in)
        }
        dff sdi_reg[2]       // input buffer (2-stage shift register)
        dff sdo_reg          // output buffer
        dff sck_reg[3]       // sck buffer (bits 2 and 1 are compared for edges)
        dff cs_reg[2]        // cs buffer (2-stage shift register)
        dff data_out_reg[8]  // data_out buffer
        dff done_reg         // done buffer
    }
    
    always {
        // connect to buffer output
        sdo = sdo_reg.q
        done = done_reg.q
        data_out = data_out_reg.q
        
        // read in buffered inputs (newest sample enters at bit 0)
        cs_reg.d = c{cs_reg.q[0], cs}
        sdi_reg.d = c{sdi_reg.q[0], sdi}
        sck_reg.d = c{sck_reg.q[1:0], sck} // save old sck
        
        done_reg.d = 0 // default to not done
        
        if (cs_reg.q[$width(cs_reg.q)-1]) { // not selected
            bit_ct.d = 3b111        // reset counter
            data.d = data_in        // copy in data for next byte
            sdo_reg.d = data_in[7]  // write first bit out
        } else {
            // When CPOL and CPHA are different, we read on the falling edge.
            // When they are the same we read on the rising edge.
            // Therefore we can use XOR to check that and invert the
            // edge detector. If you XOR with 1, the bit is flipped.
            
            // sck_reg.q[2] is the older sample, sck_reg.q[1] the newer one,
            // so b01 is a rising edge and b10 a falling edge.
            if (sck_reg.q[2:1] == (b01 ^ 2x{CPOL^CPHA})) { // reading edge
                
                // read in the bit
                data_out_reg.d[bit_ct.q] = sdi_reg.q[$width(sdi_reg.q)-1]
                
                sig next_bit[3] = $resize(bit_ct.q - 1, 3) // drop MSB of subtraction
                // decrement the bit counter
                bit_ct.d = next_bit
                
                // if we read the last bit
                if (bit_ct.q == b0) {
                    done_reg.d = 1   // signal we are done
                } else {
                    // write a bit out
                    sdo_reg.d = data.q[next_bit]
                }
            }
        }
        // One cycle after the last bit was sampled (done is high now): load the
        // next byte and drive its first bit. Placed last so it takes priority
        // over the assignments above.
        if (done_reg.q) {
            data.d = data_in // copy new data in
            sdo_reg.d = data_in[7]
        }
    }
}

I think this should be good up to 1/4, maybe 1/3, of the main clock. So about 25MHz when using a 100MHz main clock.

I changed some stuff so that the next byte to send is clocked in when done is high. You MUST present the value during the same cycle that done is high.

EDIT: If you’re in the market for a logic analyzer, I have the Saleae Logic Pro 16 and it is excellent.

My changes fixing the initial bytes have just shifted the problem to the end: now, when CS goes low, I get the last byte received twice before I get the new bytes.

I’ll try your new version and change my top accordingly and let you know what I see.

I’ll have a look at that logic analyzer!

The latest version doesn’t change anything at 24 MHz (receive OK, transmit garbled), but continues to work correctly at 12 MHz.

EDIT: what is the purpose of shifting out that first bit when cs is still high and done still low (when the first data byte to transmit has not even been read)?

Without actually probing the signals, it’s hard to say why 24 MHz isn’t working. I haven’t looked at the rest of your design to know if there’s a mistake there or not.

The first bit is saved when CS is high so that it’s valid the moment CS goes low. You could look for the falling edge of CS but that’s more complicated for no benefit.

I reverted to your original spi_peripheral code but modified it to no longer waste the first transmitted byte, and keep sdo high impedance when not selected.

It now works correctly at 12 MHz with my code (updated github).

EDIT: I think you can ignore this now until I get my hands on a logic analyzer and can provide more relevant information.

If you’re willing to port it back to Lucid V1, you can use the crude built in analyzer by clicking the debug icon in Alchitry Labs V1. This will be ported at some point to V2.

Thanks for that tip!
But I will now try the clockwizard first.

As soon as I insert these lines before .clk(clk) { .rst(rst) { … :

   // Fragment that triggers the crash shown below: the module outputs
   // (clk_out1, clk_out2, locked, sdo) are bound to signals in the port lists.
   sig rst                 // reset signal
    sig clk1
    sig clk2
    sig locked
    
    // clock wizard with its outputs connected directly to local signals
    clk_wiz_0 spi_clock(.reset(rst), .clk_in1(clk), .clk_out1(clk1), .clk_out2(clk2), .locked(locked))
    
    // SPI peripheral clocked from clk2 and held in reset until the clock is locked
    spi_peripheral spi (#CPOL(0), #CPHA(1), .rst(!locked), .clk(clk2), .cs(cs), .sck(sck), .sdi(sdi), .sdo(sdo))

I get the following crash:

Signal "clk1" already has a driver!java.lang.IllegalArgumentException: Signal "clk1" already has a driver!	
at com.alchitry.labs2.parsers.hdl.types.SignalOrSubSignal$DefaultImpls.connectTo(SignalOrSubSignal.kt:59)	
at com.alchitry.labs2.parsers.hdl.types.Signal.connectTo(Signal.kt:12)	at com.alchitry.labs2.parsers.hdl.types.ModuleInstance.<init>(ModuleInstance.kt:112)	
at com.alchitry.labs2.parsers.hdl.lucid.parsers.TypesParser.exitModuleInst(TypesParser.kt:324)	
at com.alchitry.labs2.parsers.grammar.LucidParser$ModuleInstContext.exitRule$suspendImpl(LucidParser.kt:3776)	
at com.alchitry.labs2.parsers.grammar.LucidParser$ModuleInstContext.exitRule(LucidParser.kt)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.exitRule(ParseTreeMultiWalker.kt:116)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk(ParseTreeMultiWalker.kt:46)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk$default(ParseTreeMultiWalker.kt:10)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk(ParseTreeMultiWalker.kt:44)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk$default(ParseTreeMultiWalker.kt:10)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk(ParseTreeMultiWalker.kt:44)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk$default(ParseTreeMultiWalker.kt:10)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk(ParseTreeMultiWalker.kt:44)	
at com.alchitry.labs2.parsers.ParseTreeMultiWalker.walk$default(ParseTreeMultiWalker.kt:10)	
at com.alchitry.labs2.parsers.hdl.lucid.context.LucidBlockContext.walk(LucidBlockContext.kt:214)	
at com.alchitry.labs2.parsers.hdl.lucid.context.LucidBlockContext.walk$default(LucidBlockContext.kt:213)	
at com.alchitry.labs2.parsers.hdl.lucid.context.LucidBlockContext.initialWalk(LucidBlockContext.kt:200)	
at com.alchitry.labs2.parsers.hdl.types.ModuleInstance.initialWalk(ModuleInstance.kt:48)	
at com.alchitry.labs2.project.Project.buildContext(Project.kt:554)	
at com.alchitry.labs2.project.Project$buildContext$1.invokeSuspend(Project.kt)	
at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:33)	
at kotlinx.coroutines.internal.ScopeCoroutine.afterResume(Scopes.kt:28)	
at kotlinx.coroutines.AbstractCoroutine.resumeWith(AbstractCoroutine.kt:100)	
at kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(ContinuationImpl.kt:46)	
at kotlinx.coroutines.DispatchedTask.run(DispatchedTask.kt:101)	at kotlinx.coroutines.scheduling.CoroutineScheduler.runSafely(CoroutineScheduler.kt:589)	
at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.executeTask(CoroutineScheduler.kt:832)	
at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.runWorker(CoroutineScheduler.kt:720)	
at kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(CoroutineScheduler.kt:707)	Suppressed: kotlinx.coroutines.internal.DiagnosticCoroutineContextException: [StandaloneCoroutine{Cancelled}@1c162215, Dispatchers.Default]

EDIT:
I realize now that Lucid does not support connecting outputs.

The following syntax is accepted (but I have to say that I prefer the Verilog way):

    // Accepted form: only inputs are bound in the port lists; the clock
    // wizard's outputs are read through the instance name (spi_clock.<output>).
    clk_wiz_0 spi_clock(
        .reset(rst), 
        .clk_in1(clk)
    )
    
    // SPI peripheral clocked from clk_out2 and held in reset until the
    // clock wizard reports locked
    spi_peripheral spi (#CPOL(0), #CPHA(1), 
        .rst(!spi_clock.locked), 
        .clk(spi_clock.clk_out2), 
        .cs(cs), 
        .sck(sck), 
        .sdi(sdi)
    )
 

Thanks for the report. I fixed the issue to correctly throw an error instead.

It should complain that you’re trying to connect outputs from the module directly. This isn’t supported in Lucid V2. Instead, use the generated outputs spi_clock.clk_out1 etc.

You have CPHA = 1 but you were using MODE_0 which would imply CPHA = 0. Make sure you have these correct.

Is there any reason why Lucid V2 does not support it? It looks natural and concise to me.

I’m using Mode1 on both sides, but started originally with Mode0.

  // 12 MHz, MSB first, SPI mode 1 (pairs with #CPOL(0), #CPHA(1) on the FPGA side)
  SPI.beginTransaction(SPISettings((int)12000000, MSBFIRST, (uint8_t)SPI_MODE1));

I’m now battling with crossing clock domains (how to do it with the least impact on the code).

None, other than that I couldn’t think of a good use case for it, so I didn’t put the time into implementing it. If you wanted to alias the outputs you could do something like the following in the same amount of code.

    // Bind only the inputs in the port list...
    clk_wiz_0 spi_clock(.reset(rst), .clk_in1(clk))

    // ...then alias the instance outputs to short local names
    sig clk1 = spi_clock.clk_out1
    sig clk2 = spi_clock.clk_out2
    sig locked = spi_clock.locked

I probably would’ve removed port connections completely but using the block connections for clock and reset inputs is super convenient and directly passing through inouts is important.

As for crossing domains, why not just use the 200MHz everywhere?

200 MHz Everywhere…

Because I don’t know if that can work, but I can give it a try…

I suspect the WIO, based on the data my test program sees: though not consistently, it mostly misses the first bit of a byte at 24 MHz.

As it is not using DMA, I think that emptying the SPI receive register in software takes too long (120 MHz Atmel SAMD51).

Before I invest in a logic analyzer I will look for another test device, perhaps an ESP32 based one.

I apologize for the trouble I have been causing you with this, and thanks again for your support.

EDIT: I ordered a Teensy 4.0 as an alternative test device.

You could try to open the generated project in Vivado and temporarly hook an ILA (integrated logic analyzer) module to look at the signals.
It requires a bit of work but it’s very useful!

The Teensy 4.0/4.1 are seriously badass microcontrollers. I was using a Teensy 4.1 in my project, with quite a bit of success, before I took the leap to FPGA.

While waiting for the Teensy to arrive I have cleaned up the code a bit and added an extra pin to signal “tx data available” to the Wio, so it’s a bit more useful as an example for SPI slave communication with the AU.

AU code on Github

WIO code on Github