diff --git a/README.md b/README.md
index badbb05b9a1a22d857ae48fa6dd6190b375369a9..d17620c68b8ff560da872c22fd20ac10f9ae6b2f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## On the Meta
+## SPI 
 
 - actually-realized data rates are often smaller in practice than according to underlying specs, 
   - see even i.e. transfer of some tens-of-megabytes onto an SD card (copying a gcode file) - we observe only ~ 20mb/sec of advertized 80mb/sec or so 
diff --git a/rp2040_uart/2024-03_rp2040-uart.md b/rp2040_uart/2024-03_rp2040-uart.md
index 577fc5e0b4477199fa125ac2d84f9660d70a92c0..130719e78b210d3bf5fdd0a725af085d056ca7fc 100644
--- a/rp2040_uart/2024-03_rp2040-uart.md
+++ b/rp2040_uart/2024-03_rp2040-uart.md
@@ -133,7 +133,17 @@ So, we should go a little more hardo at this.
 
 I'll try it my copy-pasta'ing the earle code, to see if I can remove some of the layers-of-abstraction cruft. 
 
+With some small changes (removing that mutex) and running at 200MHz, we do a little bit better:
 
+| Mbit/s | Misses (errs / total-bytes) | Expected Byte Time (us) | Avg Byte Time |
+| --- | --- | --- | --- |
+| 2.0 | 0.031 |
+| 2.5 | 0.0625 |
+| 5.0 | 0.61 | 
+
+But we're really still not doing as well as we would hope. 
+
+There's a threshold here also, where things clearly go from bad to worse. 
 
 ---
 
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.cpp b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..77c1db4348a80026465943b2cfe0140db931e49b
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.cpp
@@ -0,0 +1,382 @@
+/*
+    Serial-over-PIO for the Raspberry Pi Pico RP2040
+
+    Copyright (c) 2021 Earle F. Philhower, III <earlephilhower@yahoo.com>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "SerialModPIO.h"
+#include <hardware/gpio.h>
+#include <map>
+#include "pio_uart_mod.pio.h"
+
+
+// ------------------------------------------------------------------------
+// -- Generates a unique program for differing bit lengths
+static std::map<int, PIOProgram*> _txMap;
+static std::map<int, PIOProgram*> _rxMap;
+
+// Duplicate a program and replace the first insn with a "set x, repl"
+static pio_program_t *pio_make_uart_prog(int repl, const pio_program_t *pg) {
+    pio_program_t *p = new pio_program_t;
+    p->length = pg->length;
+    p->origin = pg->origin;
+    uint16_t *insn = (uint16_t *)malloc(p->length * 2);
+    if (!insn) {
+        delete p;
+        return nullptr;
+    }
+    memcpy(insn, pg->instructions, p->length * 2);
+    insn[0] = pio_encode_set(pio_x, repl);
+    p->instructions = insn;
+    return p;
+}
+
+static PIOProgram *_getTxProgram(int bits, bool inverted) {
+    int key = inverted ? -bits : bits;
+    auto f = _txMap.find(key);
+    if (f == _txMap.end()) {
+        // pio_program_t * p = pio_make_uart_prog(bits, inverted ? &pio_tx_inv_program : &pio_tx_program);
+        pio_program_t * p = pio_make_uart_prog(bits, &pio_tx_program);
+        _txMap.insert({key, new PIOProgram(p)});
+        f = _txMap.find(key);
+    }
+    return f->second;
+}
+
+static PIOProgram *_getRxProgram(int bits, bool inverted) {
+    int key = inverted ? -bits : bits;
+    auto f = _rxMap.find(key);
+    if (f == _rxMap.end()) {
+        // pio_program_t * p = pio_make_uart_prog(bits, inverted ? &pio_rx_inv_program : &pio_rx_program);
+        pio_program_t * p = pio_make_uart_prog(bits, &pio_rx_program);
+        _rxMap.insert({key, new PIOProgram(p)});
+        f = _rxMap.find(key);
+    }
+    return f->second;
+}
+// ------------------------------------------------------------------------
+
+// TODO - this works, but there must be a faster/better way...
+static int _parity(int bits, int data) {
+    int p = 0;
+    for (int b = 0; b < bits; b++) {
+        p ^= (data & (1 << b)) ? 1 : 0;
+    }
+    return p;
+}
+
+// We need to cache generated SerialModPIOs so we can add data to them from
+// the shared handler
+static SerialModPIO *_pioSP[2][4];
+static void __not_in_flash_func(_fifoIRQ)() {
+    for (int p = 0; p < 2; p++) {
+        for (int sm = 0; sm < 4; sm++) {
+            SerialModPIO *s = _pioSP[p][sm];
+            if (s) {
+                s->_handleIRQ();
+                pio_interrupt_clear((p == 0) ? pio0 : pio1, sm);
+            }
+        }
+    }
+}
+
+void __not_in_flash_func(SerialModPIO::_handleIRQ)() {
+    if (_rx == NOPIN) {
+        return;
+    }
+    while (!pio_sm_is_rx_fifo_empty(_rxPIO, _rxSM)) {
+        uint32_t decode = _rxPIO->rxf[_rxSM] ^ (_rxInverted ? 0xffffffff : 0);
+        decode >>= 33 - _rxBits;
+        uint32_t val = 0;
+        for (int b = 0; b < _bits + 1; b++) {
+            val |= (decode & (1 << (b * 2))) ? 1 << b : 0;
+        }
+        if (_parity == UART_PARITY_EVEN) {
+            int p = ::_parity(_bits, val);
+            int r = (val & (1 << _bits)) ? 1 : 0;
+            if (p != r) {
+                // TODO - parity error
+                continue;
+            }
+        } else if (_parity == UART_PARITY_ODD) {
+            int p = ::_parity(_bits, val);
+            int r = (val & (1 << _bits)) ? 1 : 0;
+            if (p == r) {
+                // TODO - parity error
+                continue;
+            }
+        }
+
+        auto next_writer = _writer + 1;
+        if (next_writer == _fifoSize) {
+            next_writer = 0;
+        }
+        if (next_writer != _reader) {
+            _queue[_writer] = val & ((1 << _bits) -  1);
+            asm volatile("" ::: "memory"); // Ensure the queue is written before the written count advances
+            _writer = next_writer;
+        } else {
+            _overflow = true;
+        }
+    }
+}
+
+SerialModPIO::SerialModPIO(pin_size_t tx, pin_size_t rx, size_t fifoSize) {
+    _tx = tx;
+    _rx = rx;
+    _fifoSize = fifoSize + 1; // Always one unused entry
+    _queue = new uint8_t[_fifoSize];
+    mutex_init(&_mutex);
+}
+
+SerialModPIO::~SerialModPIO() {
+    end();
+    delete[] _queue;
+}
+
+void SerialModPIO::begin(unsigned long baud, uint16_t config) {
+    _overflow = false;
+    _baud = baud;
+    switch (config & SERIAL_PARITY_MASK) {
+    case SERIAL_PARITY_EVEN:
+        _parity = UART_PARITY_EVEN;
+        break;
+    case SERIAL_PARITY_ODD:
+        _parity = UART_PARITY_ODD;
+        break;
+    default:
+        _parity = UART_PARITY_NONE;
+        break;
+    }
+    switch (config & SERIAL_STOP_BIT_MASK) {
+    case SERIAL_STOP_BIT_1:
+        _stop = 1;
+        break;
+    default:
+        _stop = 2;
+        break;
+    }
+    switch (config & SERIAL_DATA_MASK) {
+    case SERIAL_DATA_5:
+        _bits = 5;
+        break;
+    case SERIAL_DATA_6:
+        _bits = 6;
+        break;
+    case SERIAL_DATA_7:
+        _bits = 7;
+        break;
+    default:
+        _bits = 8;
+        break;
+    }
+
+    if ((_tx == NOPIN) && (_rx == NOPIN)) {
+        DEBUGCORE("ERROR: No pins specified for SerialModPIO\n");
+        return;
+    }
+
+    if (_tx != NOPIN) {
+        _txBits = _bits + _stop + (_parity != UART_PARITY_NONE ? 1 : 0) + 1/*start bit*/;
+        _txPgm = _getTxProgram(_txBits, _txInverted);
+        int off;
+        if (!_txPgm->prepare(&_txPIO, &_txSM, &off)) {
+            DEBUGCORE("ERROR: Unable to allocate PIO TX UART, out of PIO resources\n");
+            // ERROR, no free slots
+            return;
+        }
+
+        digitalWrite(_tx, HIGH);
+        pinMode(_tx, OUTPUT);
+
+        pio_tx_program_init(_txPIO, _txSM, off, _tx);
+        pio_sm_clear_fifos(_txPIO, _txSM); // Remove any existing data
+
+        // Put the divider into ISR w/o using up program space
+        pio_sm_put_blocking(_txPIO, _txSM, clock_get_hz(clk_sys) / _baud - 2);
+        pio_sm_exec(_txPIO, _txSM, pio_encode_pull(false, false));
+        pio_sm_exec(_txPIO, _txSM, pio_encode_mov(pio_isr, pio_osr));
+
+        // Start running!
+        pio_sm_set_enabled(_txPIO, _txSM, true);
+    }
+    if (_rx != NOPIN) {
+        _writer = 0;
+        _reader = 0;
+
+        _rxBits = 2 * (_bits + _stop + (_parity != UART_PARITY_NONE ? 1 : 0) + 1) - 1;
+        _rxPgm = _getRxProgram(_rxBits, _rxInverted);
+        int off;
+        if (!_rxPgm->prepare(&_rxPIO, &_rxSM, &off)) {
+            DEBUGCORE("ERROR: Unable to allocate PIO RX UART, out of PIO resources\n");
+            return;
+        }
+        // Stash away the created RX port for the IRQ handler
+        _pioSP[pio_get_index(_rxPIO)][_rxSM] = this;
+
+        pinMode(_rx, INPUT);
+        pio_rx_program_init(_rxPIO, _rxSM, off, _rx);
+        pio_sm_clear_fifos(_rxPIO, _rxSM); // Remove any existing data
+
+        // Put phase divider into OSR w/o using add'l program memory
+        pio_sm_put_blocking(_rxPIO, _rxSM, clock_get_hz(clk_sys) / (_baud * 2) - 7 /* insns in PIO halfbit loop */);
+        pio_sm_exec(_rxPIO, _rxSM, pio_encode_pull(false, false));
+
+        // Join the TX FIFO to the RX one now that we don't need it
+        _rxPIO->sm[_rxSM].shiftctrl |= 0x80000000;
+
+        // Enable interrupts on rxfifo
+        switch (_rxSM) {
+        case 0: pio_set_irq0_source_enabled(_rxPIO, pis_sm0_rx_fifo_not_empty, true); break;
+        case 1: pio_set_irq0_source_enabled(_rxPIO, pis_sm1_rx_fifo_not_empty, true); break;
+        case 2: pio_set_irq0_source_enabled(_rxPIO, pis_sm2_rx_fifo_not_empty, true); break;
+        case 3: pio_set_irq0_source_enabled(_rxPIO, pis_sm3_rx_fifo_not_empty, true); break;
+        }
+        auto irqno = pio_get_index(_rxPIO) == 0 ? PIO0_IRQ_0 : PIO1_IRQ_0;
+        irq_set_exclusive_handler(irqno, _fifoIRQ);
+        irq_set_enabled(irqno, true);
+
+        pio_sm_set_enabled(_rxPIO, _rxSM, true);
+    }
+
+    _running = true;
+}
+
+void SerialModPIO::end() {
+    if (!_running) {
+        return;
+    }
+    if (_tx != NOPIN) {
+        pio_sm_set_enabled(_txPIO, _txSM, false);
+        pio_sm_unclaim(_txPIO, _txSM);
+    }
+    if (_rx != NOPIN) {
+        pio_sm_set_enabled(_rxPIO, _rxSM, false);
+        pio_sm_unclaim(_rxPIO, _rxSM);
+        _pioSP[pio_get_index(_rxPIO)][_rxSM] = nullptr;
+        // If no more active, disable the IRQ
+        auto pioNum = pio_get_index(_rxPIO);
+        bool used = false;
+        for (int i = 0; i < 4; i++) {
+            used = used || !!_pioSP[pioNum][i];
+        }
+        if (!used) {
+            auto irqno = pioNum == 0 ? PIO0_IRQ_0 : PIO1_IRQ_0;
+            irq_set_enabled(irqno, false);
+        }
+    }
+    _running = false;
+}
+
+int SerialModPIO::peek() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_rx == NOPIN)) {
+        return -1;
+    }
+    // If there's something in the FIFO now, just peek at it
+    if (_writer != _reader) {
+        return _queue[_reader];
+    }
+    return -1;
+}
+
+int SerialModPIO::read() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_rx == NOPIN)) {
+        return -1;
+    }
+    if (_writer != _reader) {
+        auto ret = _queue[_reader];
+        asm volatile("" ::: "memory"); // Ensure the value is read before advancing
+        auto next_reader = (_reader + 1) % _fifoSize;
+        asm volatile("" ::: "memory"); // Ensure the reader value is only written once, correctly
+        _reader = next_reader;
+        return ret;
+    }
+    return -1;
+}
+
+bool SerialModPIO::overflow() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_rx == NOPIN)) {
+        return false;
+    }
+
+    bool hold = _overflow;
+    _overflow = false;
+    return hold;
+}
+
+int SerialModPIO::available() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_rx == NOPIN)) {
+        return 0;
+    }
+    return (_writer - _reader) % _fifoSize;
+}
+
+int SerialModPIO::availableForWrite() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_tx == NOPIN)) {
+        return 0;
+    }
+    return 8 - pio_sm_get_tx_fifo_level(_txPIO, _txSM);
+}
+
+void SerialModPIO::flush() {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_tx == NOPIN)) {
+        return;
+    }
+    while (!pio_sm_is_tx_fifo_empty(_txPIO, _txSM)) {
+        delay(1); // Wait for all FIFO to be read
+    }
+    // Could have 1 byte being transmitted, so wait for bit times
+    delay((1000 * (_txBits + 1)) / _baud);
+}
+
+void SerialModPIO::setInverted(bool invTx, bool invRx) {
+    _txInverted = invTx;
+    _rxInverted = invRx;
+}
+
+size_t SerialModPIO::write(uint8_t c) {
+    // CoreMutex m(&_mutex);
+    if (!_running || (_tx == NOPIN)) {
+        return 0;
+    }
+
+    uint32_t val = c;
+    if (_parity == UART_PARITY_NONE) {
+        val |= 7 << _bits; // Set 2 stop bits, the HW will only transmit the required number
+    } else if (_parity == UART_PARITY_EVEN) {
+        val |= ::_parity(_bits, c) << _bits;
+        val |= 7 << (_bits + 1);
+    } else {
+        val |= (1 ^ ::_parity(_bits, c)) << _bits;
+        val |= 7 << (_bits + 1);
+    }
+    val <<= 1;  // Start bit = low
+
+    pio_sm_put_blocking(_txPIO, _txSM, _txInverted ? ~val : val);
+
+    return 1;
+}
+
+SerialModPIO::operator bool() {
+    return _running;
+}
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.h b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.h
new file mode 100644
index 0000000000000000000000000000000000000000..73d41f98e5cadfb2825de01e7af13f9df09ddca7
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/SerialModPIO.h
@@ -0,0 +1,91 @@
+/*
+    Serial-over-PIO for the Raspberry Pi Pico RP2040
+
+    Copyright (c) 2021 Earle F. Philhower, III <earlephilhower@yahoo.com>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#include <Arduino.h>
+#include "api/HardwareSerial.h"
+#include <stdarg.h>
+#include <queue>
+#include <hardware/uart.h>
+#include "CoreMutex.h"
+
+extern "C" typedef struct uart_inst uart_inst_t;
+
+class SerialModPIO : public HardwareSerial {
+public:
+    static const pin_size_t NOPIN = 0xff; // Use in constructor to disable RX or TX unit
+    SerialModPIO(pin_size_t tx, pin_size_t rx, size_t fifoSize = 32);
+    ~SerialModPIO();
+
+    void begin(unsigned long baud = 115200) override {
+        begin(baud, SERIAL_8N1);
+    };
+    void begin(unsigned long baud, uint16_t config) override;
+    void end() override;
+
+    void setInverted(bool invTx = true, bool invRx = true);
+
+    virtual int peek() override;
+    virtual int read() override;
+    virtual int available() override;
+    virtual int availableForWrite() override;
+    virtual void flush() override;
+    virtual size_t write(uint8_t c) override;
+    bool overflow();
+    using Print::write;
+    operator bool() override;
+
+    // Not to be called by users, only from the IRQ handler.  In public so that the C-language IQR callback can access it
+    void _handleIRQ();
+
+protected:
+    bool _running = false;
+    pin_size_t _tx, _rx;
+    int _baud;
+    int _bits;
+    uart_parity_t _parity;
+    int _stop;
+    bool _overflow;
+    mutex_t _mutex;
+    bool _txInverted = false;
+    bool _rxInverted = false;
+
+    PIOProgram *_txPgm;
+    PIO _txPIO;
+    int _txSM;
+    int _txBits;
+
+    PIOProgram *_rxPgm;
+    PIO _rxPIO;
+    int _rxSM;
+    int _rxBits;
+
+    // Lockless, IRQ-handled circular queue
+    size_t   _fifoSize;
+    uint32_t _writer;
+    uint32_t _reader;
+    uint8_t  *_queue;
+};
+
+#ifdef ARDUINO_NANO_RP2040_CONNECT
+// NINA updates
+extern SerialModPIO Serial3;
+#endif
\ No newline at end of file
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart.pio b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart.pio
new file mode 100644
index 0000000000000000000000000000000000000000..9150f8507208d691f9101598569fb7c1dd138c8d
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart.pio
@@ -0,0 +1,109 @@
+; pio_uart for the Raspberry Pi Pico RP2040
+;
+; Based loosely off of the uart_rx/_tx examples in the pico-examples repo,
+; but heavily modified and optimized to not require changing the PIO
+; clocks so that Tone and Servo won't be affected, and multiple different
+; PIO ports can be run at different baud at the same time.
+;
+; Copyright (c) 2021 Earle F. Philhower, III <earlephilhower@yahoo.com>
+;
+; This library is free software; you can redistribute it and/or
+; modify it under the terms of the GNU Lesser General Public
+; License as published by the Free Software Foundation; either
+; version 2.1 of the License, or (at your option) any later version.
+;
+; This library is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; Lesser General Public License for more details.
+;
+; You should have received a copy of the GNU Lesser General Public
+; License along with this library; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+
+
+.program pio_tx
+.side_set 1 opt
+
+; We shift out the start and stop bit as part of the FIFO
+    set x, 9
+
+    pull               side 1 ; Force stop bit
+
+; Send the bits
+bitloop:
+    out pins, 1
+    mov y, isr                ; ISR is loaded by the setup routine with the period-1 count
+wait_bit:
+    jmp y-- wait_bit
+    jmp x-- bitloop
+
+    
+
+% c-sdk {
+
+static inline void pio_tx_program_init(PIO pio, uint sm, uint offset, uint pin_tx) {
+    // Tell PIO to initially drive output-high on the selected pin, then map PIO
+    // onto that pin with the IO muxes.
+    pio_sm_set_pins_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
+    pio_sm_set_pindirs_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
+    pio_gpio_init(pio, pin_tx);
+
+    pio_sm_config c = pio_tx_program_get_default_config(offset);
+
+    // OUT shifts to right, no autopull
+    sm_config_set_out_shift(&c, true, false, 32);
+
+    // We are mapping both OUT and side-set to the same pin, because sometimes
+    // we need to assert user data onto the pin (with OUT) and sometimes
+    // assert constant values (start/stop bit)
+    sm_config_set_out_pins(&c, pin_tx, 1);
+    sm_config_set_sideset_pins(&c, pin_tx);
+
+    // We only need TX, so get an 8-deep FIFO!
+    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
+
+    pio_sm_init(pio, sm, offset, &c);
+}
+
+%}
+
+
+.program pio_rx
+
+; IN pin 0 and JMP pin are both mapped to the GPIO used as UART RX.
+
+start:
+    set x, 18           ; Preload bit counter...we'll shift in the start bit and stop bit, and each bit will be double-recorded (to be fixed by RP2040 code)
+    wait 0 pin 0        ; Stall until start bit is asserted
+
+bitloop:
+   ; Delay until 1/2 way into the bit time
+    mov y, osr
+wait_half:
+    jmp y-- wait_half
+
+    ; Read in the bit
+    in pins, 1          ; Shift data bit into ISR
+    jmp x-- bitloop     ; Loop all bits
+    
+    push                ; Stuff it and wait for next start
+
+
+% c-sdk {
+static inline void pio_rx_program_init(PIO pio, uint sm, uint offset, uint pin) {
+    pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
+    pio_gpio_init(pio, pin);
+    gpio_pull_up(pin);
+
+    pio_sm_config c = pio_rx_program_get_default_config(offset);
+    sm_config_set_in_pins(&c, pin); // for WAIT, IN
+    sm_config_set_jmp_pin(&c, pin); // for JMP
+    // Shift to right, autopull disabled
+    sm_config_set_in_shift(&c, true, false, 32);
+
+    pio_sm_init(pio, sm, offset, &c);
+}
+
+%}
\ No newline at end of file
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart_mod.pio.h b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart_mod.pio.h
new file mode 100644
index 0000000000000000000000000000000000000000..040a3c0196ed9f718da4cdb19a43cd72b5e5c826
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/pio_uart_mod.pio.h
@@ -0,0 +1,106 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// ------ //
+// pio_tx //
+// ------ //
+
+#define pio_tx_wrap_target 0
+#define pio_tx_wrap 5
+
+static const uint16_t pio_tx_program_instructions[] = {
+    //     .wrap_target
+    0xe029, //  0: set    x, 9
+    0x98a0, //  1: pull   block           side 1
+    0x6001, //  2: out    pins, 1
+    0xa046, //  3: mov    y, isr
+    0x0084, //  4: jmp    y--, 4
+    0x0042, //  5: jmp    x--, 2
+    //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program pio_tx_program = {
+    .instructions = pio_tx_program_instructions,
+    .length = 6,
+    .origin = -1,
+};
+
+static inline pio_sm_config pio_tx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + pio_tx_wrap_target, offset + pio_tx_wrap);
+    sm_config_set_sideset(&c, 2, true, false);
+    return c;
+}
+
+static inline void pio_tx_program_init(PIO pio, uint sm, uint offset, uint pin_tx) {
+    // Tell PIO to initially drive output-high on the selected pin, then map PIO
+    // onto that pin with the IO muxes.
+    pio_sm_set_pins_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
+    pio_sm_set_pindirs_with_mask(pio, sm, 1u << pin_tx, 1u << pin_tx);
+    pio_gpio_init(pio, pin_tx);
+    pio_sm_config c = pio_tx_program_get_default_config(offset);
+    // OUT shifts to right, no autopull
+    sm_config_set_out_shift(&c, true, false, 32);
+    // We are mapping both OUT and side-set to the same pin, because sometimes
+    // we need to assert user data onto the pin (with OUT) and sometimes
+    // assert constant values (start/stop bit)
+    sm_config_set_out_pins(&c, pin_tx, 1);
+    sm_config_set_sideset_pins(&c, pin_tx);
+    // We only need TX, so get an 8-deep FIFO!
+    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
+    pio_sm_init(pio, sm, offset, &c);
+}
+
+#endif
+
+// ------ //
+// pio_rx //
+// ------ //
+
+#define pio_rx_wrap_target 0
+#define pio_rx_wrap 6
+
+static const uint16_t pio_rx_program_instructions[] = {
+    //     .wrap_target
+    0xe032, //  0: set    x, 18
+    0x2020, //  1: wait   0 pin, 0
+    0xa047, //  2: mov    y, osr
+    0x0083, //  3: jmp    y--, 3
+    0x4001, //  4: in     pins, 1
+    0x0042, //  5: jmp    x--, 2
+    0x8020, //  6: push   block
+    //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program pio_rx_program = {
+    .instructions = pio_rx_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+static inline pio_sm_config pio_rx_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + pio_rx_wrap_target, offset + pio_rx_wrap);
+    return c;
+}
+
+static inline void pio_rx_program_init(PIO pio, uint sm, uint offset, uint pin) {
+    pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, false);
+    pio_gpio_init(pio, pin);
+    gpio_pull_up(pin);
+    pio_sm_config c = pio_rx_program_get_default_config(offset);
+    sm_config_set_in_pins(&c, pin); // for WAIT, IN
+    sm_config_set_jmp_pin(&c, pin); // for JMP
+    // Shift to right, autopull disabled
+    sm_config_set_in_shift(&c, true, false, 32);
+    pio_sm_init(pio, sm, offset, &c);
+}
+
+#endif
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.cpp b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..45f33a49e0baa49ed6a53d6334f04f65afcb17b1
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.cpp
@@ -0,0 +1,39 @@
+#include "screen.h"
+#include <Adafruit_GFX.h>
+#include <Adafruit_SSD1306.h>
+#include <Wire.h>
+
+// OLED 
+#define SCREEN_WIDTH 128 // OLED display width, in pixels
+#define SCREEN_HEIGHT 64 // OLED display height, in pixels
+
+#define X_POS 0
+#define Y_POS 0
+#define TXT_SIZE 1
+
+// even for displays with i.e. "0x78" printed on the back, 
+// the address that works is 0x3C, IDK 
+#define SCREEN_ADDRESS 0x3C
+
+Adafruit_SSD1306 display(SCREEN_WIDTH, SCREEN_HEIGHT);
+
+// warning: is blocking, takes ~ 33ms ! 
+void displayPrint(String msg){
+  display.clearDisplay();
+  display.setCursor(X_POS, Y_POS);
+  display.print(msg);
+  display.display();
+}
+
+void displaySetup(void){
+  // initialize the screen,
+  // oddly, SWITCHCAPVCC is the option that works even though OLED is hooked to 5V 
+  display.begin(SSD1306_SWITCHCAPVCC, SCREEN_ADDRESS);
+  display.clearDisplay();
+  display.display();
+  display.setTextColor(SSD1306_WHITE);
+  display.setTextSize(TXT_SIZE);
+  display.setTextWrap(true);
+  display.dim(false);
+  displayPrint("bonjour...");
+}
\ No newline at end of file
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.h b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.h
new file mode 100644
index 0000000000000000000000000000000000000000..91fb08f0bd738345349813ee51aec5739694a9f9
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/screen.h
@@ -0,0 +1,9 @@
+#ifndef SCREEN_H_
+#define SCREEN_H_
+
+#include <Arduino.h>
+
+void displaySetup(void);
+void displayPrint(String msg);
+
+#endif 
\ No newline at end of file
diff --git a/rp2040_uart/code/uart_pio_earle_mod_tx_rx/uart_pio_earle_mod_tx_rx.ino b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/uart_pio_earle_mod_tx_rx.ino
new file mode 100644
index 0000000000000000000000000000000000000000..fd7ece3a4947812872618b46ed7c82d5cead83b7
--- /dev/null
+++ b/rp2040_uart/code/uart_pio_earle_mod_tx_rx/uart_pio_earle_mod_tx_rx.ino
@@ -0,0 +1,109 @@
+#include "screen.h"
+#include "SerialModPIO.h"
+
+// using an RP2040 XIAO 
+// with earle philhower core 
+
+// "D10" - GPIO 3 
+#define PIN_DEBUG 3 
+
+#define PIN_RX 1
+#define PIN_TX 0 
+#define PIO_BAUD 5000000
+
+SerialModPIO serial(PIN_TX, PIN_RX);
+
+void setup(void){
+  pinMode(PIN_LED_B, OUTPUT);
+  digitalWrite(PIN_LED_B, LOW);
+
+  pinMode(PIN_DEBUG, OUTPUT);
+  digitalWrite(PIN_DEBUG, LOW);
+
+  // the display setup 
+  displaySetup();
+  displayPrint("bonjour...");
+
+  serial.begin(PIO_BAUD);
+}
+
+uint32_t lastUpdate = 0;
+uint32_t updateInterval = 1000;
+
+uint8_t expectedRx = 0;
+uint32_t missCount = 0;
+uint32_t catchCount = 0;
+
+uint8_t expected_miss = 0;
+uint8_t actual_miss = 0;
+
+uint8_t chars[256];
+
+uint8_t seqNum = 0;
+
+// and let's calculate actual bandwidth (success bytes / sec * 8) and bitrate ( * 10)
+// and record avg per-cycle pickup ? 
+
+uint32_t lastTx = 0;
+uint32_t txInterval = 0;
+float intervalEstimate = 0.0F;
+
+uint32_t lastTransmit = 0;
+uint32_t transmitInterval = 500;
+
+void loop(void){
+
+  // catch 'em AFAP 
+  if(serial.available()){
+    while(serial.available()){
+      int data = serial.read();
+      // earle core throws -1 if we have an error, 
+      if(data < 0) {
+        return;
+      }
+      // count total hits 
+      catchCount ++;
+      // stuff it 
+      chars[catchCount & 255] = data;
+      // check for missus 
+      if(data != expectedRx){
+        actual_miss = data;
+        expected_miss = expectedRx;
+        missCount ++;
+      }
+      expectedRx = data + 1;
+    }
+  }
+
+  // and write AFAP, recording outgoing times 
+  if(serial.availableForWrite()){
+  // if(lastTransmit + transmitInterval < micros()){
+    // lastTransmit = micros();
+    digitalWrite(PIN_DEBUG, !digitalRead(PIN_DEBUG));
+    serial.write(seqNum);
+    seqNum += 1;
+    uint32_t txTime = micros();
+    txInterval = txTime - lastTx;
+    lastTx = txTime;
+    // intervalEstimate = (float)txInterval * 0.01F + intervalEstimate * 0.99F;
+  }
+
+  // ... 
+  if(lastUpdate + updateInterval < millis()){
+    lastUpdate = millis();
+    digitalWrite(PIN_LED_B, !digitalRead(PIN_LED_B));
+    displayPrint(String(missCount) + " / " + String(catchCount) + " \n" + 
+    "miss: " + String((float)missCount / (float)catchCount, 9) + "\n" + 
+    String(expected_miss) + ": " + String(actual_miss) + "\n" +
+    "int: " + String(txInterval) + "\n" 
+    // "avg interval: " + String(intervalEstimate, 4)
+    // String(chars[0]) + ", " + String(chars[1]) + ", " + String(chars[2]) + ", " + String(chars[3]) + ", " + String(chars[4]) + ", " + String(chars[5]) + ", "
+    );
+
+    // displayPrint(spipi_print());
+    // displayPrint(String(rxCount) + "\n" + 
+    //   String(rxSize) 
+    // );
+  }
+}
+