Skip to content

Commit

Permalink
Fix pulse timing and initial state of error flag
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyparrish committed Aug 31, 2024
1 parent 9ab2e87 commit 61d6917
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 35 deletions.
22 changes: 22 additions & 0 deletions firmware/fast-gpio.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,36 @@
#endif


// Without the nops, these pulses happen in about 16ns, but the voltage change
// from the GPIO pins (~8ns/V over 3.3V) is much slower than the CPU speed
// (125MHz or 8ns/cycle). So without nops, at top speed, this signal would
// bounce between ~2V and ~1V, never reaching high (>= 2.3V) or low (<= 0.8V).

// One nop after each pin change roughly doubles the duration to 32ns.
// With this timing, the voltage ranges from ~2.4V to ~0.5V. It spends about
// 4ns in the "high" zone and about 6ns in the "low" zone. Our minimum pulse
// widths for most ICs are around 7ns (74AHC74 clear/set, 74LV163A clock,
// 74LV164 clock), so this is not long enough in stable states.

// With two nops, the pin tends to stay in the right range for 12-16ns,
// reaching all the way to VCC and 0V.

// Emits one active-low pulse on PIN: FAST_CLEAR(PIN), two nops, FAST_SET(PIN),
// then two more nops. The nops after each pin change give the pin voltage
// time to reach a valid logic level before the next change, so do not remove
// or reorder them. Expands to a brace-delimited block; PIN is expanded twice.
#define FAST_PULSE_ACTIVE_LOW(PIN) { \
FAST_CLEAR(PIN); \
asm("nop"); \
asm("nop"); \
FAST_SET(PIN); \
asm("nop"); \
asm("nop"); \
}

// Emits one active-high pulse on PIN: FAST_SET(PIN), two nops,
// FAST_CLEAR(PIN), then two more nops. The nops after each pin change give
// the pin voltage time to reach a valid logic level before the next change,
// so do not remove or reorder them. Expands to a brace-delimited block; PIN
// is expanded twice.
#define FAST_PULSE_ACTIVE_HIGH(PIN) { \
FAST_SET(PIN); \
asm("nop"); \
asm("nop"); \
FAST_CLEAR(PIN); \
asm("nop"); \
asm("nop"); \
}

#define FAST_WRITE(PIN, VALUE) { \
Expand Down
4 changes: 1 addition & 3 deletions firmware/firmware.ino
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,7 @@ void setup() {
init_all_hardware();

#ifdef RUN_TESTS
if (!is_error_flagged()) {
run_tests();
}
run_tests();
#endif

// Allow the second core to start its loop.
Expand Down
10 changes: 8 additions & 2 deletions firmware/registers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ int is_error_flagged() {
// Reads one register. Drives the two address pins from bits 0 and 1 of
// register_address, waits 1 us, then returns the value read from the data
// pins via FAST_READ_MULTIPLE.
uint8_t read_register(int register_address) {
// Only bits 0 and 1 of register_address are used; higher bits are ignored.
// NOTE(review): the A1 write passes 0 or 2, so FAST_WRITE presumably treats
// VALUE as a truth value -- confirm against its definition.
FAST_WRITE(REG_PIN__A0, register_address & 1);
FAST_WRITE(REG_PIN__A1, register_address & 2);

// The data sheet for the 74HC670 says the data will be available after at
// most 295ns at 2V, and 59ns at 4.5V. (Not given for 3.3V, no curve
// graphed.) Here we wait 1 us to be sure that we are reading the right
// data. It is okay to read a register a little more slowly. Writing to
// SRAM and reading from the network are the critical operations in terms of
// speed.
// https://www.ti.com/lit/ds/symlink/cd74hc670.pdf
delayMicroseconds(1);
return FAST_READ_MULTIPLE(REG_PIN__D_MASK, REG_PIN__D_SHIFT);
}
82 changes: 52 additions & 30 deletions firmware/speed-tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <Arduino.h>
#include <HardwareSerial.h>

#include "fast-gpio.h"
#include "http.h"
#include "registers.h"
#include "sram.h"
Expand All @@ -22,46 +23,58 @@
// A safe buffer size for these tests, in bytes (100 KiB). Parenthesized so
// the macro expands as a single value inside larger expressions (for example
// `x / BUFFER_SIZE` or `BUFFER_SIZE % y`).
#define BUFFER_SIZE (100 * 1024)

// Explicitly unrolled loop for 10 repeated statements.
#define X10(a) { a; a; a; a; a; a; a; a; a; a; }
// Explicitly unrolled loop for 100 repeated statements.
#define X100(a) { X10(X10(a)); }
// Explicitly unrolled loop for 1k repeated statements.
#define X1k(a) { X10(X10(X10(a))); }
// A partially unrolled loop that minimizes time spent on incrementing and
// checking, while not exploding the program size to the point that it slows
// down execution (10k/100k unrolled) or overruns the available program space
// (1M unrolled).
//
// Runs `a` 1,000,000 times: 1,000 loop iterations of 1,000 unrolled copies.
// The loop counter `i` (0..999) is in scope for `a`, and it holds the same
// value for all 1,000 copies within one iteration. Any `i` the caller already
// has in scope is shadowed inside `a`.
#define X1M(a) { for (int i = 0; i < 1000; ++i) { X1k(a); } }

// Times X1M repetitions of an active-low pulse on SYNC_PIN__CMD_CLEAR.
// Returns the elapsed time in milliseconds. With one million pulses issued,
// that figure can be read directly as the average nanoseconds per pulse.
static long test_fast_gpio_speed() {
  // ~75 ns per pulse
  long start = millis();
  // Only the pulses belong inside the timed region.
  X1M(FAST_PULSE_ACTIVE_LOW(SYNC_PIN__CMD_CLEAR));
  long end = millis();
  return end - start;
}

// Times X1M repetitions of is_cmd_set(), discarding each result.
// Returns the elapsed time in milliseconds. With one million calls issued,
// that figure can be read directly as the average nanoseconds per read.
static long test_sync_token_read_speed() {
  // ~86 ns per read
  long start = millis();
  // The workload runs exactly once, so the ms-to-ns reading stays valid.
  X1M(is_cmd_set());
  long end = millis();
  return end - start;
}

// Times X1M repetitions of clear_cmd().
// Returns the elapsed time in milliseconds. With one million calls issued,
// that figure can be read directly as the average nanoseconds per clear.
static long test_sync_token_clear_speed() {
  // ~122 ns per clear
  long start = millis();
  // The workload runs exactly once, so the ms-to-ns reading stays valid.
  X1M(clear_cmd());
  long end = millis();
  return end - start;
}

// Times X1M repetitions of read_register(), discarding each result.
// Returns the elapsed time in milliseconds. With one million calls issued,
// that figure can be read directly as the average nanoseconds per read.
static long test_register_read_speed() {
  // ~1543 ns per read
  long start = millis();
  // `i` is the loop counter declared inside X1M. It is constant across each
  // unrolled run of 1,000 calls, so the register address (i & 3) advances
  // once per 1,000 reads rather than on every read.
  X1M(read_register(i & 3));
  long end = millis();
  return end - start;
}

// Measures how long it takes to start bank 0, write `bytes` bytes from
// `buffer` with sram_write(), and flush. Returns elapsed milliseconds.
//
// Reference timings:
//   100kB: ~116ms
//   1MB: ~1160ms
//   3s video+audio: ~1020ms
static long test_sram_speed(const uint8_t* buffer, int bytes) {
  const long began_ms = millis();

  sram_start_bank(0);
  sram_write(buffer, bytes);
  sram_flush();

  const long finished_ms = millis();
  return finished_ms - began_ms;
}
Expand Down Expand Up @@ -92,6 +105,11 @@ void run_tests() {
Serial.println("Failed to allocate buffer!");
while (true) { delay(1000); }
}
memset(buffer, 0x55, BUFFER_SIZE);

ms = test_fast_gpio_speed();
Serial.print(ms);
Serial.println(" ns avg per GPIO pulse."); // 1Mx pulses, ms => ns

ms = test_sync_token_read_speed();
Serial.print(ms);
Expand All @@ -111,15 +129,19 @@ void run_tests() {
Serial.print(BUFFER_SIZE);
Serial.println(" bytes to SRAM");

for (int i = 0; i < 10; i++) {
ms = test_download_speed(/* first_byte= */ i, ABOUT_3S_VIDEO_AUDIO_BYTES);
float bits = ABOUT_3S_VIDEO_AUDIO_BYTES * 8.0;
float seconds = ms / 1000.0;
float mbps = bits / seconds / 1024.0 / 1024.0;
Serial.print(ms);
Serial.print(" ms to stream ~3s video to SRAM (");
Serial.print(mbps);
Serial.println(" Mbps vs 2.50 Mbps minimum)");
if (is_error_flagged()) {
Serial.println("Error flagged, skipping network tests.");
} else {
for (int i = 0; i < 10; i++) {
ms = test_download_speed(/* first_byte= */ i, ABOUT_3S_VIDEO_AUDIO_BYTES);
float bits = ABOUT_3S_VIDEO_AUDIO_BYTES * 8.0;
float seconds = ms / 1000.0;
float mbps = bits / seconds / 1024.0 / 1024.0;
Serial.print(ms);
Serial.print(" ms to stream ~3s video to SRAM (");
Serial.print(mbps);
Serial.println(" Mbps vs 2.50 Mbps minimum)");
}
}

Serial.println("\n");
Expand Down
1 change: 1 addition & 0 deletions firmware/sram.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ static int active_bank_pin = -1;
// Explicitly unrolled loop for 16 bits of data: expands `a` 16 times in a row
// inside a single brace-delimited block, with no loop counter or branch.
#define X16(a) { a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; }
static inline void sram_write_word(uint16_t word_data) {
// ~20ns setup time from next data bit to rising edge of clock
X16(
FAST_WRITE(SRAM_PIN__DATA_NEXT_BIT, word_data & 0x8000);

Expand Down
3 changes: 3 additions & 0 deletions software/player/src/segavideo_menu.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ static void loadMenuColors() {
void segavideo_menu_init() {
kprintf("segavideo_menu_init\n");

// Clear the error flag, which may boot up in a random state.
clearPendingError();

// Load menu palettes.
loadMenuColors();

Expand Down

0 comments on commit 61d6917

Please sign in to comment.