Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fabric] Support for routing planes #17777

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ void kernel_main() {

// do a noc multicast to tx kernels
uint64_t mcast_dest_addr = get_noc_addr_helper(mcast_encoding, tx_signal_addr);
noc_async_write_multicast_one_packet((uint32_t)mcast_sem, mcast_dest_addr, sizeof(uint32_t), num_mcast_dests);
noc_async_write_multicast_loopback_src((uint32_t)mcast_sem, mcast_dest_addr, sizeof(uint32_t), num_mcast_dests);
noc_async_writes_flushed();
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,8 @@ uint32_t max_packet_size_mask;

auto input_queue_state = select_input_queue<pkt_dest_size_choice>();
volatile local_pull_request_t *local_pull_request = (volatile local_pull_request_t *)(data_buffer_start_addr - 1024);
volatile tt_l1_ptr fabric_router_l1_config_t* routing_table =
reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(routing_table_start_addr);
volatile fabric_client_interface_t* client_interface = (volatile fabric_client_interface_t*)client_interface_addr;
volatile tt_l1_ptr fabric_router_l1_config_t* routing_table;
volatile fabric_client_interface_t* client_interface;

fvc_producer_state_t test_producer __attribute__((aligned(16)));
fvcc_inbound_state_t fvcc_test_producer __attribute__((aligned(16)));
Expand Down Expand Up @@ -385,15 +384,12 @@ bool test_buffer_handler() {
}

void kernel_main() {
tt_fabric_init();

uint32_t rt_args_idx = 0;
time_seed = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t router_x = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t router_y = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
Expand All @@ -406,11 +402,6 @@ void kernel_main() {
target_address = base_target_address;
rx_addr_hi = base_target_address + rx_buf_size;

uint64_t router_config_addr =
NOC_XY_ADDR(NOC_X(router_x), NOC_Y(router_y), eth_l1_mem::address_map::FABRIC_ROUTER_CONFIG_BASE);
noc_async_read_one_packet(router_config_addr, routing_table_start_addr, sizeof(fabric_router_l1_config_t));
noc_async_read_barrier();

zero_l1_buf(test_results, test_results_size_bytes);
test_results[PQ_TEST_STATUS_INDEX] = PACKET_QUEUE_TEST_STARTED;
test_results[PQ_TEST_STATUS_INDEX+1] = (uint32_t) local_pull_request;
Expand All @@ -421,10 +412,6 @@ void kernel_main() {
zero_l1_buf(reinterpret_cast<tt_l1_ptr uint32_t*>(data_buffer_start_addr), data_buffer_size_words * PACKET_WORD_SIZE_BYTES);
zero_l1_buf((uint32_t*)local_pull_request, sizeof(local_pull_request_t));
zero_l1_buf((uint32_t*)&packet_header, sizeof(packet_header_t));
zero_l1_buf((uint32_t*)client_interface, sizeof(fabric_client_interface_t));
client_interface->gk_interface_addr = ((uint64_t)gk_interface_addr_h << 32) | gk_interface_addr_l;
client_interface->gk_msg_buf_addr =
(((uint64_t)gk_interface_addr_h << 32) | gk_interface_addr_l) + offsetof(gatekeeper_info_t, gk_msg_buf);

if constexpr (pkt_dest_size_choice == pkt_dest_size_choices_t::RANDOM) {
input_queue_state.init(src_endpoint_id, prng_seed);
Expand Down Expand Up @@ -474,8 +461,10 @@ void kernel_main() {
uint32_t curr_packet_words_sent = 0;
uint32_t packet_count = 0;

// make sure fabric node gatekeeper is available.
fabric_endpoint_init();
// initialize client
fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(
client_interface->routing_tables_l1_offset + sizeof(fabric_router_l1_config_t) * routing_plane);

while (true) {
iter++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ uint32_t max_packet_size_mask;

auto input_queue_state = select_input_queue<pkt_dest_size_choice>();
volatile local_pull_request_t* local_pull_request = (volatile local_pull_request_t*)(data_buffer_start_addr - 1024);
volatile tt_l1_ptr fabric_router_l1_config_t* routing_table =
reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(routing_table_start_addr);
volatile fabric_client_interface_t* client_interface = (volatile fabric_client_interface_t*)client_interface_addr;
volatile tt_l1_ptr fabric_router_l1_config_t* routing_table;
volatile fabric_client_interface_t* client_interface;
volatile tt_l1_ptr chan_req_buf* client_pull_req_buf =
reinterpret_cast<tt_l1_ptr chan_req_buf*>(client_pull_req_buf_addr);

Expand Down Expand Up @@ -328,24 +327,16 @@ bool test_buffer_handler(socket_handle_t* socket_handle) {
}

void kernel_main() {
tt_fabric_init();

// TODO: refactor
src_endpoint_id = get_arg_val<uint32_t>(0);
noc_offset = get_arg_val<uint32_t>(1);
uint32_t router_x = get_arg_val<uint32_t>(2);
uint32_t router_y = get_arg_val<uint32_t>(3);
dest_device = get_arg_val<uint32_t>(4);
uint32_t routing_plane = get_arg_val<uint32_t>(2);
dest_device = get_arg_val<uint32_t>(3);

if (ASYNC_WR == test_command) {
target_address = get_arg_val<uint32_t>(5);
}

uint64_t router_config_addr = NOC_XY_ADDR(router_x, router_y, eth_l1_mem::address_map::FABRIC_ROUTER_CONFIG_BASE);
noc_async_read_one_packet(
router_config_addr, routing_table_start_addr, sizeof(tt::tt_fabric::fabric_router_l1_config_t));
noc_async_read_barrier();

zero_l1_buf(test_results, test_results_size_bytes);
test_results[PQ_TEST_STATUS_INDEX] = PACKET_QUEUE_TEST_STARTED;
test_results[PQ_TEST_STATUS_INDEX + 1] = (uint32_t)local_pull_request;
Expand All @@ -357,15 +348,15 @@ void kernel_main() {
reinterpret_cast<tt_l1_ptr uint32_t*>(data_buffer_start_addr), data_buffer_size_words * PACKET_WORD_SIZE_BYTES);
zero_l1_buf((uint32_t*)local_pull_request, sizeof(local_pull_request_t));
zero_l1_buf((uint32_t*)&packet_header, sizeof(packet_header_t));
zero_l1_buf((uint32_t*)client_interface, sizeof(fabric_client_interface_t));

// initialize client
fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);
routing_table = reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(
client_interface->routing_tables_l1_offset + sizeof(fabric_router_l1_config_t) * routing_plane);

zero_l1_buf((uint32_t*)client_pull_req_buf, sizeof(chan_req_buf));
client_interface->gk_interface_addr = ((uint64_t)gk_interface_addr_h << 32) | gk_interface_addr_l;
client_interface->gk_msg_buf_addr = client_interface->gk_interface_addr + offsetof(gatekeeper_info_t, gk_msg_buf);
client_interface->pull_req_buf_addr = xy_local_addr | client_pull_req_buf_addr;

// make sure fabric node gatekeeper is available.
fabric_endpoint_init();

if constexpr (pkt_dest_size_choice == pkt_dest_size_choices_t::RANDOM) {
input_queue_state.init(src_endpoint_id, prng_seed);
} else if constexpr (pkt_dest_size_choice == pkt_dest_size_choices_t::SAME_START_RNDROBIN_FIX_SIZE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ constexpr uint32_t dest_endpoint_start_id = get_compile_time_arg_val(2);
constexpr uint32_t data_buffer_start_addr = get_compile_time_arg_val(3);
constexpr uint32_t data_buffer_size_words = get_compile_time_arg_val(4);

constexpr uint32_t routing_table_start_addr = get_compile_time_arg_val(5);

constexpr uint32_t test_results_addr_arg = get_compile_time_arg_val(6);
constexpr uint32_t test_results_size_bytes = get_compile_time_arg_val(7);

Expand Down Expand Up @@ -53,7 +51,7 @@ uint32_t base_target_address = get_compile_time_arg_val(17);

// atomic increment for the ATOMIC_INC command
constexpr uint32_t atomic_increment = get_compile_time_arg_val(18);
// constexpr uint32_t dest_device = get_compile_time_arg_val(21);

uint32_t dest_device;

constexpr uint32_t signal_address = get_compile_time_arg_val(19);
Expand All @@ -65,10 +63,7 @@ constexpr uint32_t w_depth = get_compile_time_arg_val(25);
constexpr uint32_t n_depth = get_compile_time_arg_val(26);
constexpr uint32_t s_depth = get_compile_time_arg_val(27);

volatile local_pull_request_t* local_pull_request = (volatile local_pull_request_t*)(data_buffer_start_addr - 1024);
volatile tt_l1_ptr fabric_router_l1_config_t* routing_table =
reinterpret_cast<tt_l1_ptr fabric_router_l1_config_t*>(routing_table_start_addr);
volatile fabric_client_interface_t* client_interface = (volatile fabric_client_interface_t*)client_interface_addr;
volatile fabric_client_interface_t* client_interface;

uint64_t xy_local_addr;
uint32_t target_address;
Expand All @@ -94,15 +89,12 @@ inline void notify_traffic_controller() {
}

void kernel_main() {
tt_fabric_init();

uint32_t rt_args_idx = 0;
time_seed = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
src_endpoint_id = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
controller_noc_offset = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t router_x = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t router_y = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t routing_plane = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
dest_device = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
uint32_t rx_buf_size = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
gk_interface_addr_l = get_arg_val<uint32_t>(increment_arg_idx(rt_args_idx));
Expand All @@ -114,26 +106,13 @@ void kernel_main() {

target_address = base_target_address;

// Read in the routing table
uint64_t router_config_addr =
NOC_XY_ADDR(NOC_X(router_x), NOC_Y(router_y), eth_l1_mem::address_map::FABRIC_ROUTER_CONFIG_BASE);
noc_async_read_one_packet(router_config_addr, routing_table_start_addr, sizeof(fabric_router_l1_config_t));
noc_async_read_barrier();

zero_l1_buf(test_results, test_results_size_bytes);
test_results[PQ_TEST_STATUS_INDEX] = PACKET_QUEUE_TEST_STARTED;
test_results[PQ_TEST_STATUS_INDEX + 1] = (uint32_t)local_pull_request;

test_results[PQ_TEST_MISC_INDEX] = 0xff000000;
test_results[PQ_TEST_MISC_INDEX + 1] = 0xcc000000 | src_endpoint_id;

zero_l1_buf(
reinterpret_cast<tt_l1_ptr uint32_t*>(data_buffer_start_addr), data_buffer_size_words * PACKET_WORD_SIZE_BYTES);
zero_l1_buf((uint32_t*)local_pull_request, sizeof(local_pull_request_t));
zero_l1_buf((uint32_t*)client_interface, sizeof(fabric_client_interface_t));
client_interface->gk_interface_addr = ((uint64_t)gk_interface_addr_h << 32) | gk_interface_addr_l;
client_interface->gk_msg_buf_addr =
(((uint64_t)gk_interface_addr_h << 32) | gk_interface_addr_l) + offsetof(gatekeeper_info_t, gk_msg_buf);

uint64_t data_words_sent = 0;
uint32_t packet_count = 0;
Expand All @@ -160,8 +139,8 @@ void kernel_main() {
);
}

// make sure fabric node gatekeeper is available.
fabric_endpoint_init();
// initialize client
fabric_endpoint_init(client_interface_addr, gk_interface_addr_l, gk_interface_addr_h);

// notify the controller kernel that this worker is ready to proceed
notify_traffic_controller();
Expand All @@ -171,17 +150,18 @@ void kernel_main() {
// all tx workers are ready to send data
while (*(volatile tt_l1_ptr uint32_t*)signal_address == 0);

uint64_t start_timestamp = get_timestamp();
fabric_setup_pull_request(
data_buffer_start_addr, // source address in sender’s memory
max_packet_size_words * 16 // number of bytes to write to remote destination
);

uint64_t start_timestamp = get_timestamp();

while (true) {
client_interface->local_pull_request.pull_request.words_read = 0;
if constexpr (mcast_data) {
fabric_async_write_multicast<ASYNC_WR_SEND>(
0, // the network plane to use for this transaction
routing_plane, // the network plane to use for this transaction
data_buffer_start_addr, // source address in sender’s memory
dest_device >> 16,
dest_device & 0xFFFF,
Expand All @@ -190,11 +170,10 @@ void kernel_main() {
e_depth,
w_depth,
n_depth,
s_depth
);
s_depth);
} else {
fabric_async_write<ASYNC_WR_SEND>(
0, // the network plane to use for this transaction
routing_plane, // the network plane to use for this transaction
data_buffer_start_addr, // source address in sender’s memory
dest_device >> 16,
dest_device & 0xFFFF,
Expand Down
Loading
Loading