Skip to content

Commit

Permalink
Implement blackhole cluster discovery
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanevskiTT committed Feb 7, 2025
1 parent 3e6e656 commit 2e367b4
Show file tree
Hide file tree
Showing 15 changed files with 422 additions and 4 deletions.
1 change: 1 addition & 0 deletions device/api/umd/device/blackhole_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ constexpr uint32_t ARC_FW_INT_VAL = 65536;

constexpr uint32_t ARC_MSG_RESPONSE_OK_LIMIT = 240;

// Absolute addresses of scratch words 2, 12 and 13.
// Each value equals 0x80030400 + 4 * <index>, i.e. consecutive indices are 4 bytes apart.
static constexpr uint32_t SCRATCH_RAM_2 = 0x80030408;
static constexpr uint32_t SCRATCH_RAM_12 = 0x80030430;
static constexpr uint32_t SCRATCH_RAM_13 = 0x80030434;

Expand Down
11 changes: 11 additions & 0 deletions device/api/umd/device/chip/chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class Chip {
public:
Chip(tt_SocDescriptor soc_descriptor);

Chip(const ChipInfo chip_info, tt_SocDescriptor soc_descriptor);

virtual ~Chip() = default;

tt_SocDescriptor& get_soc_descriptor();
Expand All @@ -29,6 +31,8 @@ class Chip {

void set_barrier_address_params(const barrier_address_params& barrier_address_params_);

const ChipInfo& get_chip_info();

// TODO: This should be private, once enough stuff is moved inside chip.
// Probably also moved to LocalChip.
tt_device_dram_address_params dram_address_params;
Expand All @@ -40,7 +44,14 @@ class Chip {
private:
void set_default_params(ARCH arch);

ChipInfo chip_info_;

tt_SocDescriptor soc_descriptor_;

protected:
void wait_chip_to_be_ready();

virtual void wait_eth_cores_training(const uint32_t timeout_per_core_ms = 1000);
};

} // namespace tt::umd
9 changes: 9 additions & 0 deletions device/api/umd/device/chip/local_chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,20 @@ class LocalChip : public Chip {
public:
LocalChip(tt_SocDescriptor soc_descriptor, int pci_device_id);

LocalChip(std::unique_ptr<TTDevice> tt_device);

TTDevice* get_tt_device() override;

bool is_mmio_capable() const override;

private:
std::unique_ptr<TTDevice> tt_device_;

void initialize_local_chip();

void initialize_tlb_manager();

protected:
void wait_eth_cores_training(const uint32_t timeout_per_core_ms = 1000) override;
};
} // namespace tt::umd
4 changes: 4 additions & 0 deletions device/api/umd/device/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,7 @@ class Cluster : public tt_device {
virtual void dram_membar(
const chip_id_t chip, const std::unordered_set<tt::umd::CoreCoord>& cores, const std::string& fallback_tlb);

static std::unique_ptr<tt_ClusterDescriptor> create_cluster_descriptor();
// Destructor
virtual ~Cluster();

Expand Down Expand Up @@ -1199,6 +1200,9 @@ class Cluster : public tt_device {
// coordinates. This is an internal helper function, until we switch the API to accept translated coordinates.
tt_xy_pair translate_chip_coord_virtual_to_translated(const chip_id_t chip_id, const tt_xy_pair core) const;

static std::unique_ptr<tt_ClusterDescriptor> create_cluster_descriptor(
const std::unordered_map<chip_id_t, std::unique_ptr<tt::umd::Chip>>& chips);

// State variables
std::vector<tt::ARCH> archs_in_cluster = {};
std::set<chip_id_t> all_chip_ids_ = {};
Expand Down
7 changes: 7 additions & 0 deletions device/api/umd/device/tt_cluster_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <unordered_set>
#include <vector>

#include "umd/device/chip/chip.h"
#include "umd/device/cluster.h"
#include "umd/device/tt_xy_pair.h"
#include "umd/device/types/arch.h"
#include "umd/device/types/cluster_descriptor_types.h"
Expand All @@ -25,6 +27,8 @@ class Node;
}

class tt_ClusterDescriptor {
friend class tt::umd::Cluster;

private:
tt_ClusterDescriptor() = default;

Expand All @@ -45,6 +49,7 @@ class tt_ClusterDescriptor {
std::unordered_map<chip_id_t, BoardType> chip_board_type = {};
std::unordered_map<chip_id_t, std::unordered_set<chip_id_t>> chips_grouped_by_closest_mmio;
std::unordered_map<chip_id_t, tt::ARCH> chip_arch = {};
std::map<ChipUID, chip_id_t> chip_uid_to_chip_id = {};

// one-to-many chip connections
struct Chip2ChipConnection {
Expand Down Expand Up @@ -110,6 +115,8 @@ class tt_ClusterDescriptor {
BoardType get_board_type(chip_id_t chip_id) const;
tt::ARCH get_arch(chip_id_t chip_id) const;

chip_id_t get_chip_id(const ChipUID &chip_uid) const;

bool ethernet_core_has_active_ethernet_link(chip_id_t local_chip, ethernet_channel_t local_ethernet_channel) const;
std::tuple<chip_id_t, ethernet_channel_t> get_chip_and_channel_of_remote_ethernet_core(
chip_id_t local_chip, ethernet_channel_t local_ethernet_channel) const;
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/tt_device/blackhole_tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class BlackholeTTDevice : public TTDevice {

ChipInfo get_chip_info() override;

void wait_arc_core_start(const tt_xy_pair arc_core, const uint32_t timeout_ms = 1000) override;

private:
static constexpr uint64_t ATU_OFFSET_IN_BH_BAR2 = 0x1200;
std::set<size_t> iatu_regions_;
Expand Down
2 changes: 2 additions & 0 deletions device/api/umd/device/tt_device/tt_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ class TTDevice {

virtual ChipInfo get_chip_info() = 0;

virtual void wait_arc_core_start(const tt_xy_pair arc_core, const uint32_t timeout_ms = 1000);

protected:
std::unique_ptr<PCIDevice> pci_device_;
std::unique_ptr<architecture_implementation> architecture_impl_;
Expand Down
175 changes: 175 additions & 0 deletions device/api/umd/device/types/blackhole_eth.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc.
*
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include "umd/device/types/cluster_descriptor_types.h"

namespace tt::umd {

namespace blackhole {

// Ethernet init postcodes. Every value carries 0xC0DE in its upper 16 bits; the lower
// 16 bits identify the init stage or outcome.
// NOTE(review): presumably these are matched against the `postcode` words of the boot
// result structs in this header — confirm against the firmware interface.
//
// Declared `inline constexpr` so that this header contributes a single definition of each
// constant to the program instead of one internal-linkage copy per translation unit.
inline constexpr uint32_t POSTCODE_ETH_INIT_SKIP = 0xC0DE0000;
inline constexpr uint32_t POSTCODE_ETH_INIT_SERDES = 0xC0DE1000;
inline constexpr uint32_t POSTCODE_ETH_INIT_ETH_CTRL = 0xC0DE2000;
inline constexpr uint32_t POSTCODE_ETH_INIT_MACPCS = 0xC0DE3000;
inline constexpr uint32_t POSTCODE_ETH_INIT_PACKET = 0xC0DE4000;
inline constexpr uint32_t POSTCODE_ETH_INIT_PASS = 0xC0DEA000;
inline constexpr uint32_t POSTCODE_ETH_INIT_FAIL = 0xC0DEB000;
inline constexpr uint32_t POSTCODE_ETH_INIT_CODE_NOT_FOUND = 0xC0DEFFFF;

// Length of the per-lane counter arrays in the SerDes result structs.
inline constexpr uint32_t NUM_SERDES_LANES = 8;

// Link training status codes.
// The numeric values are written out because they are consecutive from 0 in declaration
// order and eth_status_t stores this enum as one of its words; new codes must be appended.
enum link_train_status_e {
    LINK_TRAIN_TRAINING = 0,
    LINK_TRAIN_SKIP = 1,
    LINK_TRAIN_PASS = 2,
    LINK_TRAIN_INT_LB = 3,
    LINK_TRAIN_EXT_LB = 4,
    LINK_TRAIN_TIMEOUT_MANUAL_EQ = 5,
    LINK_TRAIN_TIMEOUT_ANLT = 6,
    LINK_TRAIN_TIMEOUT_CDR_LOCK = 7,
    LINK_TRAIN_TIMEOUT_BIST_LOCK = 8,
    LINK_TRAIN_TIMEOUT_LINK_UP = 9,
    LINK_TRAIN_TIMEOUT_CHIP_INFO = 10,
};

// Ethernet port status codes.
// Values are consecutive from 0 in declaration order; eth_status_t stores this enum as one
// of its words, so the numbering is spelled out and new codes must be appended.
enum port_status_e {
    PORT_UNKNOWN = 0,
    PORT_UP = 1,
    PORT_DOWN = 2,
    PORT_UNUSED = 3,
};

// Firmware version stored as four 8-bit bit-fields of a single uint32_t, declared in the
// order patch, minor, major, unused.
// NOTE(review): where each bit-field lands inside the word is implementation-defined;
// on the usual little-endian ABIs `patch` is the least significant byte — confirm for any
// new target.
struct fw_version_t {
    uint32_t patch : 8;
    uint32_t minor : 8;
    uint32_t major : 8;
    uint32_t unused : 8;
};

// boot_results_t budgets exactly one 32-bit word for each fw_version_t it embeds.
static_assert(sizeof(fw_version_t) == sizeof(uint32_t), "fw_version_t must occupy exactly one 32-bit word");

// Per-chip identification record, eight 32-bit words long (the trailing comments give the
// word index of each field). boot_results_t embeds two of these: local_info and remote_info.
struct chip_info_t {
    // Word 0: four single-byte fields packed in declaration order.
    uint8_t pcb_type;
    uint8_t asic_location;
    uint8_t eth_id;
    uint8_t logical_eth_id;
    uint32_t board_id_hi;   // word 1 - upper 32 bits of the 64-bit board id
    uint32_t board_id_lo;   // word 2 - lower 32 bits of the 64-bit board id
    uint32_t mac_addr_org;  // word 3
    uint32_t mac_addr_id;   // word 4
    uint32_t spare[2];      // words 5-6
    uint32_t ack;           // word 7

    // Builds the ChipUID for this record: board_id is board_id_hi:board_id_lo joined into
    // one 64-bit value, asic_location is copied unchanged.
    ChipUID get_chip_uid() const {
        ChipUID chip_uid;
        // Widen before shifting so the high half is not shifted out of a 32-bit value.
        chip_uid.board_id = (static_cast<uint64_t>(board_id_hi) << 32) | board_id_lo;
        chip_uid.asic_location = asic_location;
        return chip_uid;
    }
};

// Guards the 8-word layout documented above against accidental field changes.
static_assert(sizeof(chip_info_t) == 8 * sizeof(uint32_t), "chip_info_t must be exactly 8 32-bit words");

// SerDes RX BIST results: a mode word, a test-time word and three per-lane counter arrays
// (NUM_SERDES_LANES entries each). Trailing comments give each field's 32-bit word index;
// with 8 lanes the struct spans words 0-25.
// The field set parallels the "BIST" section of serdes_results_t.
struct serdes_rx_bist_results_t {
uint32_t bist_mode; // 0
uint32_t test_time; // 1
// test_time in cycles for bist mode 0 and ms for bist mode 1
uint32_t error_cnt_nt[NUM_SERDES_LANES]; // 2-9
uint32_t error_cnt_55t32_nt[NUM_SERDES_LANES]; // 10-17
uint32_t error_cnt_overflow_nt[NUM_SERDES_LANES]; // 18-25
};

// Ethernet status section: words 0-31 of boot_results_t. Trailing comments give each
// field's 32-bit word index; field order and the size of `spare` define the layout, so do
// not reorder or resize fields without updating the indices.
// NOTE(review): the indices count port_status and train_status as one word each, which
// assumes both enums are 32 bits wide — consider pinning the struct size with a static_assert.
struct eth_status_t {
// Basic status
uint32_t postcode; // 0
port_status_e port_status; // 1
link_train_status_e train_status; // 2
uint32_t train_speed; // 3 - Actual resulting speed from training

// Live status/retrain related
// The *_hi / *_lo pairs below are two-word values split into high and low halves.
uint32_t retrain_count; // 4
uint32_t mac_pcs_errors; // 5
uint32_t corr_dw_hi; // 6
uint32_t corr_dw_lo; // 7
uint32_t uncorr_dw_hi; // 8
uint32_t uncorr_dw_lo; // 9
uint32_t frames_rxd_hi; // 10
uint32_t frames_rxd_lo; // 11
uint32_t bytes_rxd_hi; // 12
uint32_t bytes_rxd_lo; // 13

// Padding sized as (next used word index - first spare index) so heartbeat lands on word 28.
uint32_t spare[28 - 14]; // 14-27

// Heartbeat
uint32_t heartbeat[4]; // 28-31
};

// SerDes results section: 64 words, placed at words 32-95 of boot_results_t. Trailing
// comments give each field's word index relative to the start of this struct.
// Every spare array is sized as (next used index - first spare index), which keeps each
// following group at its documented index; do not reorder or resize fields casually.
struct serdes_results_t {
uint32_t postcode; // 0
uint32_t serdes_inst; // 1
uint32_t serdes_lane_mask; // 2
uint32_t target_speed; // 3 - Target speed from the boot params
uint32_t data_rate; // 4
uint32_t data_width; // 5
uint32_t spare_main[8 - 6]; // 6-7

// Training retries
uint32_t lt_retry_cnt; // 8
uint32_t spare[16 - 9]; // 9-15

// BIST
uint32_t bist_mode; // 16
uint32_t bist_test_time; // 17
// test_time in cycles for bist mode 0 and ms for bist mode 1
// One counter per SerDes lane in each of the three arrays below.
uint32_t bist_err_cnt_nt[NUM_SERDES_LANES]; // 18-25
uint32_t bist_err_cnt_55t32_nt[NUM_SERDES_LANES]; // 26-33
uint32_t bist_err_cnt_overflow_nt[NUM_SERDES_LANES]; // 34-41

uint32_t spare2[48 - 42]; // 42-47

// Training times
// NOTE(review): units of these time fields are not stated here — confirm with firmware docs.
uint32_t man_eq_cmn_pstate_time; // 48
uint32_t man_eq_tx_ack_time; // 49
uint32_t man_eq_rx_ack_time; // 50
uint32_t man_eq_rx_iffsm_time; // 51
uint32_t man_eq_rx_eq_assert_time; // 52
uint32_t man_eq_rx_eq_deassert_time; // 53
uint32_t anlt_auto_neg_time; // 54
uint32_t anlt_link_train_time; // 55
uint32_t cdr_lock_time; // 56
uint32_t bist_lock_time; // 57

uint32_t spare_time[64 - 58]; // 58-63
};

// MAC/PCS results section: 32 words, placed at words 96-127 of boot_results_t. Trailing
// comments give each field's word index relative to the start of this struct; the spare
// arrays are sized as (next used index - first spare index) to keep those indices fixed.
struct macpcs_results_t {
    uint32_t postcode;  // 0

    uint32_t spare[24 - 1];  // 1-23

    // Training times
    uint32_t link_up_time;    // 24
    uint32_t chip_info_time;  // 25

    uint32_t spare_time[32 - 26];  // 26-31
};

// Guards the documented 32-word size against accidental field or padding changes.
static_assert(sizeof(macpcs_results_t) == 32 * sizeof(uint32_t), "macpcs_results_t must be exactly 32 32-bit words");

// Complete boot results record. Trailing comments give the 32-bit word range of each
// member; per those ranges the record spans words 0-255 (256 words in total).
// Member order and the size of `spare` define the layout — keep them in sync with the
// word ranges when editing.
// NOTE(review): presumably this is the record found at BOOT_RESULTS_ADDR — confirm.
struct boot_results_t {
eth_status_t eth_status; // 0-31
serdes_results_t serdes_results; // 32 - 95
macpcs_results_t macpcs_results; // 96 - 127

// Padding sized so the firmware version words start at word 238.
uint32_t spare[238 - 128]; // 128 - 237

fw_version_t serdes_fw_ver; // 238
fw_version_t eth_fw_ver; // 239
chip_info_t local_info; // 240 - 247
chip_info_t remote_info; // 248 - 255
};

// Address of the boot results record.
// NOTE(review): presumably where boot_results_t is read from on an ETH core — confirm the
// address space with the firmware interface.
// `inline` gives this header-defined constant a single program-wide definition.
inline constexpr uint32_t BOOT_RESULTS_ADDR = 0x7CC00;

} // namespace blackhole

} // namespace tt::umd
8 changes: 8 additions & 0 deletions device/api/umd/device/types/cluster_descriptor_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ inline BoardType get_board_type_from_board_id(const uint64_t board_id) {
// Chip identifier made of a 64-bit board id and an 8-bit ASIC location.
struct ChipUID {
    uint64_t board_id;
    uint8_t asic_location;

    // Lexicographic ordering on (board_id, asic_location); this is what allows ChipUID to
    // be used as the key of ordered containers such as std::map.
    bool operator<(const ChipUID &other) const {
        return std::tie(board_id, asic_location) < std::tie(other.board_id, other.asic_location);
    }

    // Two UIDs are equal when both fields match.
    // The return type is plain `bool`: a const qualifier on a by-value return is ignored by
    // the language and only produces -Wignored-qualifiers noise.
    bool operator==(const ChipUID &other) const {
        return board_id == other.board_id && asic_location == other.asic_location;
    }
};

struct ChipInfo {
Expand Down
11 changes: 11 additions & 0 deletions device/chip/chip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ Chip::Chip(tt_SocDescriptor soc_descriptor) : soc_descriptor_(soc_descriptor) {
set_default_params(soc_descriptor.arch);
}

// Constructs a Chip from an already-known ChipInfo and its SoC descriptor.
// Both arguments are copied into the object, after which set_default_params() is called
// with the descriptor's architecture.
Chip::Chip(const ChipInfo chip_info, tt_SocDescriptor soc_descriptor) :
    chip_info_(chip_info),
    soc_descriptor_(soc_descriptor)
{
    const auto arch = soc_descriptor.arch;
    set_default_params(arch);
}

// Returns a mutable reference to the SoC descriptor held by this chip.
tt_SocDescriptor& Chip::get_soc_descriptor() {
    return soc_descriptor_;
}

// Base implementation: always returns nullptr (the base Chip holds no TTDevice).
TTDevice* Chip::get_tt_device() {
    return nullptr;
}
Expand Down Expand Up @@ -44,4 +49,10 @@ void Chip::set_barrier_address_params(const barrier_address_params& barrier_addr
dram_address_params.DRAM_BARRIER_BASE = barrier_address_params_.dram_barrier_base;
}

// Returns a read-only reference to the ChipInfo stored in this chip.
const ChipInfo& Chip::get_chip_info() {
    return chip_info_;
}

// Blocks until the chip is ready; at present this only waits for ETH core training.
// The call dispatches virtually, so a derived override runs, but the timeout passed is
// the default argument from Chip's own declaration because defaults bind statically.
void Chip::wait_chip_to_be_ready() {
    wait_eth_cores_training();
}

// Base implementation does nothing; derived chips override it to perform the actual wait.
// The parameter is named as in the declaration (the timeout is per core, in milliseconds)
// and marked unused because this no-op never reads it.
void Chip::wait_eth_cores_training([[maybe_unused]] const uint32_t timeout_per_core_ms) {}

} // namespace tt::umd
Loading

0 comments on commit 2e367b4

Please sign in to comment.