Skip to content

Commit

Permalink
[UMD] Switch pci_cores and dram_cores to CoreCoord api. (#17620)
Browse files Browse the repository at this point in the history
### Ticket
Related to #17002

### Problem description
Switched uses of .pcie_cores and .dram_cores to the new CoreCoord API.

### What's changed
- Replaced accesses to .pcie_cores and .dram_cores with get_cores
- Replaced the old get_core_for_dram_channel with the new
get_dram_core_for_channel

### Checklist
- [x] All post-commit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543882883
- [x] Blackhole post-commit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543884998
- [x] (Single-card) Model perf tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543887484
- [ ] (Single-card) Device perf regressions :
https://github.com/tenstorrent/tt-metal/actions/runs/13543889644
- [ ] (T3K) T3000 unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543891837
- [ ] (T3K) T3000 demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543893764
- [x] (TG) TG unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543895516
- [ ] (TG) TG demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543897570
- [x] (TGG) TGG unit tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543899812
- [x] (TGG) TGG demo tests :
https://github.com/tenstorrent/tt-metal/actions/runs/13543902321
  • Loading branch information
broskoTT authored Feb 26, 2025
1 parent d94c244 commit da63d50
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 54 deletions.
5 changes: 3 additions & 2 deletions tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ DeviceData::DeviceData(
this->amt_written = 0;

const metal_SocDescriptor& soc_d = tt::Cluster::instance().get_soc_desc(device->id());
const std::vector<CoreCoord>& pcie_cores = soc_d.get_pcie_cores();
for (CoreCoord core : pcie_cores) {
const std::vector<tt::umd::CoreCoord>& pcie_cores = soc_d.get_cores(CoreType::PCIE, soc_d.get_umd_coord_system());
for (const CoreCoord& core_coord : pcie_cores) {
CoreCoord core = {core_coord.x, core_coord.y};
// TODO: make this all work w/ phys coords
// this is really annoying
// the PCIE phys core conflicts w/ worker logical cores
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,15 @@ int main(int argc, char** argv) {
case 0:
default: {
src_mem = "FROM_PCIE";
vector<CoreCoord> pcie_cores = soc_d.get_pcie_cores();
vector<tt::umd::CoreCoord> pcie_cores = soc_d.get_cores(CoreType::PCIE, soc_d.get_umd_coord_system());
TT_ASSERT(pcie_cores.size() > 0);
noc_addr_x = pcie_cores[0].x;
noc_addr_y = pcie_cores[0].y;
noc_mem_addr = dev_pcie_base + pcie_offset;
} break;
case 1: {
src_mem = "FROM_DRAM";
vector<CoreCoord> dram_cores = soc_d.get_dram_cores();
vector<tt::umd::CoreCoord> dram_cores = soc_d.get_cores(CoreType::DRAM, soc_d.get_umd_coord_system());
TT_ASSERT(dram_cores.size() > dram_channel_g);
noc_addr_x = dram_cores[dram_channel_g].x;
noc_addr_y = dram_cores[dram_channel_g].y;
Expand Down
6 changes: 0 additions & 6 deletions tt_metal/api/tt-metalium/metal_soc_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ struct metal_SocDescriptor : public tt_SocDescriptor {
size_t get_channel_for_dram_view(int dram_view) const;
size_t get_num_dram_views() const;

const std::vector<CoreCoord>& get_pcie_cores() const;
const std::vector<CoreCoord> get_dram_cores() const;

int get_dram_channel_from_logical_core(const CoreCoord& logical_coord) const;

CoreCoord get_physical_ethernet_core_from_logical(const CoreCoord& logical_coord) const;
Expand All @@ -62,7 +59,4 @@ struct metal_SocDescriptor : public tt_SocDescriptor {
void load_dram_metadata_from_device_descriptor();
void generate_logical_eth_coords_mapping();
void generate_physical_routing_to_profiler_flat_id();
// This is temporary until virtual coordinates are enabled because BH chips on
// different cards use different physical PCIe NoC endpoints
void update_pcie_cores(const BoardType& board_type);
};
33 changes: 0 additions & 33 deletions tt_metal/common/metal_soc_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,6 @@ size_t metal_SocDescriptor::get_channel_for_dram_view(int dram_view) const {

size_t metal_SocDescriptor::get_num_dram_views() const { return this->dram_view_eth_cores.size(); }

// Accessor for the PCIe core list held by this descriptor; returns a reference, not a copy.
const std::vector<CoreCoord>& metal_SocDescriptor::get_pcie_cores() const {
    return this->pcie_cores;
}

const std::vector<CoreCoord> metal_SocDescriptor::get_dram_cores() const {
std::vector<CoreCoord> cores;

// This is inefficient, but is currently not used in a perf path
for (const auto& channel_it : this->dram_cores) {
for (const auto& core_it : channel_it) {
cores.push_back(core_it);
}
}

return cores;
}

int metal_SocDescriptor::get_dram_channel_from_logical_core(const CoreCoord& logical_coord) const {
const uint32_t num_dram_views = this->get_num_dram_views();
TT_FATAL(
Expand Down Expand Up @@ -205,23 +190,6 @@ void metal_SocDescriptor::generate_physical_routing_to_profiler_flat_id() {
#endif
}

// Overwrites the PCIe core list based on the board type. Only Blackhole is affected;
// every other architecture returns without touching the list.
// TODO: This should be deleted once we switch to virtual coordinates
void metal_SocDescriptor::update_pcie_cores(const BoardType& board_type) {
    if (this->arch != tt::ARCH::BLACKHOLE) {
        return;
    }

    if (board_type == P100 || board_type == UNKNOWN) {
        // UNKNOWN shares the P100 assignment: workaround for BHs running FW that does
        // not return board type in the cluster yaml
        this->pcie_cores = {CoreCoord(11, 0)};
    } else if (board_type == P150A) {
        this->pcie_cores = {CoreCoord(2, 0)};
    } else {
        // Any other board type has no known PCIe core assignment yet.
        TT_THROW("Need to update PCIe core assignment for new Blackhole type, file issue to abhullar");
    }
}

// UMD initializes and owns tt_SocDescriptor
// For architectures with translation tables enabled, UMD will remove the last x rows from the descriptors in
// tt_SocDescriptor (workers list and worker_log_to_routing_x/y maps) This creates a virtual coordinate system, where
Expand All @@ -235,5 +203,4 @@ metal_SocDescriptor::metal_SocDescriptor(const tt_SocDescriptor& other, const Bo
this->load_dram_metadata_from_device_descriptor();
this->generate_logical_eth_coords_mapping();
this->generate_physical_routing_to_profiler_flat_id();
this->update_pcie_cores(board_type);
}
10 changes: 5 additions & 5 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,14 +597,14 @@ void Device::initialize_and_launch_firmware() {
core_info->noc_dram_addr_base = 0;
core_info->noc_dram_addr_end = soc_d.dram_core_size;

const std::vector<CoreCoord> &pcie_cores = soc_d.get_pcie_cores();
const std::vector<CoreCoord> &dram_cores = soc_d.get_dram_cores();
const std::vector<tt::umd::CoreCoord>& pcie_cores = soc_d.get_cores(CoreType::PCIE, soc_d.get_umd_coord_system());
const std::vector<tt::umd::CoreCoord>& dram_cores = soc_d.get_cores(CoreType::DRAM, soc_d.get_umd_coord_system());
const std::vector<tt::umd::CoreCoord>& eth_cores = soc_d.get_cores(CoreType::ETH, CoordSystem::PHYSICAL);
// The SOC descriptor can list a dram core multiple times, depending on how GDDR is assigned to banks
// Get a list of unique DRAM cores.
std::unordered_set<CoreCoord> unique_dram_cores(dram_cores.begin(), dram_cores.end());
TT_ASSERT(
pcie_cores.size() + unique_dram_cores.size() + eth_cores.size() <= MAX_NON_WORKER_CORES,
pcie_cores.size() + dram_cores.size() + eth_cores.size() <= MAX_NON_WORKER_CORES,
"Detected more pcie/dram/eth cores than fit in the device mailbox.");
TT_ASSERT(
eth_cores.size() <= MAX_VIRTUAL_NON_WORKER_CORES,
Expand All @@ -617,10 +617,10 @@ void Device::initialize_and_launch_firmware() {
}

int non_worker_cores_idx = 0;
for (const CoreCoord &core : pcie_cores) {
for (const tt::umd::CoreCoord& core : pcie_cores) {
core_info->non_worker_cores[non_worker_cores_idx++] = {core.x, core.y, AddressableCoreType::PCIE};
}
for (const CoreCoord &core : unique_dram_cores) {
for (const tt::umd::CoreCoord& core : dram_cores) {
core_info->non_worker_cores[non_worker_cores_idx++] = {core.x, core.y, AddressableCoreType::DRAM};
}
for (const tt::umd::CoreCoord& core : eth_cores) {
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/jit_build/build_env_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ std::map<std::string, std::string> initialize_device_kernel_defines(chip_id_t de

// TODO (abhullar): Until we switch to virtual coordinates, we need to pass physical PCIe coordinates to device
// because Blackhole PCIe endpoint is dependent on board type
auto pcie_cores = soc_d.get_pcie_cores();
auto pcie_cores = soc_d.get_cores(CoreType::PCIE, soc_d.get_umd_coord_system());
CoreCoord pcie_core = pcie_cores.empty() ? soc_d.grid_size : pcie_cores[0];

device_kernel_defines.emplace("PCIE_NOC_X", std::to_string(pcie_core.x));
Expand Down
14 changes: 12 additions & 2 deletions tt_metal/llrt/sanitize_noc_host.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ namespace tt {
((((a) >= HAL_MEM_ETH_BASE) && ((a) + (l) <= HAL_MEM_ETH_BASE + HAL_MEM_ETH_SIZE)) || \
(DEBUG_VALID_REG_ADDR(a) && (l) == 4))

static bool coord_found_p(std::vector<tt::umd::CoreCoord> coords, CoreCoord core) {
for (const tt::umd::CoreCoord& core_coord : coords) {
CoreCoord item = {core_coord.x, core_coord.y};
if (item == core) {
return true;
}
}
return false;
}

static bool coord_found_p(std::vector<CoreCoord> coords, CoreCoord core) {
for (CoreCoord item : coords) {
if (item == core) {
Expand Down Expand Up @@ -68,9 +78,9 @@ static void watcher_sanitize_host_noc(
const CoreCoord& core,
uint64_t addr,
uint32_t lbytes) {
if (coord_found_p(soc_d.get_pcie_cores(), core)) {
if (coord_found_p(soc_d.get_cores(CoreType::PCIE, soc_d.get_umd_coord_system()), core)) {
TT_THROW("Host watcher: bad {} NOC coord {}", what, core.str());
} else if (coord_found_p(soc_d.get_dram_cores(), core)) {
} else if (coord_found_p(soc_d.get_cores(CoreType::DRAM, soc_d.get_umd_coord_system()), core)) {
uint64_t dram_addr_base = 0;
uint64_t dram_addr_size = soc_d.dram_core_size;
uint64_t dram_addr_end = dram_addr_size - dram_addr_base;
Expand Down
8 changes: 6 additions & 2 deletions tt_metal/llrt/tt_cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,9 @@ void Cluster::write_dram_vec(std::vector<uint32_t> &vec, tt_target_dram dram, ui
TT_ASSERT(
d_subchannel < desc_to_use.dram_cores.at(d_chan).size(),
"Trying to address dram sub channel that doesnt exist in the device descriptor");
tt_cxy_pair dram_core = tt_cxy_pair(chip_id, desc_to_use.get_core_for_dram_channel(d_chan, d_subchannel));
tt::umd::CoreCoord dram_core_coord =
desc_to_use.get_dram_core_for_channel(d_chan, d_subchannel, CoordSystem::VIRTUAL);
tt_cxy_pair dram_core = tt_cxy_pair(chip_id, dram_core_coord.x, dram_core_coord.y);
size_t offset = desc_to_use.get_address_offset(d_view);
write_core(vec.data(), vec.size() * sizeof(uint32_t), dram_core, addr + offset, small_access);
}
Expand All @@ -531,7 +533,9 @@ void Cluster::read_dram_vec(
TT_ASSERT(
d_subchannel < desc_to_use.dram_cores.at(d_chan).size(),
"Trying to address dram sub channel that doesnt exist in the device descriptor");
tt_cxy_pair dram_core = tt_cxy_pair(chip_id, desc_to_use.get_core_for_dram_channel(d_chan, d_subchannel));
tt::umd::CoreCoord dram_core_coord =
desc_to_use.get_dram_core_for_channel(d_chan, d_subchannel, CoordSystem::VIRTUAL);
tt_cxy_pair dram_core = tt_cxy_pair(chip_id, dram_core_coord.x, dram_core_coord.y);
size_t offset = desc_to_use.get_address_offset(d_view);
read_core(vec, sz_in_bytes, dram_core, addr + offset, small_access);
}
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/third_party/umd
Submodule umd updated 36 files
+1 −2 .github/workflows/build-and-run-all-tests.yml
+6 −2 device/api/umd/device/blackhole_implementation.h
+11 −0 device/api/umd/device/chip/chip.h
+9 −0 device/api/umd/device/chip/local_chip.h
+37 −41 device/api/umd/device/cluster.h
+1 −1 device/api/umd/device/coordinate_manager.h
+14 −0 device/api/umd/device/tt_cluster_descriptor.h
+2 −0 device/api/umd/device/tt_device/blackhole_tt_device.h
+5 −0 device/api/umd/device/tt_device/tt_device.h
+2 −0 device/api/umd/device/tt_simulation_device.h
+9 −4 device/api/umd/device/tt_soc_descriptor.h
+175 −0 device/api/umd/device/types/blackhole_eth.h
+4 −1 device/api/umd/device/types/blackhole_telemetry.h
+71 −2 device/api/umd/device/types/cluster_descriptor_types.h
+4 −1 device/blackhole/blackhole_coordinate_manager.cpp
+5 −14 device/blackhole/blackhole_implementation.cpp
+11 −0 device/chip/chip.cpp
+63 −0 device/chip/local_chip.cpp
+208 −97 device/cluster.cpp
+2 −2 device/coordinate_manager.cpp
+5 −0 device/mockup/tt_mockup_device.hpp
+4 −0 device/simulation/tt_simulation_device.cpp
+106 −3 device/tt_cluster_descriptor.cpp
+33 −3 device/tt_device/blackhole_tt_device.cpp
+10 −0 device/tt_device/tt_device.cpp
+38 −23 device/tt_soc_descriptor.cpp
+7 −7 tests/api/test_cluster.cpp
+38 −0 tests/api/test_soc_descriptor.cpp
+1 −1 tests/blackhole/test_chip_info_bh.cpp
+4 −5 tests/blackhole/test_cluster_bh.cpp
+10 −13 tests/galaxy/test_umd_concurrent_threads.cpp
+2 −3 tests/galaxy/test_umd_remote_api.cpp
+72 −0 tests/soc_descs/blackhole_140_arch.yaml
+2 −2 tests/soc_descs/blackhole_140_arch_no_eth.yaml
+1 −1 tests/soc_descs/blackhole_140_arch_type2.yaml
+23 −7 tests/wormhole/test_cluster_wh.cpp

0 comments on commit da63d50

Please sign in to comment.