Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#12849: Replace TT_ASSERT with TT_FATAL in all CNN ops #14018

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
auto& a = input_tensors.at(0);
auto& b = input_tensors.at(1);
auto& bias = optional_input_tensors.at(0);
//TT_ASSERT(!untilize_out, "Optimized conv only supports tiled out");
TT_ASSERT(b.get_layout() == Layout::TILE); // Weights should already be formatted
TT_FATAL(b.get_layout() == Layout::TILE, "Weights should be in TILE layout."); // Weights should already be formatted
const auto& ashape = tt::tt_metal::LegacyShape(input_tensor_shape);
auto padded_a_shape = ttnn::Shape(std::array<uint32_t,4>{ashape[0], ashape[1], ashape[2], tt::round_up(ashape[3], 16)});
FormatParams input_a_format_params = {.pad_shape=padded_a_shape.value, .pad_value=0.0, .target_layout=Layout::ROW_MAJOR};
Expand All @@ -98,8 +97,8 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
void OptimizedConvNew::validate(const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors) const {
const auto& input_tensor_a = input_tensors.at(0);
const auto& input_tensor_b = input_tensors.at(1);
// TODO: ...
TT_FATAL(!input_tensor_b.memory_config().is_sharded(), "Error");
TT_FATAL(input_tensor_a.memory_config().is_sharded(), "Activation tensor should be sharded.");
TT_FATAL(!input_tensor_b.memory_config().is_sharded(), "Weights tensor should not be sharded.");
if (this->untilize_out) {
TT_FATAL((this->dtype == DataType::BFLOAT16) || (this->dtype == DataType::FLOAT32), "Error");
}
Expand Down Expand Up @@ -213,7 +212,6 @@ operation::ProgramWithCallbacks OptimizedConvNew::create_program(const std::vect
const auto& input_tensor_b = input_tensors.at(1);
const auto& input_tensor_bias = optional_input_tensors.at(0);
auto& output_tensor = output_tensors.at(0);
TT_ASSERT(input_tensor_a.memory_config().is_sharded()); // TODO: move this check to validate_input_tensors
return multi_core_optimized_conv_sharded_v2_new(
input_tensor_a, input_tensor_b, input_tensor_bias,
sliding_window_config,
Expand Down
8 changes: 4 additions & 4 deletions ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,25 @@ namespace tt_metal {

template<PoolType pool>
Tensor pool_2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional<DataType>& output_dtype) {
    // Pooling here is lowered onto existing reduction ops, which only run on
    // device — reject host tensors up front with a hard failure (TT_FATAL is
    // active in release builds, unlike TT_ASSERT).
    TT_FATAL(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device");
    auto input_shape = input.get_legacy_shape();
    switch (pool) {
        case PoolType::AVG: {
            // Average pooling = sum over the height dim (rank-2, i.e. dim -2)
            // scaled by 1/height. Use the *logical* height so tile padding
            // rows do not dilute the average.
            uint32_t height_without_padding = input.get_logical_shape()[-2];
            return ttnn::sum(input, int(input_shape.rank() - 2), true, memory_config, std::nullopt, 1 / float(height_without_padding));
        }
        default:
            // Unreachable for valid template instantiations; fail loudly
            // rather than fall through with no return value.
            TT_THROW("Undefined pool type");
    }
}

Tensor avg_pool2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional<DataType>& output_dtype) {
TT_ASSERT(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device");
TT_FATAL(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device");
auto output = input;

tt::tt_metal::LegacyShape in_shape = input.get_legacy_shape();
auto input_padding = in_shape.padding();
TT_ASSERT(input_padding[1].front == 0 and input_padding[1].back == 0);
TT_FATAL(input_padding[1].front == 0 and input_padding[1].back == 0, "Padding along second dim is not supported");
auto output_padding = Padding({input_padding[0], {0, 0}, {0, input_padding[2].back * in_shape[1]}, input_padding[3]}, input_padding.pad_value());
auto output_shape = tt::tt_metal::LegacyShape({in_shape[0], 1, in_shape[1] * in_shape[2], in_shape[3]}, output_padding);
output = output.reshape(output_shape);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ MaxPool2D::MultiCore::cached_program_t max_pool_2d_multi_core_sharded_with_halo_
const bool is_large_kernel = kernel_size_hw > MAX_SMALL_KERNEL_SIZE_HW;
const bool is_wide_reduction = in_ntiles_c > MAX_TILES_PER_REDUCTION;

TT_ASSERT(nblocks == 1, "Multiple blocks not yet supported");
TT_FATAL(nblocks == 1, "Multiple blocks not yet supported");

uint32_t tile_w = tt::constants::TILE_WIDTH;
if (input_shape[3] < tt::constants::TILE_WIDTH) {
Expand All @@ -96,7 +96,7 @@ MaxPool2D::MultiCore::cached_program_t max_pool_2d_multi_core_sharded_with_halo_
uint32_t ncores_w = grid_size.x;

// TODO: support generic nblocks
TT_ASSERT(
TT_FATAL(
out_nhw_per_core % nblocks == 0,
"number of sticks per core ({}) should be divisible by nblocks ({})",
out_nhw_per_core,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ Tensor halo_op(const Tensor& input_tensor,
uint32_t reshard_num_cores_nhw,
MemoryConfig output_memory_config,
bool is_out_tiled) {
TT_ASSERT(input_tensor.memory_config().is_sharded());
TT_ASSERT(input_tensor.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED || input_tensor.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED || input_tensor.memory_config().memory_layout == TensorMemoryLayout::WIDTH_SHARDED);
TT_FATAL(input_tensor.memory_config().is_sharded(), "Halo expects sharded input tensor");
TT_FATAL(input_tensor.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED || input_tensor.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED || input_tensor.memory_config().memory_layout == TensorMemoryLayout::WIDTH_SHARDED, "Only height, width or block sharded tensors are supported.");
// NOTE: for HEIGHT_SHARDED, ncores_nhw == ncores
// for BLOCK_SHARDED, ncores_nhw is just the ncores along height dim (last tensor dim is split along width)
bool is_block_sharded = input_tensor.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED;
Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/operations/sliding_window/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace tt::tt_metal {
namespace utils {

inline void init_neighbor_core_xy_mapping(CoreCoord grid_size, std::map<CoreCoord, CoreCoord>& left_neighbor_core, std::map<CoreCoord, CoreCoord>& right_neighbor_core, bool is_twod = false) {
TT_ASSERT((grid_size.x == 12 && grid_size.y == 9) || (grid_size.x == 8 && grid_size.y == 8) || (grid_size.x == 8 && grid_size.y == 7));
TT_FATAL((grid_size.x == 12 && grid_size.y == 9) || (grid_size.x == 8 && grid_size.y == 8) || (grid_size.x == 8 && grid_size.y == 7));
if (is_twod) {
// 2d decomposition case (block sharded)
// left-right neighbors are calculated along the x dim
Expand Down