#5044: Add optional output, cq_id to addalpha
KalaivaniMCW committed Jun 6, 2024
1 parent 61f5729 commit ffc099b
Showing 6 changed files with 830 additions and 246 deletions.
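
For context, a minimal sketch of the call patterns this commit enables, based on the eltwise_addalpha_optional wrapper added to tt_lib_ops.py below; t0 and t1 stand for input tensors already placed on device and t2 for a preallocated output tensor (their setup is elided here):

# Sketch only: tensor setup via setup_tt_tensor is shown in the wrapper below.
alpha = 2.0
cq_id = 0  # command-queue id; the sweep test uses queue 0

# Existing form, now accepting an optional preallocated output tensor
ttl.tensor.addalpha(t0, t1, alpha, output_tensor=t2)

# New overload that additionally takes the command-queue id as the first argument
ttl.tensor.addalpha(cq_id, t0, t1, alpha, output_tensor=t2)

# In both cases the result input + alpha * other lands in t2.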
4 changes: 4 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/op_map.py
@@ -524,6 +524,10 @@
"tt_op": tt_lib_ops.eltwise_addalpha,
"pytorch_op": pytorch_ops.addalpha,
},
"eltwise-addalpha-optional": {
"tt_op": tt_lib_ops.eltwise_addalpha_optional,
"pytorch_op": pytorch_ops.addalpha,
},
"lamb-optimizer": {
"tt_op": tt_lib_ops.lamb_optimizer,
"pytorch_op": pytorch_ops.lamb_optimizer,
@@ -0,0 +1,124 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
import torch
from functools import partial
import tt_lib as ttl
import numpy as np


from tests.tt_eager.python_api_testing.sweep_tests import (
comparison_funcs,
generation_funcs,
)
from tests.tt_eager.python_api_testing.sweep_tests.run_pytorch_ci_tests import (
run_single_pytorch_test,
)
from models.utility_functions import is_wormhole_b0

shapes = [
[[1, 1, 32, 32], [1, 1, 32, 32]], # Single core
[[1, 1, 32, 32], [32, 1, 32, 32]], # Single core
[[64, 1, 32, 32], [1, 1, 32, 32]], # Single core
[[1, 1, 320, 384], [1, 1, 320, 384]], # Multi core
[[1, 3, 320, 384], [1, 3, 320, 384]], # Multi core
]

input_mem_cfgs = generation_funcs.supported_mem_configs
output_mem_cfgs = generation_funcs.supported_mem_configs

if is_wormhole_b0():
shapes = [
shapes[0],
]
input_mem_cfgs = [
input_mem_cfgs[0],
]


@pytest.mark.parametrize(
"input_shapes",
shapes,
)
@pytest.mark.parametrize("input_mem_config", input_mem_cfgs)
@pytest.mark.parametrize("output_mem_config", output_mem_cfgs)
@pytest.mark.parametrize("fn_kind", ["addalpha"])
def test_run_addalpha(
input_shapes,
fn_kind,
input_mem_config,
output_mem_config,
device,
function_level_defaults,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.float32)
] * len(input_shapes)
test_args = list(generation_funcs.gen_default_dtype_layout_device(input_shapes))[0]
test_args.update(
{
"input_mem_config": [input_mem_config, input_mem_config],
"output_mem_config": output_mem_config,
"alpha": np.random.randint(1, 100),
}
)
comparison_func = comparison_funcs.comp_pcc
run_single_pytorch_test(
f"eltwise-{fn_kind}",
input_shapes,
datagen_func,
comparison_func,
device,
test_args,
)


shapes_w_output = [
[[1, 1, 32, 32], [1, 1, 32, 32], [1, 1, 32, 32]], # Single core
[[1, 1, 32, 32], [32, 1, 32, 32], [32, 1, 32, 32]], # Single core
[[64, 1, 32, 32], [1, 1, 32, 32], [64, 1, 32, 32]], # Single core
[[1, 1, 320, 384], [1, 1, 320, 384], [1, 1, 320, 384]], # Multi core
[[1, 3, 320, 384], [1, 3, 320, 384], [1, 3, 320, 384]], # Multi core
]


@pytest.mark.parametrize(
"input_shapes",
shapes_w_output,
)
@pytest.mark.parametrize("input_mem_config", input_mem_cfgs)
@pytest.mark.parametrize("fn_kind", ["addalpha"])
@pytest.mark.parametrize("pass_qid", [True, False])
def test_run_addalpha_optional_output(
input_shapes,
fn_kind,
input_mem_config,
device,
pass_qid,
function_level_defaults,
):
datagen_func = [
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-100, high=100), torch.float32)
] * len(input_shapes)
datagen_func.append(
generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=-80, high=80), torch.bfloat16)
)
test_args = list(generation_funcs.gen_default_dtype_layout_device(input_shapes))[0]
test_args.update(
{
"input_mem_config": [input_mem_config, input_mem_config, input_mem_config],
"alpha": np.random.randint(1, 100),
"queue_id": pass_qid,
}
)
comparison_func = comparison_funcs.comp_pcc
run_single_pytorch_test(
f"eltwise-{fn_kind}-optional",
input_shapes,
datagen_func,
comparison_func,
device,
test_args,
)
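
For reference, the PyTorch side of the comparison (pytorch_ops.addalpha, wired up in op_map.py above) computes input + alpha * other. A minimal standalone equivalent, assuming it wraps torch.add with the alpha keyword:

import torch

def addalpha_reference(x, y, alpha):
    # torch.add(input, other, alpha=alpha) returns input + alpha * other,
    # matching the composite definition in composite_ops.cpp below
    return torch.add(x, y, alpha=alpha)

x = torch.randn(1, 1, 32, 32)
y = torch.randn(1, 1, 32, 32)
assert torch.allclose(addalpha_reference(x, y, 3.0), x + 3.0 * y)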
27 changes: 27 additions & 0 deletions tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
@@ -1026,6 +1026,33 @@ def eltwise_addalpha(
return tt2torch_tensor(t2)


@setup_host_and_device
def eltwise_addalpha_optional(
x,
y,
z,
*args,
alpha,
device,
dtype,
layout,
queue_id,
input_mem_config,
**kwargs,
):
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])
cq_id = 0

if queue_id:
ttl.tensor.addalpha(cq_id, t0, t1, alpha, output_tensor=t2)
else:
ttl.tensor.addalpha(t0, t1, alpha, output_tensor=t2)

return tt2torch_tensor(t2)


@setup_host_and_device
def eltwise_div(
x,
52 changes: 45 additions & 7 deletions tt_eager/tt_dnn/op_library/composite/composite_ops.cpp
@@ -701,12 +701,52 @@ Tensor subalpha(const Tensor& input_a, const Tensor& input_b, float alpha, const
}

// addalpha(input, other, alpha) = input + (alpha * other)
Tensor _addalpha(const Tensor& input_a, const Tensor& input_b, float alpha, const MemoryConfig& output_mem_config) {
Tensor result = add(mul_unary(input_b, alpha, output_mem_config), input_a, std::nullopt, output_mem_config);
return result;
Tensor _addalpha(
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config,
std::optional<Tensor> output_tensor) {
if (output_tensor.has_value()) {
add(mul_unary(input_b, alpha, output_mem_config), input_a, std::nullopt, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::nullopt, output_tensor.value());
return output_tensor.value();
}

return add(mul_unary(input_b, alpha, output_mem_config), input_a, std::nullopt, output_mem_config);
}
Tensor addalpha(
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config,
std::optional<Tensor> output_tensor) {
return operation::decorate_as_composite(__func__, _addalpha)(
input_a, input_b, alpha, output_mem_config, output_tensor);
}
Tensor addalpha(const Tensor& input_a, const Tensor& input_b, float alpha, const MemoryConfig& output_mem_config) {
return operation::decorate_as_composite(__func__, _addalpha)(input_a, input_b, alpha, output_mem_config);

Tensor _addalpha_overload(
uint8_t cq_id,
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config,
std::optional<Tensor> output_tensor) {
if (output_tensor.has_value()) {
add(mul_unary(input_b, alpha, output_mem_config), input_a, std::nullopt, operation::DEFAULT_OUTPUT_MEMORY_CONFIG, std::nullopt, output_tensor.value());
return output_tensor.value();
}

return add(mul_unary(input_b, alpha, output_mem_config), input_a, std::nullopt, output_mem_config);
}
Tensor addalpha(
uint8_t cq_id,
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config,
std::optional<Tensor> output_tensor) {
return operation::decorate_as_composite(__func__, _addalpha_overload)(
cq_id, input_a, input_b, alpha, output_mem_config, output_tensor);
}
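
A rough torch-level analogue of the two code paths above (an illustration, not the tt_lib implementation): both branches compute input + alpha * other, but the output_tensor branch writes the result into a caller-provided tensor instead of allocating a new one.

import torch

a = torch.randn(1, 1, 32, 32)
b = torch.randn(1, 1, 32, 32)
alpha = 3.0

# allocate-and-return path (no output_tensor supplied)
out_of_place = torch.add(a, b, alpha=alpha)

# optional-output path: result is written into a preallocated tensor
preallocated = torch.empty_like(a)
torch.add(a, b, alpha=alpha, out=preallocated)

assert torch.equal(out_of_place, preallocated)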

// repeat interleave supports repeats as 1 to inf, dim between 0 to 2
@@ -1731,7 +1771,6 @@ Tensor _argmax(const Tensor& input_t, int64_t _dim, bool all, const MemoryConfig
tindex = tt::numpy::index_height<bfloat16>(input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
max_tensor = bcast(max_tensor, max_val, BcastOpMath::ADD, BcastOpDim::H, output_mem_config);
}
tindex = tindex.to(input_a.device());
max_val.deallocate();
Tensor cmp_results = eq(input_a, max_tensor, std::nullopt, output_mem_config);
max_tensor.deallocate();
@@ -1768,7 +1807,6 @@ Tensor _argmax(const Tensor& input_t, int64_t _dim, bool all, const MemoryConfig
{
tindex = tt::numpy::index_batch<bfloat16>(input_shape, DataType::BFLOAT16, Layout::TILE, input_a.device(), output_mem_config);
}
tindex = tindex.to(input_a.device());
Tensor max_indices = mul(cmp_results, tindex, std::nullopt, output_mem_config);
cmp_results.deallocate();
Tensor midx = full_like(max_indices, size);
18 changes: 12 additions & 6 deletions tt_eager/tt_dnn/op_library/composite/composite_ops.hpp
@@ -114,9 +114,7 @@ Tensor selu(
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

Tensor celu(
const Tensor& x,
float alpha,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
const Tensor& x, float alpha, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

// Function Swish = same as SILU
// use transformation y = x * sigmoid( x ) by broadcast
@@ -230,14 +228,13 @@ Tensor logical_noti(
float immediate,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);

//prod
// prod
Tensor prod(
const Tensor& input_a,
bool all_dimensions = false,
int64_t dim = 0,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);


/*
Returns a new tensor with the signed angles in radians between vectors
@@ -267,7 +264,16 @@ Tensor addalpha(
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG);
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<Tensor> output_tensor = std::nullopt);

Tensor addalpha(
uint8_t cq_id,
const Tensor& input_a,
const Tensor& input_b,
float alpha,
const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<Tensor> output_tensor = std::nullopt);

// repeat interleave
Tensor repeat_interleave(
