dbg: temp
Signed-off-by: YdrMaster <ydrml@hotmail.com>
YdrMaster committed Dec 19, 2023
1 parent cd02cf0 commit 60b8259
Showing 3 changed files with 32 additions and 12 deletions.
18 changes: 13 additions & 5 deletions scripts/compare/compare.py
@@ -23,6 +23,7 @@ def parse_args():
         args.actual,
     )
 
+
 def getDiff(base, test):
     absolute_diff = np.subtract(base, test)
     max_absolute_diff = np.max(np.abs(absolute_diff))
@@ -35,16 +36,19 @@ def getDiff(base, test):
 
     return max_absolute_diff, max_relative_diff
 
-def compare_npy(actual_path, expect_path, edge, node):
+
+def compare_npy(meta_file, actual_path, expect_path, edge, node):
     actual = np.load(actual_path)
     expect = np.load(expect_path)
     if np.isnan(actual).any():
         print(f"NAN value in node:{node} edge:{edge}")
         return
 
     max_absolute_diff, max_relative_diff = getDiff(expect, actual)
-    if max_absolute_diff != 0.0: ## No need to print tensor with no diff
-        print(f'{max_absolute_diff}\t{max_relative_diff}\t{node}\t{edge}')
+    if max_absolute_diff != 0.0:  ## No need to print tensor with no diff
+        print(
+            f"{max_absolute_diff}\t{max_relative_diff}\t{meta_file}\t{actual_path}\t{expect_path}"
+        )
 
 
 def main():
@@ -71,7 +75,11 @@ def main():
                 expect_file_path = os.path.join(expect_dir, expect_file)
                 if os.path.exists(expect_file_path):
                     compare_npy(
-                        actual_file_path, expect_file_path, edge_name, node_name
+                        meta_file,
+                        actual_file_path,
+                        expect_file_path,
+                        edge_name,
+                        node_name,
                     )
 
 
11 changes: 9 additions & 2 deletions src/04kernel/src/kernels/dynamic_quantize_linear/cuda_kernel.cu
@@ -86,11 +86,13 @@ namespace refactor::kernel {
 
             QuantizeMinMax<TI> *nullTyped = nullptr;
             size_t tempStorageBytes = 0;
-            cub::DeviceReduce::Reduce(
+            auto e = cub::DeviceReduce::Reduce(
                 nullptr, tempStorageBytes,
                 nullTyped, nullTyped, 0,
                 QuantizeReduceMinMaxFunctor<TI>{},
                 QuantizeMinMax<TI>{});
+            fmt::println("error: {} {}", (int) e, cudaGetErrorString(e));
+            fmt::println("tempStorageBytes: {}", tempStorageBytes);
 
             auto offset0 = workspaceSize;
             workspaceSize += tempStorageBytes;
@@ -117,12 +119,17 @@
                 QuantizeMapMinMaxFunctor<TI>{});
 
             auto tempStorageSize_ = tempStorageBytes;
-            cub::DeviceReduce::Reduce(
+            fmt::println("tempStorage: {}, tempStorageSize: {}, doubled: {}, minmax: {}, params.n: {}",
+                         (void *) tempStorage, tempStorageSize_, (void *) doubled, (void *) minmax, params.n);
+
+            auto e = cub::DeviceReduce::Reduce(
                 tempStorage, tempStorageSize_,
                 doubled, minmax, params.n,
                 QuantizeReduceMinMaxFunctor<TI>{},
                 QuantizeMinMax<TI>{_MAX, _MIN});
 
+            fmt::println("error: {} {}", (int) e, cudaGetErrorString(e));
+
             kernel<<<params.gridSize, params.blockSize>>>(
                 params.n, minmax, x, y, scale, zp);
         };
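For context on the two Reduce calls above: cub::DeviceReduce::Reduce is a two-phase API. Called with a null workspace pointer, it does no work; it only writes the required temporary-storage size and returns a cudaError_t. Called again with a real workspace of at least that size, it performs the reduction. Below is a minimal, self-contained sketch of the pattern; the names MinOp, dIn, and dOut are illustrative and not from this repository.

// minimal_cub_reduce.cu -- illustrative sketch only; names are not from this repository.
#include <cub/cub.cuh>
#include <cfloat>
#include <cstdio>

// Custom reduction operator, analogous in role to QuantizeReduceMinMaxFunctor.
struct MinOp {
    __host__ __device__ float operator()(float a, float b) const {
        return a < b ? a : b;
    }
};

int main() {
    int const n = 4096;
    float *dIn, *dOut;
    cudaMalloc(&dIn, n * sizeof(float));
    cudaMalloc(&dOut, sizeof(float));
    cudaMemset(dIn, 0, n * sizeof(float));

    // Phase 1: with a null workspace pointer, Reduce launches nothing; it only
    // reports the required workspace size and returns a cudaError_t.
    void *workspace = nullptr;
    size_t workspaceBytes = 0;
    auto e = cub::DeviceReduce::Reduce(workspace, workspaceBytes,
                                       dIn, dOut, n, MinOp{}, FLT_MAX);
    std::printf("size query: %s, bytes = %zu\n", cudaGetErrorString(e), workspaceBytes);

    // Phase 2: the same call with a real workspace of at least workspaceBytes
    // performs the actual reduction into dOut.
    cudaMalloc(&workspace, workspaceBytes);
    e = cub::DeviceReduce::Reduce(workspace, workspaceBytes,
                                  dIn, dOut, n, MinOp{}, FLT_MAX);
    std::printf("reduce: %s\n", cudaGetErrorString(e));

    cudaFree(workspace);
    cudaFree(dOut);
    cudaFree(dIn);
    return 0;
}

The documented usage is to pass the same arguments, including the item count, to both phases, since the reported workspace size depends on them.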
15 changes: 10 additions & 5 deletions src/04kernel/test/kernels/dynamic_quantize_linear/test_cuda.cpp
@@ -10,7 +10,7 @@ using namespace kernel;
 using namespace hardware;
 
 TEST(kernel, DynamicQuantizeLinearCuda) {
-    auto size = 20;
+    auto size = 4096;
     // build routine
     auto kernel = DynamicQuantizeLinearCuda::build(size),
          kCpu = DynamicQuantizeLinearCpu::build(size);
@@ -26,13 +26,12 @@ TEST(kernel, DynamicQuantizeLinearCuda) {
          zpGpu = dev.malloc(sizeof(uint8_t)),
          workspace = dev.malloc(workspaceSize);
     // put input data
-    std::vector<float> x(size);
+    std::vector<float> x(size, 1);
     std::vector<uint8_t> y(size);
     float scale;
     uint8_t zeroPoint;
-    for (auto i : range0_(size)) {
-        x[i] = i * 3 + 15;
-    }
+    x[0] = 1.7181609;
+    x[1] = 0.00011457229;
     xGpu->copyFromHost(x.data());
     // inference
     {
@@ -50,6 +49,12 @@
         std::vector<uint8_t> result(size);
         yGpu->copyToHost(result.data());
         EXPECT_EQ(result, y);
+        float scale_;
+        scaleGpu->copyToHost(&scale_);
+        EXPECT_EQ(scale_, scale);
+        uint8_t zp_;
+        zpGpu->copyToHost(&zp_);
+        EXPECT_EQ(zp_, zeroPoint);
     }
 }
 
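The added checks copy the computed scale and zero point back from the GPU and compare them with the CPU kernel's outputs. For reference, a host-side sketch of the computation the ONNX DynamicQuantizeLinear operator defines for uint8 output; this is a sketch under that assumption, and dynamicQuantizeLinearRef with its parameter names is illustrative rather than the repository's CPU kernel.

// dynamic_quantize_reference.cpp -- host-side sketch of the ONNX
// DynamicQuantizeLinear formula (uint8); illustrative only.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

void dynamicQuantizeLinearRef(std::vector<float> const &x,
                              std::vector<uint8_t> &y,
                              float &scale, uint8_t &zeroPoint) {
    // The quantization range is stretched to include 0 so that 0.f maps
    // exactly onto an integer code; qmin = 0, qmax = 255 for uint8.
    // (The degenerate all-zero input, where scale would be 0, is not handled.)
    float xMin = std::min(0.0f, *std::min_element(x.begin(), x.end()));
    float xMax = std::max(0.0f, *std::max_element(x.begin(), x.end()));
    scale = (xMax - xMin) / 255.0f;
    // Zero point: the integer code that represents the real value 0.
    float zp = std::nearbyint(-xMin / scale);// rounds half to even by default
    zeroPoint = static_cast<uint8_t>(std::clamp(zp, 0.0f, 255.0f));
    // Quantize each element: round(x / scale) + zeroPoint, saturated to uint8.
    y.resize(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        float q = std::nearbyint(x[i] / scale) + static_cast<float>(zeroPoint);
        y[i] = static_cast<uint8_t>(std::clamp(q, 0.0f, 255.0f));
    }
}

Under this definition, the test input above (all ones except x[0] = 1.7181609 and x[1] = 0.00011457229) has its minimum clamped to 0, so the expected zero point is 0 and the scale is 1.7181609 / 255.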
