Skip to content

Commit

Permalink
Expand circle schema to represent chunk quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
hseok-oh committed Aug 27, 2024
1 parent b751d11 commit b23a54b
Show file tree
Hide file tree
Showing 8 changed files with 330 additions and 10 deletions.
7 changes: 7 additions & 0 deletions compiler/circle-quantizer/src/QuantizeWeightsLLM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ luci::CircleConst *quantize_q8_block(luci::CircleConst *node)

// Set real data type
new_weights->dtype(loco::DataType::S8);
auto quantparam = std::make_unique<luci::CircleQuantParam>();
quantparam->chunk_data_size = 8;
new_weights->quantparam(std::move(quantparam));

return new_weights;
}

Expand Down Expand Up @@ -107,6 +111,9 @@ luci::CircleConst *quantize_q4_block(luci::CircleConst *node)

// Set real data type
new_weights->dtype(loco::DataType::U4);
auto quantparam = std::make_unique<luci::CircleQuantParam>();
quantparam->chunk_data_size = 4;
new_weights->quantparam(std::move(quantparam));

return new_weights;
}
Expand Down
17 changes: 12 additions & 5 deletions compiler/luci/export/src/CircleTensorExporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l
case loco::DataType::S4:
return encodeOpBufferPack4bit<loco::DataType::S4>(builder, c);
case loco::DataType::S8:
if (c->quantparam() == nullptr)
if (c->quantparam() != nullptr && c->quantparam()->chunk_data_size > 0)
return encodeOpBufferBlocked<loco::DataType::S8>(builder, c);
return encodeOpBufferByDType<loco::DataType::S8>(builder, c);
case loco::DataType::S16:
Expand All @@ -477,7 +477,7 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l
case loco::DataType::S64:
return encodeOpBufferByDType<loco::DataType::S64>(builder, c);
case loco::DataType::U4:
if (c->quantparam() == nullptr)
if (c->quantparam() != nullptr && c->quantparam()->chunk_data_size > 0)
return encodeOpBufferBlocked<loco::DataType::U4>(builder, c);
return encodeOpBufferPack4bit<loco::DataType::U4>(builder, c);
case loco::DataType::U8:
Expand Down Expand Up @@ -505,6 +505,14 @@ encodeQuantizationParameters(FlatBufferBuilder &builder, luci::CircleQuantParam
if (quantparam == nullptr)
return 0;

::flatbuffers::Offset<void> details = 0;
circle::QuantizationDetails details_type = circle::QuantizationDetails_NONE;
if (quantparam->chunk_data_size > 0)
{
details = circle::CreateCircleChunkQuantization(builder, 0, 32, false, false).Union();
details_type = circle::QuantizationDetails_CircleChunkQuantization;
}

flatbuffers::Offset<flatbuffers::Vector<float>> min;
flatbuffers::Offset<flatbuffers::Vector<float>> max;
flatbuffers::Offset<flatbuffers::Vector<float>> scale;
Expand All @@ -520,9 +528,8 @@ encodeQuantizationParameters(FlatBufferBuilder &builder, luci::CircleQuantParam
zero_point = builder.CreateVector(quantparam->zerop);
}
// Note: only CircleChunkQuantization is exported as QuantizationDetails (CustomQuantization is not supported)
return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point,
circle::QuantizationDetails::QuantizationDetails_NONE,
0, quantparam->quantized_dimension);
return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point, details_type,
details, quantparam->quantized_dimension);
}

flatbuffers::Offset<circle::SparsityParameters>
Expand Down
35 changes: 35 additions & 0 deletions compiler/luci/lang/include/luci/IR/CircleChunkParam.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __LUCI_IR_CHUNKPARAM_H__
#define __LUCI_IR_CHUNKPARAM_H__

#include <cstdint>
#include <string>

namespace luci
{

/**
 * @brief Chunk (block) quantization parameters attached to a CircleNode
 *
 * Fields mirror the CircleChunkQuantization table of the circle schema.
 * Every scalar member has a default so that a default-constructed instance
 * never exposes indeterminate values.
 */
struct CircleChunkParam
{
  std::string name;           // optional label, for debugging
  int32_t size{0};            // chunk size
  bool has_min_or_sum{false}; // min on 4,5 bit. sum on 8bit
  bool is_super{false};       // super-block quantization
};

} // namespace luci

#endif // __LUCI_IR_CHUNKPARAM_H__
8 changes: 8 additions & 0 deletions compiler/luci/lang/include/luci/IR/CircleNodeDecl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "CircleNodeVisitor.forward.h"
#include "CircleQuantParam.h"
#include "SparsityParam.h"
#include "CircleChunkParam.h"

#include <memory>

Expand Down Expand Up @@ -61,6 +62,12 @@ struct CircleNode : public loco::Node,
_sparsityparam = std::move(sparsityparam);
}

/// @brief Non-owning access to the chunk parameter held by this node (nullptr when none is set)
CircleChunkParam *chunkparam(void) const
{
  return _chunkparam.get();
}
/// @brief Take ownership of the given chunk parameter, replacing the one currently held
void chunkparam(std::unique_ptr<CircleChunkParam> &&chunkparam) { _chunkparam = std::move(chunkparam); }

ShapeStatus shape_status(void) const { return _shape_status; }
void shape_status(ShapeStatus ss) { _shape_status = ss; }

Expand All @@ -71,6 +78,7 @@ struct CircleNode : public loco::Node,
NodeName _name;
std::unique_ptr<CircleQuantParam> _quantparam;
std::unique_ptr<SparsityParam> _sparsityparam;
std::unique_ptr<CircleChunkParam> _chunkparam;
ShapeStatus _shape_status{ShapeStatus::UNDEFINED};
int32_t _op_version = 1;
};
Expand Down
1 change: 1 addition & 0 deletions compiler/luci/lang/include/luci/IR/CircleQuantParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct CircleQuantParam
std::vector<float> scale;
std::vector<int64_t> zerop;
int32_t quantized_dimension{0};
uint32_t chunk_data_size{0}; // workaround to represent chunk quant type
};

struct CircleNode;
Expand Down
13 changes: 13 additions & 0 deletions nnpackage/schema/circle_schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,22 @@ table CustomQuantization {
custom:[ubyte] (force_align: 16);
}

// Chunk (block) quantization parameters.
// Attached to a tensor through the QuantizationDetails union.
table CircleChunkQuantization {
// Optional label, intended for debugging only
name:string;
// Chunk size
// NOTE(review): unit is presumably elements per chunk - confirm
size:uint;
// Whether an additional min/sum value is present:
// a min for 4-bit and 5-bit types, a sum for 8-bit
has_min_or_sum:bool;
// Whether super-block quantization is used
is_super:bool;
}

// Represents a specific quantization technique's parameters.
// Members: CustomQuantization (custom byte payload) and
// CircleChunkQuantization (chunk/block quantization settings).
union QuantizationDetails {
CustomQuantization,
CircleChunkQuantization
}

// Parameters for converting a quantized tensor back to float.
Expand Down
13 changes: 13 additions & 0 deletions res/CircleSchema/0.8/circle_schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,22 @@ table CustomQuantization {
custom:[ubyte] (force_align: 16);
}

// Chunk (block) quantization parameters.
// Attached to a tensor through the QuantizationDetails union.
table CircleChunkQuantization {
// Optional label, intended for debugging only
name:string;
// Chunk size
// NOTE(review): unit is presumably elements per chunk - confirm
size:uint;
// Whether an additional min/sum value is present:
// a min for 4-bit and 5-bit types, a sum for 8-bit
has_min_or_sum:bool;
// Whether super-block quantization is used
is_super:bool;
}

// Represents a specific quantization technique's parameters.
// Members: CustomQuantization (custom byte payload) and
// CircleChunkQuantization (chunk/block quantization settings).
union QuantizationDetails {
CustomQuantization,
CircleChunkQuantization
}

// Parameters for converting a quantized tensor back to float.
Expand Down
Loading

0 comments on commit b23a54b

Please sign in to comment.