Skip to content

Commit

Permalink
adding all mmap array view
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPresent-Han committed Jan 8, 2025
1 parent 4d80704 commit dea6494
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 27 deletions.
8 changes: 0 additions & 8 deletions internal/core/src/common/Chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,4 @@ ArrayChunk::ConstructViews() {
}
}

// Builds a SpanBase over the ArrayView entries stored in views_.
SpanBase
ArrayChunk::Span() const {
    // The validity buffer is forwarded only when the chunk is nullable;
    // otherwise the span receives a null validity pointer.
    auto validity = nullable_ ? valid_.data() : nullptr;
    const auto view_count = views_.size();
    return SpanBase(views_.data(), validity, view_count, sizeof(ArrayView));
}

} // namespace milvus
28 changes: 20 additions & 8 deletions internal/core/src/common/Chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,16 +202,29 @@ class ArrayChunk : public Chunk {
: Chunk(row_nums, data, size, nullable), element_type_(element_type) {
auto null_bitmap_bytes_num = (row_nums + 7) / 8;
offsets_lens_ =
reinterpret_cast<uint64_t*>(data + null_bitmap_bytes_num);
reinterpret_cast<uint32_t*>(data + null_bitmap_bytes_num);
ConstructViews();
}

SpanBase
Span() const;

ArrayView
View(int64_t idx) const {
return views_[idx];
View(int idx) const {
int idx_off = 2 * idx;
auto offset = offsets_lens_[idx_off];
auto len = offsets_lens_[idx_off + 1];
auto next_offset = offsets_lens_[idx_off + 2];
auto data_ptr = data_ + offset;
uint32_t offsets_bytes_len = 0;
uint32_t* offsets_ptr = nullptr;
if (IsStringDataType(element_type_)) {
offsets_bytes_len = len * sizeof(uint32_t);
offsets_ptr = reinterpret_cast<uint32_t*>(data_ptr);
}

return ArrayView(data_ptr + offsets_bytes_len,
len,
next_offset - offset - offsets_bytes_len,
element_type_,
offsets_ptr);
}

void
Expand All @@ -225,8 +238,7 @@ class ArrayChunk : public Chunk {

private:
milvus::DataType element_type_;
uint64_t* offsets_lens_;
std::vector<ArrayView> views_;
uint32_t* offsets_lens_;
};

class SparseFloatVectorChunk : public Chunk {
Expand Down
20 changes: 19 additions & 1 deletion internal/core/src/mmap/ChunkedColumn.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,25 @@ class ChunkedColumnBase : public ColumnBase {
"StringViews only supported for VariableColumn");
}

// Base-class version of the per-chunk array-view accessor.
// It never produces a result: it unconditionally reports
// ErrorCode::Unsupported through PanicInfo. Being virtual, it is presumably
// meant to be overridden by the array column type — TODO confirm.
// NOTE(review): the message says "ArrayChunkedColumn" while the class visible
// in this header is named ChunkedArrayColumn.
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
ArrayViews(int64_t chunk_id) const {
PanicInfo(ErrorCode::Unsupported,
"ArrayViews only supported for ArrayChunkedColumn");
}

// Base-class version of the offset-based string-view accessor.
// Takes a chunk id and a list of int32 offsets, but never produces a result:
// it unconditionally reports ErrorCode::Unsupported through PanicInfo.
// Per the message, only VariableColumn is expected to support this call.
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
ViewsByOffsets(int64_t chunk_id,
const FixedVector<int32_t>& offsets) const {
PanicInfo(ErrorCode::Unsupported,
"viewsbyoffsets only supported for VariableColumn");
}

// Base-class version of the per-chunk array-view hook.
// It never produces a result: it unconditionally reports
// ErrorCode::Unsupported through PanicInfo. ChunkedArrayColumn declares a
// member with the same signature that is intended to replace it.
//
// The message previously read "StringViews only supported for
// VariableColumn", copied from the string-view accessor; it now names the
// function and column type that actually apply.
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
chunk_array_view_impl(int64_t chunk_id) const {
    PanicInfo(ErrorCode::Unsupported,
              "chunk_array_view_impl only supported for ChunkedArrayColumn");
}

std::pair<size_t, size_t>
GetChunkIDByOffset(int64_t offset) const {
AssertInfo(offset < num_rows_,
Expand Down Expand Up @@ -406,7 +418,13 @@ class ChunkedArrayColumn : public ChunkedColumnBase {

SpanBase
Span(int64_t chunk_id) const override {
return std::dynamic_pointer_cast<ArrayChunk>(chunks_[chunk_id])->Span();
PanicInfo(ErrorCode::NotImplemented,
"span() interface is not implemented for arr chunk column");
}

// Per-chunk array-view accessor for the array column: casts the chunk at
// chunk_id to ArrayChunk and is meant to forward to one of its members.
// NOTE(review): the return statement below is incomplete — it stops at `->`
// with no member name or call, so this does not compile as written. The
// intended ArrayChunk accessor is not visible here; confirm and complete it.
// NOTE(review): ChunkedColumnBase declares a virtual with this exact
// signature, but this declaration is not marked `override` (Span() in this
// class is) — consider adding it.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
chunk_array_view_impl(int64_t chunk_id) const {
return std::dynamic_pointer_cast<ArrayChunk>(chunks_[chunk_id])->
}

ArrayView
Expand Down
16 changes: 14 additions & 2 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,12 +771,24 @@ ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id,
return field_data->Span(chunk_id);
}
auto field_data = insert_record_.get_data_base(field_id);
AssertInfo(field_data->num_chunk() == 1,
"num chunk not equal to 1 for sealed segment");
// system field
return field_data->get_span_base(0);
}

// Returns the ArrayView list and the accompanying FixedVector<bool> for one
// chunk of an array field.
//
// @param field_id  field whose column is looked up in fields_
// @param chunk_id  index of the chunk inside that column
// @return whatever the column's ArrayViews(chunk_id) yields
//
// Takes a shared lock on mutex_ for the duration of the call and asserts that
// the field is marked ready in field_data_ready_bitset_. If the field has no
// entry in fields_, reports ErrorCode::UnexpectedError through PanicInfo.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
ChunkedSegmentSealedImpl::chunk_array_view_impl(FieldId field_id,
                                                int64_t chunk_id) const {
    std::shared_lock lck(mutex_);
    AssertInfo(get_bit(field_data_ready_bitset_, field_id),
               "Can't get bitset element at " + std::to_string(field_id.get()));
    if (auto it = fields_.find(field_id); it != fields_.end()) {
        auto& field_data = it->second;
        // Must be the array accessor: StringViews() yields
        // std::vector<std::string_view>, which cannot be converted to the
        // std::vector<ArrayView> this function is declared to return.
        return field_data->ArrayViews(chunk_id);
    }
    PanicInfo(ErrorCode::UnexpectedError,
              "chunk_array_view_impl only used for array column field");
}

std::pair<std::vector<std::string_view>, FixedVector<bool>>
ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id,
int64_t chunk_id) const {
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ class ChunkedSegmentSealedImpl : public SegmentSealed {
// Override returning, for one chunk of a field, a vector of string views
// paired with a FixedVector<bool>; defined out of line as
// ChunkedSegmentSealedImpl::chunk_view_impl.
std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const override;

// Override returning, for one chunk of a field, a vector of ArrayView
// paired with a FixedVector<bool>; defined out of line as
// ChunkedSegmentSealedImpl::chunk_array_view_impl.
std::pair<std::vector<ArrayView>, FixedVector<bool>>
chunk_array_view_impl(FieldId field_id, int64_t chunk_id) const override;

std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_by_offsets(FieldId field_id,
int64_t chunk_id,
Expand Down
16 changes: 8 additions & 8 deletions internal/core/src/segcore/SegmentInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,17 +148,14 @@ class SegmentInternalInterface : public SegmentInterface {
template <typename ViewType>
std::pair<std::vector<ViewType>, FixedVector<bool>>
chunk_view(FieldId field_id, int64_t chunk_id) const {
auto [string_views, valid_data] = chunk_view_impl(field_id, chunk_id);

if constexpr (std::is_same_v<ViewType, std::string_view>) {
auto [string_views, valid_data] = chunk_view_impl(field_id, chunk_id);
return std::make_pair(std::move(string_views),
std::move(valid_data));
} else {
std::vector<ViewType> res;
res.reserve(string_views.size());
for (const auto& view : string_views) {
res.emplace_back(view);
}
return std::make_pair(res, valid_data);
} else if constexpr (std::is_same_v<ViewType, ArrayView>){
auto [array_views, valid_data] = chunk_array_view_impl(field_id, chunk_id);
return std::make_pair(array_views, valid_data);
}
}

Expand Down Expand Up @@ -428,6 +425,9 @@ class SegmentInternalInterface : public SegmentInterface {
// Pure-virtual hook behind chunk_view<std::string_view>(): implementations
// return the string views for one chunk of a field together with a
// FixedVector<bool> (bound to the name valid_data at the call site).
virtual std::pair<std::vector<std::string_view>, FixedVector<bool>>
chunk_view_impl(FieldId field_id, int64_t chunk_id) const = 0;

// Pure-virtual hook behind chunk_view<ArrayView>(): implementations return
// the ArrayView entries for one chunk of a field together with a
// FixedVector<bool> (bound to the name valid_data at the call site).
virtual std::pair<std::vector<ArrayView>, FixedVector<bool>>
chunk_array_view_impl(FieldId field_id, int64_t chunk_id) const = 0;

// internal API: return buffer reference to field chunk data located from start_offset
virtual std::pair<BufferView, FixedVector<bool>>
get_chunk_buffer(FieldId field_id,
Expand Down

0 comments on commit dea6494

Please sign in to comment.