Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle nested list vector updates #108

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 74 additions & 57 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2709,27 +2709,33 @@ export class DuckDBEnum32Vector extends DuckDBVector<string> {
}

export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
private readonly parentList: DuckDBListVector | null;
private readonly listType: DuckDBListType;
private readonly entryData: BigUint64Array;
private readonly validity: DuckDBValidity;
private readonly vector: duckdb.Vector;
private childData: DuckDBVector;
private readonly itemOffset: number;
private readonly _itemCount: number;
private readonly itemCache: (DuckDBListValue | null | undefined)[];
/**
 * Builds a list vector view over a native DuckDB vector.
 *
 * @param parentList The list vector this one was sliced from, or `null` for
 *   a root vector. `slice` passes `this`; the static factory passes `null`.
 * @param listType Logical list type; its `valueType` is used when the child
 *   data vector is re-created during `flush`.
 * @param entryData Packed list entries, two 64-bit slots per item: the
 *   child offset at index `2 * i` and the length at index `2 * i + 1`.
 * @param validity Validity (null-ness) tracking for the items.
 * @param vector The underlying native vector handle.
 * @param childData Vector holding the flattened child items.
 * @param itemOffset Index of this vector's first item within `parentList`
 *   (`0` for a root vector); added to item indexes when a write is
 *   forwarded to the parent.
 * @param itemCount Number of list items covered by this vector.
 */
constructor(
  parentList: DuckDBListVector | null,
  listType: DuckDBListType,
  entryData: BigUint64Array,
  validity: DuckDBValidity,
  vector: duckdb.Vector,
  childData: DuckDBVector,
  itemOffset: number,
  itemCount: number
) {
  super();
  this.parentList = parentList;
  this.listType = listType;
  this.entryData = entryData;
  this.validity = validity;
  this.vector = vector;
  this.childData = childData;
  // Terminated with `;` — the original used `,`, which silently chained this
  // assignment and the next one into a single comma expression.
  this.itemOffset = itemOffset;
  this._itemCount = itemCount;
  // Item cache starts empty; entries are filled in as items are read or set.
  this.itemCache = [];
}
Expand Down Expand Up @@ -2759,11 +2765,13 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
);

return new DuckDBListVector(
null,
listType,
entryData,
validity,
vector,
childData,
0,
itemCount
);
}
Expand Down Expand Up @@ -2796,78 +2804,86 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
return item;
}
/**
 * Stores `value` for the item at `itemIndex` in this vector's item cache and
 * records it in the validity data as valid exactly when `value` is non-null.
 *
 * When this vector has a parent list (as set up by `slice`, which passes
 * itself as the parent), the write is also forwarded to the parent at
 * `itemOffset + itemIndex`, so the parent sees the update as well.
 *
 * @param itemIndex Index of the item, relative to this vector.
 * @param value The list value to store, or `null` for a null item.
 */
public setItem(itemIndex: number, value: DuckDBListValue | null) {
// Non-root vectors are supported: the write is cached locally and then
// forwarded to the parent list below.

this.itemCache[itemIndex] = value;
this.validity.setItemValid(itemIndex, value != null);
if (this.parentList) {
// Translate the slice-relative index into the parent's index space.
this.parentList.setItem(this.itemOffset + itemIndex, value);
} else {
// NOTE(review): validity was already updated unconditionally above, so for
// root vectors this call repeats it. This text appears to combine removed
// and added diff lines — confirm which of the two calls is intended.
this.validity.setItemValid(itemIndex, value != null);
}
}
public flush() {
// TODO: don't allow for non-root vectors

// update entryData offset & lengths
// calculate new child vector size (sum of all item lengths)
let totalLength = 0;
for (let itemIndex = 0; itemIndex < this._itemCount; itemIndex++) {
const entryDataStartIndex = itemIndex * 2;
this.entryData[entryDataStartIndex] = BigInt(totalLength);
// ensure the cache is populated for all items
const item = this.getItem(itemIndex);
if (item) {
this.entryData[entryDataStartIndex + 1] = BigInt(item.items.length);
totalLength += item.items.length;
} else {
this.entryData[entryDataStartIndex + 1] = 0n;
if (this.parentList) {
this.parentList.flush();
for (let i = 0; i < this.itemCount; i++) {
this.itemCache[i] = undefined;
}
} else {
// update entryData offset & lengths
// calculate new child vector size (sum of all item lengths)
let totalLength = 0;
for (let itemIndex = 0; itemIndex < this._itemCount; itemIndex++) {
const entryDataStartIndex = itemIndex * 2;
this.entryData[entryDataStartIndex] = BigInt(totalLength);
// ensure the cache is populated for all items
const item = this.getItem(itemIndex);
if (item) {
this.entryData[entryDataStartIndex + 1] = BigInt(item.items.length);
totalLength += item.items.length;
} else {
this.entryData[entryDataStartIndex + 1] = 0n;
}
}
}

// set new child vector size
duckdb.list_vector_set_size(this.vector, totalLength);
// set new child vector size
duckdb.list_vector_set_size(this.vector, totalLength);

// recreate childData after resize
const child_vector = duckdb.list_vector_get_child(this.vector);
const child_vector_size = duckdb.list_vector_get_size(this.vector);
this.childData = DuckDBVector.create(
child_vector,
child_vector_size,
this.listType.valueType
);

// set all childData items
let childItemAbsoluteIndex = 0;
for (let listIndex = 0; listIndex < this._itemCount; listIndex++) {
const list = this.getItem(listIndex);
if (list) {
for (
let childItemRelativeIndex = 0;
childItemRelativeIndex < list.items.length;
childItemRelativeIndex++
) {
this.childData.setItem(
childItemAbsoluteIndex++,
list.items[childItemRelativeIndex]
);
// recreate childData after resize
const child_vector = duckdb.list_vector_get_child(this.vector);
const child_vector_size = duckdb.list_vector_get_size(this.vector);
this.childData = DuckDBVector.create(
child_vector,
child_vector_size,
this.listType.valueType
);

// set all childData items
let childItemAbsoluteIndex = 0;
for (let listIndex = 0; listIndex < this._itemCount; listIndex++) {
const list = this.getItem(listIndex);
if (list) {
for (
let childItemRelativeIndex = 0;
childItemRelativeIndex < list.items.length;
childItemRelativeIndex++
) {
this.childData.setItem(
childItemAbsoluteIndex++,
list.items[childItemRelativeIndex]
);
}
}
}
}

// copy childData to child vector
this.childData.flush();
// copy childData to child vector
this.childData.flush();

// copy entryData to vector
duckdb.copy_data_to_vector(
this.vector,
0,
this.entryData.buffer as ArrayBuffer,
this.entryData.byteOffset,
this.entryData.byteLength
);
// copy entryData to vector
duckdb.copy_data_to_vector(
this.vector,
0,
this.entryData.buffer as ArrayBuffer,
this.entryData.byteOffset,
this.entryData.byteLength
);

// flush validity
this.validity.flush(this.vector);
// flush validity
this.validity.flush(this.vector);
}
}
public override slice(offset: number, length: number): DuckDBListVector {
const entryDataStartIndex = offset * 2;
return new DuckDBListVector(
this,
this.listType,
this.entryData.slice(
entryDataStartIndex,
Expand All @@ -2876,6 +2892,7 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
this.validity.slice(offset, length),
this.vector,
this.childData,
offset,
length
);
}
Expand Down
57 changes: 57 additions & 0 deletions api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,17 @@ describe('api', () => {
assert.equal(vector.getItem(1), 12345);
assert.equal(vector.getItem(2), null);
});
// Writes two integer lists and one null into a fresh LIST(INTEGER) vector
// and checks that each item reads back as written (no flush involved).
test('write list vector', () => {
  const rowCount = 3;
  const rows: readonly (readonly number[] | null)[] = [
    [10, 11, 12],
    [20, 21, 22],
    null,
  ];

  const listVector = DuckDBDataChunk.create(
    [LIST(INTEGER)],
    rowCount
  ).getColumnVector(0) as DuckDBListVector;
  assert.equal(listVector.itemCount, rowCount);

  // First pass: write every row.
  rows.forEach((row, rowIndex) => {
    listVector.setItem(rowIndex, row === null ? null : listValue([...row]));
  });

  // Second pass: read every row back and compare with a freshly built value.
  rows.forEach((row, rowIndex) => {
    const actual = listVector.getItem(rowIndex);
    if (row === null) {
      assert.equal(actual, null);
    } else {
      assert.deepEqual(actual, listValue([...row]));
    }
  });
});
test('create and append data chunk', async () => {
await withConnection(async (connection) => {
const values = [42, 12345, null];
Expand Down Expand Up @@ -1280,6 +1291,52 @@ describe('api', () => {
}
});
});
// Fills a LIST(LIST(INTEGER)) chunk, overwrites one inner list through the
// item vector of the third outer list, flushes that inner vector, appends the
// chunk to a table, and checks that the table reflects the modification.
test('create and append data chunk , modify nested list vector', async () => {
  await withConnection(async (connection) => {
    const firstRow = listValue([
      listValue([110, 111]),
      listValue([]),
      listValue([130]),
    ]);
    const secondRow = listValue([]);
    const thirdRow = listValue([
      listValue([310, 311, 312]),
      listValue([320, 321]),
      listValue([330, 331, 332, 333]),
    ]);
    const originalValues = [firstRow, secondRow, thirdRow];

    const chunk = DuckDBDataChunk.create(
      [LIST(LIST(INTEGER))],
      originalValues.length
    );
    chunk.setColumnValues(0, originalValues);

    // Replace the middle inner list of the third row via the nested vector.
    const outer = chunk.getColumnVector(0) as DuckDBListVector;
    const inner = outer.getItemVector(2) as DuckDBListVector;
    inner.setItem(1, listValue([350, 351, 352, 353, 354]));
    inner.flush();

    // Expected table contents: identical to the input except for row 2.
    const expectedThirdRow = listValue([
      listValue([310, 311, 312]),
      listValue([350, 351, 352, 353, 354]),
      listValue([330, 331, 332, 333]),
    ]);
    const modifiedValues = originalValues.map((row, rowIndex) =>
      rowIndex === 2 ? expectedThirdRow : row
    );

    await connection.run('create table target(col0 integer[][])');
    const appender = await connection.createAppender('main', 'target');
    appender.appendDataChunk(chunk);
    appender.flush();

    const queryResult = await connection.run('from target');
    const fetched = await queryResult.fetchChunk();
    assert.isDefined(fetched);
    if (!fetched) {
      return;
    }
    assert.equal(fetched.columnCount, 1);
    assert.equal(fetched.rowCount, modifiedValues.length);
    assertValues(fetched, 0, DuckDBListVector, modifiedValues);
  });
});
test('create and append data chunk with arrays of integers', async () => {
await withConnection(async (connection) => {
const values = [
Expand Down
Loading