diff --git a/api/src/DuckDBVector.ts b/api/src/DuckDBVector.ts
index f9279cb..86c96ba 100644
--- a/api/src/DuckDBVector.ts
+++ b/api/src/DuckDBVector.ts
@@ -2709,27 +2709,35 @@ export class DuckDBEnum32Vector extends DuckDBVector {
 }
 
 export class DuckDBListVector extends DuckDBVector {
+  /** Vector this one was sliced from, or null for a root vector. */
+  private readonly parentList: DuckDBListVector | null;
   private readonly listType: DuckDBListType;
   private readonly entryData: BigUint64Array;
   private readonly validity: DuckDBValidity;
   private readonly vector: duckdb.Vector;
   private childData: DuckDBVector;
+  /** Index of this vector's first item within parentList (0 for a root). */
+  private readonly itemOffset: number;
   private readonly _itemCount: number;
   private readonly itemCache: (DuckDBListValue | null | undefined)[];
   constructor(
+    parentList: DuckDBListVector | null,
     listType: DuckDBListType,
     entryData: BigUint64Array,
     validity: DuckDBValidity,
     vector: duckdb.Vector,
     childData: DuckDBVector,
+    itemOffset: number,
     itemCount: number
   ) {
     super();
+    this.parentList = parentList;
     this.listType = listType;
     this.entryData = entryData;
     this.validity = validity;
     this.vector = vector;
     this.childData = childData;
+    this.itemOffset = itemOffset;
     this._itemCount = itemCount;
     this.itemCache = [];
   }
@@ -2759,11 +2767,13 @@ export class DuckDBListVector extends DuckDBVector {
     );
 
     return new DuckDBListVector(
+      null,
       listType,
       entryData,
       validity,
       vector,
       childData,
+      0,
       itemCount
     );
   }
@@ -2796,78 +2806,97 @@ export class DuckDBListVector extends DuckDBVector {
     return item;
   }
+  /**
+   * Stores `value` (or null) at `itemIndex`. A sliced vector forwards the write
+   * to its parent at the translated index, so it ends up on the root vector.
+   */
   public setItem(itemIndex: number, value: DuckDBListValue | null) {
-    // TODO: don't allow for non-root vectors
-    this.itemCache[itemIndex] = value;
-    this.validity.setItemValid(itemIndex, value != null);
+    if (this.parentList) {
+      this.parentList.setItem(this.itemOffset + itemIndex, value);
+    } else {
+      // flush() gathers values through getItem(), which is expected to serve
+      // them from itemCache, so the write must be recorded there as well.
+      this.itemCache[itemIndex] = value;
+      this.validity.setItemValid(itemIndex, value != null);
+    }
   }
+  /**
+   * Writes pending items to the underlying duckdb vector. A sliced vector
+   * delegates to its parent and then resets its own item cache.
+   */
   public flush() {
-    // TODO: don't allow for non-root vectors
-
-    // update entryData offset & lengths
-    // calculate new child vector size (sum of all item lengths)
-    let totalLength = 0;
-    for (let itemIndex = 0; itemIndex < this._itemCount; itemIndex++) {
-      const entryDataStartIndex = itemIndex * 2;
-      this.entryData[entryDataStartIndex] = BigInt(totalLength);
-      // ensure the cache is populated for all items
-      const item = this.getItem(itemIndex);
-      if (item) {
-        this.entryData[entryDataStartIndex + 1] = BigInt(item.items.length);
-        totalLength += item.items.length;
-      } else {
-        this.entryData[entryDataStartIndex + 1] = 0n;
+    if (this.parentList) {
+      this.parentList.flush();
+      for (let i = 0; i < this.itemCount; i++) {
+        this.itemCache[i] = undefined;
+      }
+    } else {
+      // update entryData offset & lengths
+      // calculate new child vector size (sum of all item lengths)
+      let totalLength = 0;
+      for (let itemIndex = 0; itemIndex < this._itemCount; itemIndex++) {
+        const entryDataStartIndex = itemIndex * 2;
+        this.entryData[entryDataStartIndex] = BigInt(totalLength);
+        // ensure the cache is populated for all items
+        const item = this.getItem(itemIndex);
+        if (item) {
+          this.entryData[entryDataStartIndex + 1] = BigInt(item.items.length);
+          totalLength += item.items.length;
+        } else {
+          this.entryData[entryDataStartIndex + 1] = 0n;
+        }
       }
-    }
 
-    // set new child vector size
-    duckdb.list_vector_set_size(this.vector, totalLength);
+      // set new child vector size
+      duckdb.list_vector_set_size(this.vector, totalLength);
 
-    // recreate childData after resize
-    const child_vector = duckdb.list_vector_get_child(this.vector);
-    const child_vector_size = duckdb.list_vector_get_size(this.vector);
-    this.childData = DuckDBVector.create(
-      child_vector,
-      child_vector_size,
-      this.listType.valueType
-    );
-
-    // set all childData items
-    let childItemAbsoluteIndex = 0;
-    for (let listIndex = 0; listIndex < this._itemCount; listIndex++) {
-      const list = this.getItem(listIndex);
-      if (list) {
-        for (
-          let childItemRelativeIndex = 0;
-          childItemRelativeIndex < list.items.length;
-          childItemRelativeIndex++
-        ) {
-          this.childData.setItem(
-            childItemAbsoluteIndex++,
-            list.items[childItemRelativeIndex]
-          );
+      // recreate childData after resize
+      const child_vector = duckdb.list_vector_get_child(this.vector);
+      const child_vector_size = duckdb.list_vector_get_size(this.vector);
+      this.childData = DuckDBVector.create(
+        child_vector,
+        child_vector_size,
+        this.listType.valueType
+      );
+
+      // set all childData items
+      let childItemAbsoluteIndex = 0;
+      for (let listIndex = 0; listIndex < this._itemCount; listIndex++) {
+        const list = this.getItem(listIndex);
+        if (list) {
+          for (
+            let childItemRelativeIndex = 0;
+            childItemRelativeIndex < list.items.length;
+            childItemRelativeIndex++
+          ) {
+            this.childData.setItem(
+              childItemAbsoluteIndex++,
+              list.items[childItemRelativeIndex]
+            );
+          }
         }
       }
-    }
 
-    // copy childData to child vector
-    this.childData.flush();
+      // copy childData to child vector
+      this.childData.flush();
 
-    // copy entryData to vector
-    duckdb.copy_data_to_vector(
-      this.vector,
-      0,
-      this.entryData.buffer as ArrayBuffer,
-      this.entryData.byteOffset,
-      this.entryData.byteLength
-    );
+      // copy entryData to vector
+      duckdb.copy_data_to_vector(
+        this.vector,
+        0,
+        this.entryData.buffer as ArrayBuffer,
+        this.entryData.byteOffset,
+        this.entryData.byteLength
+      );
 
-    // flush validity
-    this.validity.flush(this.vector);
+      // flush validity
+      this.validity.flush(this.vector);
+    }
   }
 
   public override slice(offset: number, length: number): DuckDBListVector {
     const entryDataStartIndex = offset * 2;
     return new DuckDBListVector(
+      this,
       this.listType,
       this.entryData.slice(
         entryDataStartIndex,
@@ -2876,6 +2905,7 @@ export class DuckDBListVector extends DuckDBVector {
       this.validity.slice(offset, length),
       this.vector,
       this.childData,
+      offset,
       length
     );
   }
diff --git a/api/test/api.test.ts b/api/test/api.test.ts
index 4e8a7ba..b128c49 100644
--- a/api/test/api.test.ts
+++ b/api/test/api.test.ts
@@ -1122,6 +1122,17 @@ describe('api', () => {
     assert.equal(vector.getItem(1), 12345);
     assert.equal(vector.getItem(2), null);
   });
+  test('write list vector', () => {
+    const chunk = DuckDBDataChunk.create([LIST(INTEGER)], 3);
+    const vector = chunk.getColumnVector(0) as DuckDBListVector;
+    assert.equal(vector.itemCount, 3);
+    vector.setItem(0, listValue([10, 11, 12]));
+    vector.setItem(1, listValue([20, 21, 22]));
+    vector.setItem(2, null);
+    assert.deepEqual(vector.getItem(0), listValue([10, 11, 12]));
+    assert.deepEqual(vector.getItem(1), listValue([20, 21, 22]));
+    assert.equal(vector.getItem(2), null);
+  });
   test('create and append data chunk', async () => {
     await withConnection(async (connection) => {
       const values = [42, 12345, null];
@@ -1280,6 +1291,52 @@ describe('api', () => {
       }
     });
   });
+  test('create and append data chunk , modify nested list vector', async () => {
+    await withConnection(async (connection) => {
+      const originalValues = [
+        listValue([
+          listValue([110, 111]),
+          listValue([]),
+          listValue([130]),
+        ]),
+        listValue([]),
+        listValue([
+          listValue([310, 311, 312]),
+          listValue([320, 321]),
+          listValue([330, 331, 332, 333]),
+        ]),
+      ];
+
+      const chunk = DuckDBDataChunk.create([LIST(LIST(INTEGER))], originalValues.length);
+      chunk.setColumnValues(0, originalValues);
+
+      const outerListVector = chunk.getColumnVector(0) as DuckDBListVector;
+      const innerListVector = outerListVector.getItemVector(2) as DuckDBListVector;
+      innerListVector.setItem(1, listValue([350, 351, 352, 353, 354]));
+      innerListVector.flush();
+
+      const modifiedValues = [...originalValues];
+      modifiedValues[2] = listValue([
+        listValue([310, 311, 312]),
+        listValue([350, 351, 352, 353, 354]),
+        listValue([330, 331, 332, 333]),
+      ]);
+
+      await connection.run('create table target(col0 integer[][])');
+      const appender = await connection.createAppender('main', 'target');
+      appender.appendDataChunk(chunk);
+      appender.flush();
+
+      const result = await connection.run('from target');
+      const resultChunk = await result.fetchChunk();
+      assert.isDefined(resultChunk);
+      if (resultChunk) {
+        assert.equal(resultChunk.columnCount, 1);
+        assert.equal(resultChunk.rowCount, modifiedValues.length);
+        assertValues(resultChunk, 0, DuckDBListVector, modifiedValues);
+      }
+    });
+  });
   test('create and append data chunk with arrays of integers', async () => {
     await withConnection(async (connection) => {
       const values = [