From e3580d7ea20e6640c9595cc148710b19799399d5 Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Thu, 10 Oct 2024 21:38:32 -0400 Subject: [PATCH] allow subfield access (#156) This allows using struct columns in Arrow through an optional 'subfield' property on an encoding channel. 'field' remains the unit of laziness, but nested columns can now hold multiple values, which is useful for expensive transformations that return several numbers at once (especially x and y together, as in geoprojections). I've included a test in SwitchPositions.svelte that demonstrates it working. ---- > [!IMPORTANT] > Add subfield access for struct columns in Arrow, enhancing nested data handling in Deepscatter with updates across behavior, aesthetics, rendering, and types. > > - **Behavior**: > - Allow subfield access in struct columns in Arrow using an optional `subfield` parameter. > - Demonstrated with a test in `SwitchPositions.svelte`. > - **Deeptable**: > - Updated `get_column()` to support subfields in `tile.ts`. > - Modified `domain()` to handle subfields for extent calculations. > - **Aesthetics**: > - Added `subfield` handling in `Aesthetic.ts` and `AestheticSet.ts`. > - Updated `StatefulAesthetic` to track needed fields with subfields. > - **Rendering**: > - Adjusted buffer management in `regl_rendering.ts` to accommodate subfields. > - Updated `BufferManager` to handle nested vectors with subfields. > - **Types**: > - Extended `ChannelType` and related types to include `subfield` property in `types.ts`. > > This description was created by [Ellipsis](https://www.ellipsis.dev?ref=nomic-ai%2Fdeepscatter&utm_source=github&utm_medium=referral) for 181fa0a55d6d32f52f6e65ccb7e77a2c9fd85191. It will automatically update as commits are pushed. 
--- dev/svelte/SwitchPositions.svelte | 87 ++++++++++++++++++--- src/Deeptable.ts | 44 ++++++++--- src/aesthetics/Aesthetic.ts | 44 +++++++++-- src/aesthetics/AestheticSet.ts | 18 +++++ src/aesthetics/ScaledAesthetic.ts | 2 +- src/aesthetics/StatefulAesthetic.ts | 8 +- src/regl_rendering.ts | 114 ++++++++++++++++------------ src/rendering.ts | 15 ---- src/scatterplot.ts | 71 +++++++++-------- src/selection.ts | 17 ++++- src/tile.ts | 42 +++++++--- src/tixrixqid.ts | 18 +++-- src/types.ts | 15 +++- 13 files changed, 346 insertions(+), 149 deletions(-) diff --git a/dev/svelte/SwitchPositions.svelte b/dev/svelte/SwitchPositions.svelte index 22d1431e0..a5db8d477 100644 --- a/dev/svelte/SwitchPositions.svelte +++ b/dev/svelte/SwitchPositions.svelte @@ -1,23 +1,88 @@ - \ No newline at end of file + diff --git a/src/Deeptable.ts b/src/Deeptable.ts index 9d07536d5..4d7134a22 100644 --- a/src/Deeptable.ts +++ b/src/Deeptable.ts @@ -24,6 +24,7 @@ import { Int32, Int8, tableToIPC, + Struct, } from 'apache-arrow'; import { Scatterplot } from './scatterplot'; import { wrapArrowTable } from './wrap_arrow'; @@ -34,6 +35,8 @@ import type { IdSelectParams, } from './selection'; import { DataSelection } from './selection'; +import { Some, TupleMap } from './utilityFunctions'; +import { getNestedVector } from './regl_rendering'; type TransformationStatus = 'queued' | 'in progress' | 'complete' | 'failed'; @@ -70,7 +73,8 @@ export class Deeptable { ...defaultTransformations, }; public _plot: Scatterplot | null; - private extents: Record = {}; + private extents: TupleMap = + new TupleMap(); // A 3d identifier for the tile. Usually [z, x, y] private _extent?: Rectangle; public _ix_seed = 0; @@ -134,6 +138,9 @@ export class Deeptable { this.root_tile = new Tile(defaultManifest, null, this); const preProcessRootTile = this.root_tile.preprocessRootTileInfo(); + // At instantiation, the deeptable isn't ready; only once this + // async stuff is done can the deeptable be used. 
+ // TODO: Add an async static method as the preferred initialization method. this.promise = preProcessRootTile.then(async () => { const batch = await this.root_tile.get_arrow(null); const schema = batch.schema; @@ -346,13 +353,24 @@ export class Deeptable { domain( columnName: string, + subfield?: string[], ): [T[1], T[1]] { - if (this.extents[columnName]) { - return this.extents[columnName]; + const key = [columnName, ...(subfield || [])] as Some; + if (this.extents.get(key)) { + return this.extents.get(key); + } + + // First -- look at the schema metadata. + let dim = this._schema?.fields.find((d) => d.name === columnName); + for (const sub in subfield) { + if (dim === undefined) { + continue; + } + console.log({ dim }); + dim = (dim as Field>).type.children.find( + (d) => d.name === sub, + ); } - const dim = this._schema?.fields.find( - (d) => d.name === columnName, - ) as Field; if (dim !== undefined) { let min: T[0] | undefined = undefined; let max: T[0] | undefined = undefined; @@ -375,24 +393,30 @@ export class Deeptable { 'Date field extents in metadata must be passed as strings', ); } - return (this.extents[columnName] = [new Date(min), new Date(max)]); + this.extents.set(key, [new Date(min), new Date(max)]); + return this.extents.get(key); } if (typeof max === 'string') { throw new Error('Failed to parse min-max as numbers'); } if (min !== undefined) { - return (this.extents[columnName] = [min as T[1], max as T[1]] as + this.extents.set(key, [min as T[1], max as T[1]] as | [number, number] | [Date, Date]); + return this.extents.get(key); } } + const vectors: Vector[] = this.map((tile) => tile) .filter((d) => d.hasLoadedColumn(columnName)) - .map((d) => d.record_batch.getChild(columnName) as Vector); + .map((d) => getNestedVector(d, [columnName, ...(subfield || [])])); + const extented = extent([...new Vector(vectors)]) as [T[1], T[1]] as | [number, number] | [Date, Date]; - return (this.extents[columnName] = extented); + + this.extents.set(key, 
extented); + return this.extents.get(key); } *points(bbox: Rectangle | undefined, max_ix = 1e99) { diff --git a/src/aesthetics/Aesthetic.ts b/src/aesthetics/Aesthetic.ts index 9dd1ae15d..4c1ef3b49 100644 --- a/src/aesthetics/Aesthetic.ts +++ b/src/aesthetics/Aesthetic.ts @@ -1,10 +1,11 @@ import type { TextureSet } from './AestheticSet'; import { isConstantChannel } from '../typing'; -import { Type, Vector } from 'apache-arrow'; +import { Struct, Type, Vector } from 'apache-arrow'; import { StructRowProxy } from 'apache-arrow/row/struct'; import { isNumber } from 'lodash'; import type * as DS from '../types'; import { Scatterplot } from '../scatterplot'; +import { Some } from '../utilityFunctions'; /** * An Aesthetic bundles all operations in mapping from user dataspace to webGL based aesthetics. @@ -26,6 +27,7 @@ export abstract class Aesthetic< public abstract default_range: [Output['rangeType'], Output['rangeType']]; public scatterplot: Scatterplot; public field: string | null = null; + public subfield: string[] = []; public _texture_buffer: Float32Array | Uint8Array | null = null; protected abstract _func?: (d: Input['domainType']) => Output['rangeType']; public aesthetic_map: TextureSet; @@ -76,9 +78,25 @@ export abstract class Aesthetic< this.field = null; } else { this.field = encoding.field; + if (encoding.subfield) { + this.subfield = Array.isArray(encoding.subfield) + ? encoding.subfield + : [encoding.subfield]; + } } } + /** + * Returns the keys that are used to access the data in the record batch, + * including with any nesting. 
+ */ + get columnKeys(): null | Some { + if (this.field === null) { + return null; + } + return [this.field, ...this.subfield] as Some; + } + get deeptable() { return this.scatterplot.deeptable; } @@ -100,10 +118,14 @@ export abstract class Aesthetic< value_for(point: Datum): Input['domainType'] | null { if (this.field && point[this.field]) { - return point[this.field] as Input['domainType']; + let v = point[this.field] as Input['domainType']; + for (let i = 0; i < this.subfield.length; i++) { + v = v[this.subfield[i]] as Input['domainType']; + } + return v; + // Needs a default perhaps? + return null; } - // Needs a default perhaps? - return null; } get map_position() { @@ -136,9 +158,17 @@ export abstract class Aesthetic< if (this.field === null || this.field === undefined) { return (this.column = null); } - return (this.column = this.deeptable.root_tile.record_batch.getChild( - this.field, - ) as Vector); + let output: Vector | Vector | null = null; + for (const f of [this.field, ...this.subfield]) { + if (output === null) { + output = this.deeptable.root_tile.record_batch.getChild(f) as Vector< + Input['arrowType'] + >; + } else { + output = (output as Vector).getChild(f) as Vector; + } + } + return (this.column = output as Vector); } is_dictionary(): boolean { diff --git a/src/aesthetics/AestheticSet.ts b/src/aesthetics/AestheticSet.ts index 04daf719d..20273a172 100644 --- a/src/aesthetics/AestheticSet.ts +++ b/src/aesthetics/AestheticSet.ts @@ -6,6 +6,7 @@ import type { Deeptable } from '../Deeptable'; import { StatefulAesthetic } from './StatefulAesthetic'; import type { Encoding } from '../types'; import type * as DS from '../types'; +import { TupleSet } from '../utilityFunctions'; type AesMap = { [K in keyof typeof dimensions]: StatefulAesthetic< @@ -83,6 +84,12 @@ export class AestheticSet { } } + _neededFields: TupleSet = new TupleSet(); + + get neededFields(): string[][] { + return [...this._neededFields.values()]; + } + apply_encoding(encoding: 
Encoding) { if ( encoding['jitter_radius'] && @@ -107,6 +114,17 @@ export class AestheticSet { this.dim(k).update(encoding[k] as DS.ChannelType | null); } + // Update the needed fields. + this._neededFields.clear(); + + for (const v of Object.values(this.store)) { + if (v instanceof StatefulAesthetic) { + for (const f of v.neededFields) { + this._neededFields.add(f); + } + } + } + // Apply settings that are not full-on aesthetics. for (const setting of ['jitter_method'] as const) { this.options[setting].last = this.options[setting].current; diff --git a/src/aesthetics/ScaledAesthetic.ts b/src/aesthetics/ScaledAesthetic.ts index 072d6ff68..af31072ec 100644 --- a/src/aesthetics/ScaledAesthetic.ts +++ b/src/aesthetics/ScaledAesthetic.ts @@ -203,7 +203,7 @@ export abstract class ScaledAesthetic< Input['domainType'], ]; } else { - return this.scatterplot.deeptable.domain(this.field); + return this.scatterplot.deeptable.domain(this.field, this.subfield); } } diff --git a/src/aesthetics/StatefulAesthetic.ts b/src/aesthetics/StatefulAesthetic.ts index c631acb32..c1138965c 100644 --- a/src/aesthetics/StatefulAesthetic.ts +++ b/src/aesthetics/StatefulAesthetic.ts @@ -55,6 +55,7 @@ export type ConcreteScaledAesthetic = import type { Deeptable } from '../Deeptable'; import type { Regl } from 'regl'; import type { TextureSet } from './AestheticSet'; +import { Some } from '../utilityFunctions'; export class StatefulAesthetic { /** @@ -97,11 +98,12 @@ export class StatefulAesthetic { ] as [T, T]; } - get neededFields(): string[] { - return [this.current.field, this.last.field].filter( + get neededFields(): Some[] { + return [this.current.columnKeys, this.last.columnKeys].filter( (f) => f !== null, - ) as string[]; + ); } + get current() { return this.states[0]; } diff --git a/src/regl_rendering.ts b/src/regl_rendering.ts index 965580d57..85c020818 100644 --- a/src/regl_rendering.ts +++ b/src/regl_rendering.ts @@ -32,6 +32,7 @@ import { Scatterplot } from './scatterplot'; import { 
Data, Dictionary, + Struct, StructRowProxy, Type, Utf8, @@ -41,7 +42,7 @@ import { Color } from './aesthetics/ColorAesthetic'; import { StatefulAesthetic } from './aesthetics/StatefulAesthetic'; import { Filter, Foreground } from './aesthetics/BooleanAesthetic'; import { ZoomTransform } from 'd3-zoom'; -import { TupleMap } from './utilityFunctions'; +import { Some, TupleMap, TupleSet } from './utilityFunctions'; // eslint-disable-next-line import/prefer-default-export export class ReglRenderer extends Renderer { public regl: Regl; @@ -65,6 +66,11 @@ export class ReglRenderer extends Renderer { public tick_num?: number; public reglframe?: REGL.Cancellable; public bufferManager: BufferManager; + + private aes_to_buffer_num?: Record; + private variable_to_buffer_num?: TupleMap; + private buffer_num_to_variable?: string[][]; + // public _renderer : Renderer; constructor( @@ -159,7 +165,7 @@ export class ReglRenderer extends Renderer { ] as [number, number]; const props: DS.GlobalDrawProps = { // Copy the aesthetic as a string. - aes: { encoding: this.aes.encoding }, + // aes: { encoding: this.aes.encoding }, colors_as_grid: 0, corners: this.zoom.current_corners(), zoom_balance: prefs.zoom_balance, @@ -224,12 +230,7 @@ export class ReglRenderer extends Renderer { props.background_draw_needed[0] || props.background_draw_needed[1]; for (const tile of this.visible_tiles()) { // Do the binding operation; returns truthy if it's already done. 
- if ( - !this.bufferManager.ready( - tile, - this.needeedFields.map((d) => [d]), - ) - ) { + if (!this.bufferManager.ready(tile, this.aes.neededFields)) { continue; } @@ -259,6 +260,7 @@ export class ReglRenderer extends Renderer { a.tile_id ); }); + // console.log({ prop_list }); this._renderer(prop_list); } @@ -282,7 +284,7 @@ export class ReglRenderer extends Renderer { this.zoom.current_corners(), this.props.max_ix, 5, - this.needeedFields, + this.aes.neededFields.map((x) => x[0]), 'high', ); } else { @@ -291,7 +293,7 @@ export class ReglRenderer extends Renderer { undefined, prefs.max_points, 5, - this.needeedFields, + this.aes.neededFields.map((x) => x[0]), 'high', ); } @@ -886,7 +888,7 @@ export class ReglRenderer extends Renderer { type BufferSummary = { aesthetic: keyof typeof dimensions; time: time; - field: string; + field: [string, ...string[]]; }; const buffers: BufferSummary[] = []; const priorities = [ @@ -910,7 +912,10 @@ export class ReglRenderer extends Renderer { buffers.push({ aesthetic, time, - field: this.aes.dim(aesthetic)[time].field, + field: [ + this.aes.dim(aesthetic)[time].field, + ...this.aes.dim(aesthetic)[time].subfield, + ], }); } } catch (error) { @@ -935,20 +940,20 @@ export class ReglRenderer extends Renderer { const aes_to_buffer_num: Record = {}; // eg 'x' => 3 // Pre-allocate the 'ix' buffer and the 'ix_in_tile' buffers. 
- const variable_to_buffer_num: Record = { - ix: 0, - ix_in_tile: 1, - }; // eg 'year' => 3 + const variable_to_buffer_num: TupleMap = new TupleMap([ + [['ix'], 0], + [['ix_in_tile'], 1], + ]); // eg 'year' => 3 let num = 1; for (const { aesthetic, time, field } of buffers) { const k = `${aesthetic}--${time}`; - if (variable_to_buffer_num[field] !== undefined) { - aes_to_buffer_num[k] = variable_to_buffer_num[field]; + if (variable_to_buffer_num.get(field) !== undefined) { + aes_to_buffer_num[k] = variable_to_buffer_num.get(field); continue; } if (num++ < 16) { aes_to_buffer_num[k] = num; - variable_to_buffer_num[field] = num; + variable_to_buffer_num.set(field, num); continue; } else { // Don't use the last value, use the current value. @@ -959,18 +964,12 @@ export class ReglRenderer extends Renderer { } } - const buffer_num_to_variable = [ - ...Object.keys(variable_to_buffer_num).map((k) => [k]), - ]; + const buffer_num_to_variable = [...variable_to_buffer_num.keys()]; this.aes_to_buffer_num = aes_to_buffer_num; this.variable_to_buffer_num = variable_to_buffer_num; this.buffer_num_to_variable = buffer_num_to_variable; } - aes_to_buffer_num?: Record; - variable_to_buffer_num?: Record; - buffer_num_to_variable?: string[][]; - get discard_share() { // If jitter is temporal, e.g., or filters are in place, // it may make sense to estimate the number of hidden points. @@ -1029,7 +1028,7 @@ export class BufferManager { return this._integer_buffer; } - get(k: (string | Tile)[]): DS.BufferLocation | null { + get(k: Some): DS.BufferLocation | null { const a = this.bufferMap.get(this.arrayMap.get(k)); return a; } @@ -1041,17 +1040,22 @@ export class BufferManager { */ ready(tile: Tile, needed_dimensions: Iterable): boolean { // We don't allocate buffers for dimensions until they're needed. 
+ for (const keyset of [['ix'], ['ix_in_tile'], ...needed_dimensions] as Some< + string[] + >) { + const current = this.get([tile, ...keyset]); - for (const key of [['ix'], ['ix_in_tile'], ...needed_dimensions]) { - const current = this.get([tile, ...key]); if (current === null || current === undefined) { - if (tile.hasLoadedColumn(key[0])) { - this.create_regl_buffer(tile, key); + if (tile.hasLoadedColumn(keyset[0])) { + this.create_regl_buffer(tile, keyset); } else { - // console.log('not ready because of', key); - if (key[0] === 'ix_in_tile') { - this.create_regl_buffer(tile, key); + if (keyset[0] === 'ix_in_tile') { + this.create_regl_buffer(tile, keyset); } else { + if (tile.readyToUse) { + // tile.get_column(keyset[0]); + } else { + } return false; } } @@ -1145,11 +1149,10 @@ export class BufferManager { create_regl_buffer(tile: Tile, keys: string[]): void { const { renderer } = this; - const key = [tile, ...keys]; + const key = [tile, ...keys] as Some; if (this.arrayMap.has(key)) { return; } - // console.log({ keys }); if (keys[0] === 'ix_in_tile') { this.arrayMap.set(key, this.integer_array); if (!this.bufferMap.has(this.integer_array)) { @@ -1170,6 +1173,7 @@ export class BufferManager { const data_length = data.length; const buffer_desc = renderer.buffers.allocate_block(data_length, item_size); + buffer_desc.buffer.subdata(data, buffer_desc.offset); this.bufferMap.set(vector.data[0].values, buffer_desc); @@ -1177,7 +1181,7 @@ export class BufferManager { } // TODO: Build this out in next PR. 
-function getNestedVector( +export function getNestedVector( tile: Tile, key: string[], ): Vector { @@ -1203,9 +1207,8 @@ function getNestedVector( } } - let column: Vector = tile.record_batch.getChild( - key[0], - ); + let column: Vector | Vector = + tile.record_batch.getChild(key[0]); for (const k of key.slice(1)) { column = column.getChild(k); } @@ -1216,7 +1219,17 @@ function getNestedVector( if (!column.type || !column.type.typeId) { throw new Error(`Column ${key} has no type.`); } + function assertNotStruct( + value: Vector | Vector, + ): asserts value is Vector { + if (column.type.typeId === Type.Struct) { + throw new Error( + 'Structs are not supported for buffer data on column ' + key.join('->'), + ); + } + } + assertNotStruct(column); return column; } @@ -1323,7 +1336,6 @@ class MultipurposeBufferSet { byte_size: items * bytes_per_item, }; - // add a listener for GC on the value. this.pointer += items * bytes_per_item; return value; } @@ -1334,16 +1346,24 @@ class MultipurposeBufferSet { * @param prefs The preferences object to be used. * * @returns The fields that need to be allocated in the buffers for - * a tile to be drawn. + * a tile to be drawn. Returns a map of columns and any subfields in them that are needed. */ -export function neededFieldsToPlot(prefs: DS.CompletePrefs): Set { - const needed_keys: Set = new Set(); - if (!prefs.encoding) { +export function neededFieldsToPlot( + encoding: DS.Encoding | undefined, +): TupleSet { + const needed_keys = new TupleSet([['ix']]); + if (!encoding) { return needed_keys; } - for (const [_, v] of Object.entries(prefs.encoding)) { + for (const [_, v] of Object.entries(encoding)) { if (v && typeof v !== 'string' && v['field'] !== undefined) { - needed_keys.add(v['field'] as string); + const needed_key: Some = [v['field']]; + if (v['subfield'] !== undefined) { + const subfield = v['subfield']; + const asArray = Array.isArray(subfield) ? 
[...subfield] : [subfield]; + needed_key.push(...asArray); + } + needed_keys.add(needed_key); } } return needed_keys; diff --git a/src/rendering.ts b/src/rendering.ts index 39674ab61..10a0262c2 100644 --- a/src/rendering.ts +++ b/src/rendering.ts @@ -168,21 +168,6 @@ export class Renderer { return this.render_props.alpha; } - get needeedFields(): string[] { - const { aes } = this; - const needed = new Set(); - if (aes) { - for (const v of Object.values(aes.store)) { - if (v instanceof StatefulAesthetic) { - for (const f of v.neededFields) { - needed.add(f); - } - } - } - } - return [...needed, 'ix']; - } - get optimal_alpha() { // This extends a formula suggested by Ricky Reusser to include // discard share. diff --git a/src/scatterplot.ts b/src/scatterplot.ts index 96fd2ed1d..b1a932f07 100644 --- a/src/scatterplot.ts +++ b/src/scatterplot.ts @@ -121,6 +121,7 @@ export class Scatterplot { this.ready = new Promise((resolve) => { this.mark_ready = resolve; }); + this.click_handler = new ClickFunction(this); this.tooltip_handler = new TooltipHTML(this); this.label_click_handler = new LabelClick(this); @@ -389,7 +390,7 @@ export class Scatterplot { async reinitialize() { const { prefs } = this; - await this.deeptable.ready; + await this.deeptable.promise; await this.deeptable.root_tile.get_column('x'); this._renderer = new ReglRenderer( '#container-for-webgl-canvas', @@ -417,8 +418,11 @@ export class Scatterplot { ctx.fillRect(0, 0, window.innerWidth * 2, window.innerHeight * 2); void this._renderer.initialize(); - void this.deeptable.promise.then(() => this.mark_ready()); - return this.ready; + await this.deeptable.promise.then(() => { + this.mark_ready(); + }); + this.mark_ready(); + return; } /* @@ -632,11 +636,28 @@ export class Scatterplot { } await this.plot_queue; + // Ensure that the deeptable exists. 
+ if (this._root === undefined) { + const { source_url, arrow_table, arrow_buffer } = + prefs as DS.InitialAPICall; + const dataSpec = { source_url, arrow_table, arrow_buffer } as DS.DataSpec; + if (Object.values(dataSpec).filter((x) => x !== undefined).length !== 1) { + throw new Error( + 'The initial API call specify exactly one of source_url, arrow_table, or arrow_buffer', + ); + } + await this.load_deeptable(dataSpec); + } + this.update_prefs(prefs); + // Then ensure the renderer and interaction handlers exist. + if (this._zoom === undefined || this._renderer === undefined) { + await this.reinitialize(); + } if (prefs) { await this.start_transformations(prefs); } + this.plot_queue = this.unsafe_plotAPI(prefs); - await this.plot_queue; // eslint-disable-next-line @typescript-eslint/no-unused-vars for (const [_, hook] of Object.entries(this.hooks)) { @@ -666,23 +687,23 @@ export class Scatterplot { if (this.prefs.duration < delay) { delay = this.prefs.duration; } - const needed_keys: Set = neededFieldsToPlot(this.prefs); if (!prefs.encoding) { resolve(); } - if (this._renderer) { - this.deeptable.root_tile.require_columns(needed_keys); - // Immediately start loading what we can onto the GPUs, too. - for (const tile of this.renderer.visible_tiles()) { - this._renderer.bufferManager.ready( - tile, - [...needed_keys].map((k) => [k]), - ); - } - resolve(); - } else { - resolve(); + if (!this._renderer) { + throw new Error('No renderer has been initialized'); } + // + const needed_keys = neededFieldsToPlot(prefs.encoding); + this.deeptable.root_tile.require_columns( + [...needed_keys].map((k) => k[0]), + ); + // Immediately start loading what we can onto the GPUs, too. 
+ for (const tile of this.renderer.visible_tiles()) { + this._renderer.bufferManager.ready(tile, needed_keys); + } + // TODO: There should be a setTimeout here before the resolution + resolve(); }); } /** @@ -724,18 +745,6 @@ export class Scatterplot { this.update_prefs(prefs); - if (this._root === undefined) { - const { source_url, arrow_table, arrow_buffer } = - prefs as DS.InitialAPICall; - const dataSpec = { source_url, arrow_table, arrow_buffer } as DS.DataSpec; - if (Object.values(dataSpec).filter((x) => x !== undefined).length !== 1) { - throw new Error( - 'The initial API call specify exactly one of source_url, arrow_table, or arrow_buffer', - ); - } - await this.load_deeptable(dataSpec); - } - if (prefs.transformations) { for (const [k, func] of Object.entries(prefs.transformations)) { if (!this.deeptable.transformations[k]) { @@ -743,9 +752,7 @@ export class Scatterplot { } } } - if (this._zoom === undefined || this._renderer === undefined) { - await this.reinitialize(); - } + const renderer = this._renderer; const zoom = this._zoom; diff --git a/src/selection.ts b/src/selection.ts index 3abd9b888..970086b38 100644 --- a/src/selection.ts +++ b/src/selection.ts @@ -885,10 +885,14 @@ export class DataSelection { } } -function bigintmatcher(field: string, matches: bigint[]) { +function bigintmatcher( + field: string, + matches: bigint[], + subfield: string | string[] | null = null, +) { const matchings = new Set(matches); return async function (tile: Tile) { - const col = (await tile.get_column(field)).data[0]; + const col = (await tile.get_column(field, subfield)).data[0]; const values = col.values as bigint[]; const bitmask = new Bitmask(tile.record_batch.numRows); for (let i = 0; i < tile.record_batch.numRows; i++) { @@ -920,7 +924,11 @@ function bigintmatcher(field: string, matches: bigint[]) { * @param matches A list of strings to match in that column * @returns */ -function stringmatcher(field: string, matches: string[]) { +function stringmatcher( + 
field: string, + matches: string[], + subfield: string | string[] | null = null, +) { if (field === undefined) { throw new Error('Field must be defined'); } @@ -958,7 +966,8 @@ function stringmatcher(field: string, matches: string[]) { * The Deepscatter transformation function. */ return async function (tile: Tile) { - const col = ((await tile.get_column(field)) as Vector).data[0]; + const col = ((await tile.get_column(field, subfield)) as Vector) + .data[0]; const bytes = col.values; const offsets = col.valueOffsets; diff --git a/src/tile.ts b/src/tile.ts index 36b187d03..95e4c572d 100644 --- a/src/tile.ts +++ b/src/tile.ts @@ -64,6 +64,9 @@ export class Tile { private _partialManifest: Partial | Partial; private _manifest?: TileManifest | LazyTileManifest; + // Does the tile have a loaded manifest and other features sufficient to plot. + public readyToUse = false; + // A cache of fetchCalls for downloaded arrow tables, including any table schema metadata. // Tables may contain more than a single column, so this prevents multiple dispatch. //private _promiseOfChildren: Promise; @@ -134,22 +137,38 @@ export class Tile { * * * @param colname The name of the column to retrive. + * @param subfield If the column is a struct vector, the subfield to retrieve. When a string, retrieves a single + * subfield. When an array, treats it as a nesting order. * @returns An Arrow Vector of the column. */ - async get_column(colname: string): Promise { - const existing = this._batch?.getChild(colname); - if (existing) { - return existing; + async get_column( + colname: string, + subfield: string | string[] | null = null, + ): Promise { + const subfields = + subfield === null ? [] : Array.isArray(subfield) ? 
subfield : [subfield]; + let existing = this._batch?.getChild(colname); + + if (!existing) { + if (this.deeptable.transformations[colname]) { + await this.apply_transformation(colname); + existing = this.record_batch.getChild(colname); + if (existing === null) { + throw new Error(`Column ${colname} not found after transformation`); + } + } } - if (this.deeptable.transformations[colname]) { - await this.apply_transformation(colname); - const vector = this.record_batch.getChild(colname); - if (vector === null) { - throw new Error(`Column ${colname} not found after transformation`); + + // If subfields are passed, use them. + for (let i = 0; i < subfields.length; i++) { + existing = existing.getChild(subfields[i]); + if (existing === null) { + throw new Error( + `Column ${colname} lacks subfield ${subfields.slice(0, i).join(' >> ')}`, + ); } - return vector; } - throw new Error(`Column ${colname} not found`); + return existing; } /** @@ -286,6 +305,7 @@ export class Tile { }); this.highest_known_ix = manifest.max_ix; this._manifest = manifest; + this.readyToUse = true; } set highest_known_ix(val) { diff --git a/src/tixrixqid.ts b/src/tixrixqid.ts index 1522b6343..402a160fd 100644 --- a/src/tixrixqid.ts +++ b/src/tixrixqid.ts @@ -1,4 +1,11 @@ -import type { Bool, Data, Field, Struct, StructRowProxy, Vector } from 'apache-arrow'; +import type { + Bool, + Data, + Field, + Struct, + StructRowProxy, + Vector, +} from 'apache-arrow'; import type { Tile } from './deepscatter'; import { Bitmask, DataSelection, Deeptable } from './deepscatter'; @@ -102,7 +109,7 @@ export function tixToZxy(tix: Tix): [number, number, number] { */ export function getQidFromRow( row: StructRowProxy, - dataset: Deeptable + dataset: Deeptable, ): [number, number] { const tile = getTileFromRow(row, dataset); const rix = row[Symbol.for('rowIndex')] as number; @@ -110,7 +117,6 @@ export function getQidFromRow( } export function getTileFromRow(row: StructRowProxy, dataset: Deeptable): Tile { - const 
parent = row[Symbol.for('parent')] as Data; const parentsColumns = parent.children; @@ -119,8 +125,8 @@ export function getTileFromRow(row: StructRowProxy, dataset: Deeptable): Tile { // need to find the tile that matches the most columns, not assume // that every column matches exactly. let best_match: [Tile | null, number] = [null, 0]; - const parentNames : [string, Data][] = parent.type.children.map( - (d: Field, i: number) => [d.name, parentsColumns[i]] + const parentNames: [string, Data][] = parent.type.children.map( + (d: Field, i: number) => [d.name, parentsColumns[i]], ); dataset.map((t: Tile) => { @@ -144,7 +150,7 @@ export function getTileFromRow(row: StructRowProxy, dataset: Deeptable): Tile { }); if (best_match[0] === undefined) { throw new Error( - 'No tiles found for this row.' + JSON.stringify({ ...row }) + 'No tiles found for this row.' + JSON.stringify({ ...row }), ); } return best_match[0]; diff --git a/src/types.ts b/src/types.ts index 32ee18c02..a173d0811 100644 --- a/src/types.ts +++ b/src/types.ts @@ -21,6 +21,7 @@ import { Scatterplot } from './scatterplot'; import { ZoomTransform } from 'd3-zoom'; import type { Tile } from './tile'; import type { Rectangle } from './tile'; +import { TupleMap } from './utilityFunctions'; export type { Renderer, Deeptable, ConcreteAesthetic }; /** @@ -294,6 +295,8 @@ export type NumericScaleChannel< > = { /** The name of a column in the data table to be encoded. */ field: string; + // If field is a struct, subfield indicates which child to extract. + subfield?: string | string[]; /** * A transformation to apply on the field. * 'literal' maps in the implied dataspace set by 'x', 'y', while @@ -312,6 +315,8 @@ export type LambdaChannel< > = { lambda?: (v: DomainType) => RangeType; field: string; + // If field is a struct, subfield indicates which child to extract. 
+ subfield?: string | string[]; }; /** @@ -342,6 +347,8 @@ type TwoArgumentOp = { export type OpChannel = { field: string; + // If field is a struct, subfield indicates which child to extract. + subfield?: string | string[]; } & (OneArgumentOp | TwoArgumentOp); export type ConstantChannel = { @@ -376,12 +383,16 @@ export type ChannelType = export type CategoricalColorScale = { field: string; + // If field is a struct, subfield indicates which child to extract. + subfield?: string | string[]; domain: string | [string, string, ...string[]]; range: Colorname[]; }; export type LinearColorScale = { field: string; + // If field is a struct, subfield indicates which child to extract. + subfield?: string | string[]; domain: [number, number]; // TODO: | [number, number, number] // TODO: implement some codegen for these values range: 'viridis' | 'magma' | 'ylorrd'; @@ -605,7 +616,7 @@ export type RowFunction = ( // Props that are needed for all the draws in a single tick. export type GlobalDrawProps = { - aes: { encoding: Encoding }; + // aes: { encoding: Encoding }; colors_as_grid: 0 | 1; corners: Rectangle; zoom_balance: number; @@ -627,7 +638,7 @@ export type GlobalDrawProps = { grid_mode: 1 | 0; buffer_num_to_variable: string[][]; aes_to_buffer_num: Record; - variable_to_buffer_num: Record; + variable_to_buffer_num: TupleMap; color_picker_mode: 0 | 1 | 2 | 3; zoom_matrix: [ number,