Skip to content

Commit

Permalink
allow subfield access (#156)
Browse files Browse the repository at this point in the history
This allows using struct columns in Arrow through an optional 'subfield' key in the encoding. 'field' remains the unit of laziness, but nested columns can now hold several values at once, which is useful for expensive transformations that return more than one number per point (especially x and y together, as in geoprojections).

I've included a test in SwitchPositions.svelte that demonstrates this working.
<!-- ELLIPSIS_HIDDEN -->


----

> [!IMPORTANT]
> Add subfield access for struct columns in Arrow, enhancing nested data handling in Deepscatter with updates across behavior, aesthetics, rendering, and types.
> 
>   - **Behavior**:
>     - Allow subfield access in struct columns in Arrow using an optional `subfield` parameter.
>     - Demonstrated with a test in `SwitchPositions.svelte`.
>   - **Deeptable**:
>     - Updated `get_column()` to support subfields in `Deeptable.ts`.
>     - Modified `domain()` to handle subfields for extent calculations.
>   - **Aesthetics**:
>     - Added `subfield` handling in `Aesthetic.ts` and `AestheticSet.ts`.
>     - Updated `StatefulAesthetic` to track needed fields with subfields.
>   - **Rendering**:
>     - Adjusted buffer management in `regl_rendering.ts` to accommodate subfields.
>     - Updated `BufferManager` to handle nested vectors with subfields.
>   - **Types**:
>     - Extended `ChannelType` and related types to include `subfield` property in `types.ts`.
> 
> <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=nomic-ai%2Fdeepscatter&utm_source=github&utm_medium=referral)<sup> for 181fa0a. It will automatically update as commits are pushed.</sup>


<!-- ELLIPSIS_HIDDEN -->
  • Loading branch information
bmschmidt authored Oct 11, 2024
1 parent 75f04d6 commit e3580d7
Show file tree
Hide file tree
Showing 13 changed files with 346 additions and 149 deletions.
87 changes: 76 additions & 11 deletions dev/svelte/SwitchPositions.svelte
Original file line number Diff line number Diff line change
@@ -1,23 +1,88 @@
<script>
import {
makeData,
Float32,
Vector,
vectorFromArray,
Struct,
makeVector,
Field,
} from 'apache-arrow';
export let scatterplot;
let positionNum = 0;
/**
 * Dev-only demonstration of struct subfield access.
 *
 * On first use, registers ten transformations named `struct0` … `struct9`.
 * Each one reads a tile's `x` and `y` columns, offsets every point by a
 * random distance in a random direction, and returns the result as a single
 * Arrow struct column with `x` and `y` children. Each click then points the
 * plot's x and y encodings at the `x` / `y` subfields of the next struct
 * column in the cycle.
 */
async function click() {
  console.log(scatterplot.prefs.encoding.x);
  for (let i = 0; i < 10; i++) {
    const name = 'struct' + i;
    if (scatterplot.deeptable.transformations[name]) {
      // Already registered on an earlier click.
      continue;
    }
    scatterplot.deeptable.transformations[name] = async function (tile) {
      // Create a nested struct with a change.
      const x = (await tile.get_column('x')).toArray();
      const y = (await tile.get_column('y')).toArray();
      const x_ = new Float32Array(x.length);
      const y_ = new Float32Array(y.length);
      for (let j = 0; j < x.length; j++) {
        const radius = (Math.random() + Math.random()) / 3;
        const theta = Math.random() * Math.PI * 2;
        x_[j] = x[j] + Math.cos(theta) * radius;
        y_[j] = y[j] + Math.sin(theta) * radius;
      }
      const d = makeData({
        type: new Struct([
          new Field('x', new Float32()),
          new Field('y', new Float32()),
        ]),
        children: [vectorFromArray(x_).data[0], vectorFromArray(y_).data[0]],
      });
      return new Vector([d]);
    };
    // Request the new column on the tiles; the results are not awaited here.
    scatterplot.deeptable.map((d) => d.get_column(name));
  }
  // Pause for 100 ms before switching encodings. The delay must be the second
  // argument of setTimeout (not of the Promise constructor) to take effect.
  // NOTE(review): presumably this gives the column requests above time to
  // start — confirm.
  await new Promise((resolve) => {
    setTimeout(resolve, 100);
  });
  // Cycle through struct0 … struct9 on successive clicks.
  const r = 'struct' + (positionNum++ % 10);
  await scatterplot.plotAPI({
    duration: 1000,
    encoding: {
      x: {
        field: r,
        subfield: ['x'],
        transform: 'literal',
        domain: [-10, 10],
      },
      y: {
        field: r,
        subfield: ['y'],
        transform: 'literal',
        domain: [-10, 10],
      },
    },
  });
  // await scatterplot.plotAPI({
  //   encoding: {
  //     x: {
  //       field: scatterplot.prefs.encoding.x.field === 'x' ? 'y' : 'x',
  //       transform:
  //         scatterplot.prefs.encoding.x.field === 'x' ? 'linear' : 'literal',
  //     },
  //     y: {
  //       field: scatterplot.prefs.encoding.y.field === 'y' ? 'x' : 'y',
  //       transform:
  //         scatterplot.prefs.encoding.y.field === 'y' ? 'linear' : 'literal',
  //     },
  //   },
  // });
}
</script>

<button on:click={click}>
Switch positions
</button>
<button on:click={click}> Switch positions </button>
44 changes: 34 additions & 10 deletions src/Deeptable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
Int32,
Int8,
tableToIPC,
Struct,
} from 'apache-arrow';
import { Scatterplot } from './scatterplot';
import { wrapArrowTable } from './wrap_arrow';
Expand All @@ -34,6 +35,8 @@ import type {
IdSelectParams,
} from './selection';
import { DataSelection } from './selection';
import { Some, TupleMap } from './utilityFunctions';
import { getNestedVector } from './regl_rendering';

type TransformationStatus = 'queued' | 'in progress' | 'complete' | 'failed';

Expand Down Expand Up @@ -70,7 +73,8 @@ export class Deeptable {
...defaultTransformations,
};
public _plot: Scatterplot | null;
private extents: Record<string, [number, number] | [Date, Date]> = {};
private extents: TupleMap<string, [number, number] | [Date, Date]> =
new TupleMap();
// A 3d identifier for the tile. Usually [z, x, y]
private _extent?: Rectangle;
public _ix_seed = 0;
Expand Down Expand Up @@ -134,6 +138,9 @@ export class Deeptable {
this.root_tile = new Tile(defaultManifest, null, this);
const preProcessRootTile = this.root_tile.preprocessRootTileInfo();

// At instantiation, the deeptable isn't ready; only once this
// async stuff is done can the deeptable be used.
// TODO: Add an async static method as the preferred initialization method.
this.promise = preProcessRootTile.then(async () => {
const batch = await this.root_tile.get_arrow(null);
const schema = batch.schema;
Expand Down Expand Up @@ -346,13 +353,24 @@ export class Deeptable {

domain<T extends [number, number] | [string, Date] = [number, number]>(
columnName: string,
subfield?: string[],
): [T[1], T[1]] {
if (this.extents[columnName]) {
return this.extents[columnName];
const key = [columnName, ...(subfield || [])] as Some<string>;
if (this.extents.get(key)) {
return this.extents.get(key);
}

// First -- look at the schema metadata.
let dim = this._schema?.fields.find((d) => d.name === columnName);
for (const sub in subfield) {
if (dim === undefined) {
continue;
}
console.log({ dim });
dim = (dim as Field<Struct<any>>).type.children.find(
(d) => d.name === sub,
);
}
const dim = this._schema?.fields.find(
(d) => d.name === columnName,
) as Field<DS.SupportedArrowTypes>;
if (dim !== undefined) {
let min: T[0] | undefined = undefined;
let max: T[0] | undefined = undefined;
Expand All @@ -375,24 +393,30 @@ export class Deeptable {
'Date field extents in metadata must be passed as strings',
);
}
return (this.extents[columnName] = [new Date(min), new Date(max)]);
this.extents.set(key, [new Date(min), new Date(max)]);
return this.extents.get(key);
}
if (typeof max === 'string') {
throw new Error('Failed to parse min-max as numbers');
}
if (min !== undefined) {
return (this.extents[columnName] = [min as T[1], max as T[1]] as
this.extents.set(key, [min as T[1], max as T[1]] as
| [number, number]
| [Date, Date]);
return this.extents.get(key);
}
}

const vectors: Vector[] = this.map((tile) => tile)
.filter((d) => d.hasLoadedColumn(columnName))
.map((d) => d.record_batch.getChild(columnName) as Vector<Float32>);
.map((d) => getNestedVector(d, [columnName, ...(subfield || [])]));

const extented = extent([...new Vector(vectors)]) as [T[1], T[1]] as
| [number, number]
| [Date, Date];
return (this.extents[columnName] = extented);

this.extents.set(key, extented);
return this.extents.get(key);
}

*points(bbox: Rectangle | undefined, max_ix = 1e99) {
Expand Down
44 changes: 37 additions & 7 deletions src/aesthetics/Aesthetic.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import type { TextureSet } from './AestheticSet';
import { isConstantChannel } from '../typing';
import { Type, Vector } from 'apache-arrow';
import { Struct, Type, Vector } from 'apache-arrow';
import { StructRowProxy } from 'apache-arrow/row/struct';
import { isNumber } from 'lodash';
import type * as DS from '../types';
import { Scatterplot } from '../scatterplot';
import { Some } from '../utilityFunctions';

/**
* An Aesthetic bundles all operations in mapping from user dataspace to webGL based aesthetics.
Expand All @@ -26,6 +27,7 @@ export abstract class Aesthetic<
public abstract default_range: [Output['rangeType'], Output['rangeType']];
public scatterplot: Scatterplot;
public field: string | null = null;
public subfield: string[] = [];
public _texture_buffer: Float32Array | Uint8Array | null = null;
protected abstract _func?: (d: Input['domainType']) => Output['rangeType'];
public aesthetic_map: TextureSet;
Expand Down Expand Up @@ -76,9 +78,25 @@ export abstract class Aesthetic<
this.field = null;
} else {
this.field = encoding.field;
if (encoding.subfield) {
this.subfield = Array.isArray(encoding.subfield)
? encoding.subfield
: [encoding.subfield];
}
}
}

/**
 * The full access path for this aesthetic's data inside a record batch:
 * the field name first, followed by each subfield name in order.
 * Evaluates to null when no field is set.
 */
get columnKeys(): null | Some<string> {
  return this.field === null
    ? null
    : ([this.field, ...this.subfield] as Some<string>);
}

get deeptable() {
return this.scatterplot.deeptable;
}
Expand All @@ -100,10 +118,14 @@ export abstract class Aesthetic<

/**
 * Looks up this aesthetic's raw value on a single datum.
 *
 * Reads `point[this.field]` and then descends through each name in
 * `this.subfield`, so a struct-valued field resolves to its nested member.
 *
 * @param point The datum to read from.
 * @returns The resolved value, or null when no field is set, the field's
 *   value is falsy, or an intermediate level of the subfield path is missing.
 */
value_for(point: Datum): Input['domainType'] | null {
  // NOTE(review): this truthiness check also maps falsy values such as 0 or
  // '' to null; kept as in the original — confirm that is intended.
  if (this.field && point[this.field]) {
    let v: unknown = point[this.field];
    for (const key of this.subfield) {
      if (v === null || v === undefined) {
        // An intermediate struct level is absent; nothing to descend into.
        return null;
      }
      v = (v as Record<string, unknown>)[key];
    }
    return v as Input['domainType'];
  }
  // Needs a default perhaps?
  return null;
}

get map_position() {
Expand Down Expand Up @@ -136,9 +158,17 @@ export abstract class Aesthetic<
if (this.field === null || this.field === undefined) {
return (this.column = null);
}
return (this.column = this.deeptable.root_tile.record_batch.getChild(
this.field,
) as Vector<Input['arrowType']>);
let output: Vector<Input['arrowType']> | Vector<Struct> | null = null;
for (const f of [this.field, ...this.subfield]) {
if (output === null) {
output = this.deeptable.root_tile.record_batch.getChild(f) as Vector<
Input['arrowType']
>;
} else {
output = (output as Vector<Struct>).getChild(f) as Vector<Struct>;
}
}
return (this.column = output as Vector<Input['arrowType']>);
}

is_dictionary(): boolean {
Expand Down
18 changes: 18 additions & 0 deletions src/aesthetics/AestheticSet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type { Deeptable } from '../Deeptable';
import { StatefulAesthetic } from './StatefulAesthetic';
import type { Encoding } from '../types';
import type * as DS from '../types';
import { TupleSet } from '../utilityFunctions';

type AesMap = {
[K in keyof typeof dimensions]: StatefulAesthetic<
Expand Down Expand Up @@ -83,6 +84,12 @@ export class AestheticSet {
}
}

_neededFields: TupleSet<string> = new TupleSet();

/**
 * The entries currently held in `_neededFields`, copied into a plain array.
 */
get neededFields(): string[][] {
  return Array.from(this._neededFields.values());
}

apply_encoding(encoding: Encoding) {
if (
encoding['jitter_radius'] &&
Expand All @@ -107,6 +114,17 @@ export class AestheticSet {
this.dim(k).update(encoding[k] as DS.ChannelType | null);
}

// Update the needed fields.
this._neededFields.clear();

for (const v of Object.values(this.store)) {
if (v instanceof StatefulAesthetic) {
for (const f of v.neededFields) {
this._neededFields.add(f);
}
}
}

// Apply settings that are not full-on aesthetics.
for (const setting of ['jitter_method'] as const) {
this.options[setting].last = this.options[setting].current;
Expand Down
2 changes: 1 addition & 1 deletion src/aesthetics/ScaledAesthetic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ export abstract class ScaledAesthetic<
Input['domainType'],
];
} else {
return this.scatterplot.deeptable.domain(this.field);
return this.scatterplot.deeptable.domain(this.field, this.subfield);
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/aesthetics/StatefulAesthetic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ export type ConcreteScaledAesthetic =
import type { Deeptable } from '../Deeptable';
import type { Regl } from 'regl';
import type { TextureSet } from './AestheticSet';
import { Some } from '../utilityFunctions';

export class StatefulAesthetic<T extends ConcreteAesthetic> {
/**
Expand Down Expand Up @@ -97,11 +98,12 @@ export class StatefulAesthetic<T extends ConcreteAesthetic> {
] as [T, T];
}

/**
 * The column access paths (field plus any subfields) used by the current
 * and the previous state of this aesthetic. States with no field set
 * (a null `columnKeys`) are omitted.
 */
get neededFields(): Some<string>[] {
  return [this.current.columnKeys, this.last.columnKeys].filter(
    // The type predicate lets the compiler drop `null` from the element type.
    (f): f is Some<string> => f !== null,
  );
}

get current() {
return this.states[0];
}
Expand Down
Loading

0 comments on commit e3580d7

Please sign in to comment.