diff --git a/README.md b/README.md index d284711..051f91f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ Uniform Manifold Approximation and Projection (UMAP) is a dimension reduction te There are a few important differences between the python implementation and the JS port. - The optimization step is seeded with a random embedding rather than a spectral embedding. This gives comparable results for smaller datasets. The spectral embedding computation relies on efficient eigenvalue / eigenvector computations that are not easily done in JS. -- There is no implementation of any supervised dimension reduction or adding new points to an existing embedding. - The only distance function used is euclidean distance. - There is no specialized functionality for angular distances or sparse data representations. @@ -62,15 +61,25 @@ umap.setSupervisedProjection(labels); const embedding = umap.fit(data); ``` +#### Transforming additional points after fitting + +```typescript +import { UMAP } from 'umap-js'; + +const umap = new UMAP(); +umap.fit(data); +const transformed = umap.transform(additionalData); +``` + #### Parameters -The UMAP constructor can accept a number of parameters via a `UMAPParameters` object: +The UMAP constructor can accept a number of hyperparameters via a `UMAPParameters` object, with the most common described below. See [umap.ts](./src/umap.ts) for more details. 
/**
 * Draws `nSamples` mutually distinct values from `tauRandInt` by rejection:
 * each candidate is redrawn until it differs from every value accepted so far.
 *
 * @param nSamples Number of distinct values to draw.
 * @param poolSize Bound forwarded to `tauRandInt`.
 * @param random Optional random source forwarded to `tauRandInt`. Defaults to
 *     `Math.random`, so existing two-argument callers keep working.
 * @returns The array created by `zeros(nSamples)`, filled with the accepted
 *     draws in the order they were drawn.
 */
function rejectionSample(nSamples, poolSize, random) {
    if (random === void 0) { random = Math.random; }
    // NOTE(review): presumably tauRandInt yields integers in [0, poolSize); if
    // so, nSamples > poolSize can never be satisfied and this loop will not
    // terminate — confirm against tauRandInt and guard at the call site.
    var result = zeros(nSamples);
    for (var i = 0; i < nSamples; i++) {
        var candidate = 0;
        var isDuplicate = true;
        while (isDuplicate) {
            candidate = tauRandInt(poolSize, random);
            isDuplicate = false;
            // Only the first i slots hold accepted samples.
            for (var k = 0; k < i; k++) {
                if (result[k] === candidate) {
                    isDuplicate = true;
                    break;
                }
            }
        }
        result[i] = candidate;
    }
    return result;
}
var index = 0; + if (x.length !== a * b) { + throw new Error('Array dimensions must match input length.'); + } + for (var i = 0; i < a; i++) { + var col = []; + for (var j = 0; j < b; j++) { + col.push(x[index]); + index += 1; + } + rows.push(col); + count += 1; + } + return rows; +} +exports.reshape2d = reshape2d; /***/ }), @@ -232,8 +273,197 @@ exports.max2d = max2d; "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); -var umap_1 = __webpack_require__(3); -window.UMAP = umap_1.UMAP; +var utils = __webpack_require__(1); +function makeHeap(nPoints, size) { + var makeArrays = function (fillValue) { + return utils.empty(nPoints).map(function () { + return utils.filled(size, fillValue); + }); + }; + var heap = []; + heap.push(makeArrays(-1)); + heap.push(makeArrays(Infinity)); + heap.push(makeArrays(0)); + return heap; +} +exports.makeHeap = makeHeap; +function rejectionSample(nSamples, poolSize, random) { + var result = utils.zeros(nSamples); + for (var i = 0; i < nSamples; i++) { + var rejectSample = true; + var j = 0; + while (rejectSample) { + j = utils.tauRandInt(poolSize, random); + var broken = false; + for (var k = 0; k < i; k++) { + if (j === result[k]) { + broken = true; + break; + } + } + if (!broken) + rejectSample = false; + } + result[i] = j; + } + return result; +} +exports.rejectionSample = rejectionSample; +function heapPush(heap, row, weight, index, flag) { + row = Math.floor(row); + var indices = heap[0][row]; + var weights = heap[1][row]; + var isNew = heap[2][row]; + if (weight >= weights[0]) { + return 0; + } + for (var i = 0; i < indices.length; i++) { + if (index === indices[i]) { + return 0; + } + } + return uncheckedHeapPush(heap, row, weight, index, flag); +} +exports.heapPush = heapPush; +function uncheckedHeapPush(heap, row, weight, index, flag) { + var indices = heap[0][row]; + var weights = heap[1][row]; + var isNew = heap[2][row]; + if (weight >= weights[0]) { + return 0; + } + weights[0] = weight; + indices[0] 
= index; + isNew[0] = flag; + var i = 0; + var iSwap = 0; + while (true) { + var ic1 = 2 * i + 1; + var ic2 = ic1 + 1; + var heapShape2 = heap[0][0].length; + if (ic1 >= heapShape2) { + break; + } + else if (ic2 >= heapShape2) { + if (weights[ic1] > weight) { + iSwap = ic1; + } + else { + break; + } + } + else if (weights[ic1] >= weights[ic2]) { + if (weight < weights[ic1]) { + iSwap = ic1; + } + else { + break; + } + } + else { + if (weight < weights[ic2]) { + iSwap = ic2; + } + else { + break; + } + } + weights[i] = weights[iSwap]; + indices[i] = indices[iSwap]; + isNew[i] = isNew[iSwap]; + i = iSwap; + } + weights[i] = weight; + indices[i] = index; + isNew[i] = flag; + return 1; +} +exports.uncheckedHeapPush = uncheckedHeapPush; +function buildCandidates(currentGraph, nVertices, nNeighbors, maxCandidates, random) { + var candidateNeighbors = makeHeap(nVertices, maxCandidates); + for (var i = 0; i < nVertices; i++) { + for (var j = 0; j < nNeighbors; j++) { + if (currentGraph[0][i][j] < 0) { + continue; + } + var idx = currentGraph[0][i][j]; + var isn = currentGraph[2][i][j]; + var d = utils.tauRand(random); + heapPush(candidateNeighbors, i, d, idx, isn); + heapPush(candidateNeighbors, idx, d, i, isn); + currentGraph[2][i][j] = 0; + } + } + return candidateNeighbors; +} +exports.buildCandidates = buildCandidates; +function deheapSort(heap) { + var indices = heap[0]; + var weights = heap[1]; + for (var i = 0; i < indices.length; i++) { + var indHeap = indices[i]; + var distHeap = weights[i]; + for (var j = 0; j < indHeap.length - 1; j++) { + var indHeapIndex = indHeap.length - j - 1; + var distHeapIndex = distHeap.length - j - 1; + var temp1 = indHeap[0]; + indHeap[0] = indHeap[indHeapIndex]; + indHeap[indHeapIndex] = temp1; + var temp2 = distHeap[0]; + distHeap[0] = distHeap[distHeapIndex]; + distHeap[distHeapIndex] = temp2; + siftDown(distHeap, indHeap, distHeapIndex, 0); + } + } + return { indices: indices, weights: weights }; +} +exports.deheapSort = 
deheapSort; +function siftDown(heap1, heap2, ceiling, elt) { + while (elt * 2 + 1 < ceiling) { + var leftChild = elt * 2 + 1; + var rightChild = leftChild + 1; + var swap = elt; + if (heap1[swap] < heap1[leftChild]) { + swap = leftChild; + } + if (rightChild < ceiling && heap1[swap] < heap1[rightChild]) { + swap = rightChild; + } + if (swap === elt) { + break; + } + else { + var temp1 = heap1[elt]; + heap1[elt] = heap1[swap]; + heap1[swap] = temp1; + var temp2 = heap2[elt]; + heap2[elt] = heap2[swap]; + heap2[swap] = temp2; + elt = swap; + } + } +} +function smallestFlagged(heap, row) { + var ind = heap[0][row]; + var dist = heap[1][row]; + var flag = heap[2][row]; + var minDist = Infinity; + var resultIndex = -1; + for (var i = 0; i > ind.length; i++) { + if (flag[i] === 1 && dist[i] < minDist) { + minDist = dist[i]; + resultIndex = i; + } + } + if (resultIndex >= 0) { + flag[resultIndex] = 0; + return Math.floor(ind[resultIndex]); + } + else { + return -1; + } +} +exports.smallestFlagged = smallestFlagged; /***/ }), @@ -242,41 +472,6 @@ window.UMAP = umap_1.UMAP; "use strict"; -var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { - return new (P || (P = Promise))(function (resolve, reject) { - function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } - function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } - function step(result) { result.done ? 
resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } - step((generator = generator.apply(thisArg, _arguments || [])).next()); - }); -}; -var __generator = (this && this.__generator) || function (thisArg, body) { - var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; - return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; - function verb(n) { return function (v) { return step([n, v]); }; } - function step(op) { - if (f) throw new TypeError("Generator is already executing."); - while (_) try { - if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; - if (y = 0, t) op = [op[0] & 2, t.value]; - switch (op[0]) { - case 0: case 1: t = op; break; - case 4: _.label++; return { value: op[1], done: false }; - case 5: _.label++; y = op[1]; op = [0]; continue; - case 7: op = _.ops.pop(); _.trys.pop(); continue; - default: - if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } - if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } - if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } - if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } - if (t[2]) _.ops.pop(); - _.trys.pop(); continue; - } - op = body.call(thisArg, _); - } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } - if (op[0] & 5) throw op[1]; return { value: op[0] ? 
op[1] : void 0, done: true }; - } -}; var __read = (this && this.__read) || function (o, n) { var m = typeof Symbol === "function" && o[Symbol.iterator]; if (!m) return o; @@ -297,574 +492,593 @@ var __spread = (this && this.__spread) || function () { for (var ar = [], i = 0; i < arguments.length; i++) ar = ar.concat(__read(arguments[i])); return ar; }; +var __values = (this && this.__values) || function (o) { + var m = typeof Symbol === "function" && o[Symbol.iterator], i = 0; + if (m) return m.call(o); + return { + next: function () { + if (o && i >= o.length) o = void 0; + return { value: o && o[i++], done: !o }; + } + }; +}; Object.defineProperty(exports, "__esModule", { value: true }); -var matrix = __webpack_require__(4); -var nnDescent = __webpack_require__(5); -var tree = __webpack_require__(7); +var _a; var utils = __webpack_require__(1); -var LM = __webpack_require__(8); -var SMOOTH_K_TOLERANCE = 1e-5; -var MIN_K_DIST_SCALE = 1e-3; -var UMAP = (function () { - function UMAP(params) { - if (params === void 0) { params = {}; } - this.minDist = 0.1; - this.nComponents = 2; - this.nEpochs = 0; - this.nNeighbors = 15; - this.random = Math.random; - this.spread = 1.0; - this.targetMetric = "categorical"; - this.targetWeight = 0.5; - this.targetNNeighbors = this.nNeighbors; - this.distanceFn = euclidean; - this.isInitialized = false; - this.embedding = []; - this.optimizationState = new OptimizationState(); - this.minDist = params.minDist || this.minDist; - this.nComponents = params.nComponents || this.nComponents; - this.nEpochs = params.nEpochs || this.nEpochs; - this.nNeighbors = params.nNeighbors || this.nNeighbors; - this.random = params.random || this.random; - this.spread = params.spread || this.spread; +var SparseMatrix = (function () { + function SparseMatrix(rows, cols, values, dims) { + this.entries = new Map(); + this.nRows = 0; + this.nCols = 0; + this.rows = __spread(rows); + this.cols = __spread(cols); + this.values = __spread(values); + for (var 
i = 0; i < values.length; i++) { + var key = this.makeKey(this.rows[i], this.cols[i]); + this.entries.set(key, i); + } + this.nRows = dims[0]; + this.nCols = dims[1]; } - UMAP.prototype.fit = function (X) { - this.initializeFit(X); - this.optimizeLayout(); - return this.embedding; - }; - UMAP.prototype.fitAsync = function (X, callback) { - if (callback === void 0) { callback = function () { return true; }; } - return __awaiter(this, void 0, void 0, function () { - return __generator(this, function (_a) { - switch (_a.label) { - case 0: - this.initializeFit(X); - return [4, this.optimizeLayout(callback)]; - case 1: - _a.sent(); - return [2, this.embedding]; - } - }); - }); - }; - UMAP.prototype.setSupervisedProjection = function (Y, params) { - if (params === void 0) { params = {}; } - this.Y = Y; - this.targetMetric = params.targetMetric || this.targetMetric; - this.targetWeight = params.targetWeight || this.targetWeight; - this.targetNNeighbors = params.targetNNeighbors || this.targetNNeighbors; - }; - UMAP.prototype.setPrecomputedKNN = function (knnIndices, knnDistances) { - this.knnIndices = knnIndices; - this.knnDistances = knnDistances; + SparseMatrix.prototype.makeKey = function (row, col) { + return row + ":" + col; }; - UMAP.prototype.initializeFit = function (X) { - if (this.X === X && this.isInitialized) { - return this.getNEpochs(); - } - this.X = X; - if (!this.knnIndices && !this.knnDistances) { - var knnResults = this.nearestNeighbors(X); - this.knnIndices = knnResults.knnIndices; - this.knnDistances = knnResults.knnDistances; + SparseMatrix.prototype.checkDims = function (row, col) { + var withinBounds = row < this.nRows && col < this.nCols; + if (!withinBounds) { + throw new Error('array index out of bounds'); } - this.graph = this.fuzzySimplicialSet(X, this.nNeighbors); - this.processGraphForSupervisedProjection(); - var _a = this.initializeSimplicialSetEmbedding(), head = _a.head, tail = _a.tail, epochsPerSample = _a.epochsPerSample; - 
this.optimizationState.head = head; - this.optimizationState.tail = tail; - this.optimizationState.epochsPerSample = epochsPerSample; - this.isInitialized = true; - return this.getNEpochs(); }; - UMAP.prototype.processGraphForSupervisedProjection = function () { - var _a = this, Y = _a.Y, X = _a.X; - if (Y) { - if (Y.length !== X.length) { - throw new Error('Length of X and y must be equal'); - } - if (this.targetMetric === "categorical") { - var lt = this.targetWeight < 1.0; - var farDist = lt ? 2.5 * (1.0 / (1.0 - this.targetWeight)) : 1.0e12; - this.graph = this.categoricalSimplicialSetIntersection(this.graph, Y, farDist); - } + SparseMatrix.prototype.set = function (row, col, value) { + this.checkDims(row, col); + var key = this.makeKey(row, col); + if (!this.entries.has(key)) { + this.rows.push(row); + this.cols.push(col); + this.values.push(value); + this.entries.set(key, this.values.length - 1); + } + else { + var index = this.entries.get(key); + this.values[index] = value; } }; - UMAP.prototype.step = function () { - var _a = this.optimizationState, currentEpoch = _a.currentEpoch, isInitialized = _a.isInitialized; - if (!isInitialized) { - this.initializeOptimization(); + SparseMatrix.prototype.get = function (row, col, defaultValue) { + if (defaultValue === void 0) { defaultValue = 0; } + this.checkDims(row, col); + var key = this.makeKey(row, col); + if (this.entries.has(key)) { + var index = this.entries.get(key); + return this.values[index]; } - if (currentEpoch < this.getNEpochs()) { - this.optimizeLayoutStep(currentEpoch); + else { + return defaultValue; } - return this.optimizationState.currentEpoch; }; - UMAP.prototype.getEmbedding = function () { - return this.embedding; + SparseMatrix.prototype.getDims = function () { + return [this.nRows, this.nCols]; }; - UMAP.prototype.nearestNeighbors = function (X) { - var _a = this, distanceFn = _a.distanceFn, nNeighbors = _a.nNeighbors; - var log2 = function (n) { return Math.log(n) / Math.log(2); }; - var 
metricNNDescent = nnDescent.makeNNDescent(distanceFn, this.random); - var round = function (n) { - return n === 0.5 ? 0 : Math.round(n); - }; - var nTrees = 5 + Math.floor(round(Math.pow(X.length, 0.5) / 20.0)); - var nIters = Math.max(5, Math.floor(Math.round(log2(X.length)))); - var rpForest = tree.makeForest(X, nNeighbors, nTrees, this.random); - var leafArray = tree.makeLeafArray(rpForest); - var _b = metricNNDescent(X, leafArray, nNeighbors, nIters), indices = _b.indices, weights = _b.weights; - return { knnIndices: indices, knnDistances: weights }; + SparseMatrix.prototype.getRows = function () { + return __spread(this.rows); }; - UMAP.prototype.fuzzySimplicialSet = function (X, nNeighbors, localConnectivity, setOpMixRatio) { - if (localConnectivity === void 0) { localConnectivity = 1.0; } - if (setOpMixRatio === void 0) { setOpMixRatio = 1.0; } - var _a = this, _b = _a.knnIndices, knnIndices = _b === void 0 ? [] : _b, _c = _a.knnDistances, knnDistances = _c === void 0 ? [] : _c; - var _d = this.smoothKNNDistance(knnDistances, nNeighbors, localConnectivity), sigmas = _d.sigmas, rhos = _d.rhos; - var _e = this.computeMembershipStrengths(knnIndices, knnDistances, sigmas, rhos), rows = _e.rows, cols = _e.cols, vals = _e.vals; - var size = [X.length, X.length]; - var sparseMatrix = new matrix.SparseMatrix(rows, cols, vals, size); - var transpose = matrix.transpose(sparseMatrix); - var prodMatrix = matrix.pairwiseMultiply(sparseMatrix, transpose); - var a = matrix.subtract(matrix.add(sparseMatrix, transpose), prodMatrix); - var b = matrix.multiplyScalar(a, setOpMixRatio); - var c = matrix.multiplyScalar(prodMatrix, 1.0 - setOpMixRatio); - var result = matrix.add(b, c); - return result; + SparseMatrix.prototype.getCols = function () { + return __spread(this.cols); }; - UMAP.prototype.categoricalSimplicialSetIntersection = function (simplicialSet, target, farDist, unknownDist) { - if (unknownDist === void 0) { unknownDist = 1.0; } - var intersection = 
fastIntersection(simplicialSet, target, unknownDist, farDist); - intersection = matrix.eliminateZeros(intersection); - return resetLocalConnectivity(intersection); + SparseMatrix.prototype.getValues = function () { + return __spread(this.values); }; - UMAP.prototype.smoothKNNDistance = function (distances, k, localConnectivity, nIter, bandwidth) { - if (localConnectivity === void 0) { localConnectivity = 1.0; } - if (nIter === void 0) { nIter = 64; } - if (bandwidth === void 0) { bandwidth = 1.0; } - var target = (Math.log(k) / Math.log(2)) * bandwidth; - var rho = utils.zeros(distances.length); - var result = utils.zeros(distances.length); - for (var i = 0; i < distances.length; i++) { - var lo = 0.0; - var hi = Infinity; - var mid = 1.0; - var ithDistances = distances[i]; - var nonZeroDists = ithDistances.filter(function (d) { return d > 0.0; }); - if (nonZeroDists.length >= localConnectivity) { - var index = Math.floor(localConnectivity); - var interpolation = localConnectivity - index; - if (index > 0) { - rho[i] = nonZeroDists[index - 1]; - if (interpolation > SMOOTH_K_TOLERANCE) { - rho[i] += - interpolation * (nonZeroDists[index] - nonZeroDists[index - 1]); - } - } - else { - rho[i] = interpolation * nonZeroDists[0]; - } - } - else if (nonZeroDists.length > 0) { - rho[i] = utils.max(nonZeroDists); - } - for (var n = 0; n < nIter; n++) { - var psum = 0.0; - for (var j = 1; j < distances[i].length; j++) { - var d = distances[i][j] - rho[i]; - if (d > 0) { - psum += Math.exp(-(d / mid)); - } - else { - psum += 1.0; - } - } - if (Math.abs(psum - target) < SMOOTH_K_TOLERANCE) { - break; - } - if (psum > target) { - hi = mid; - mid = (lo + hi) / 2.0; - } - else { - lo = mid; - if (hi === Infinity) { - mid *= 2; - } - else { - mid = (lo + hi) / 2.0; - } - } - } - result[i] = mid; - if (rho[i] > 0.0) { - var meanIthDistances = utils.mean(ithDistances); - if (result[i] < MIN_K_DIST_SCALE * meanIthDistances) { - result[i] = MIN_K_DIST_SCALE * meanIthDistances; - } - } 
- else { - var meanDistances = utils.mean(distances.map(utils.mean)); - if (result[i] < MIN_K_DIST_SCALE * meanDistances) { - result[i] = MIN_K_DIST_SCALE * meanDistances; - } - } + SparseMatrix.prototype.forEach = function (fn) { + for (var i = 0; i < this.values.length; i++) { + fn(this.values[i], this.rows[i], this.cols[i]); } - return { sigmas: result, rhos: rho }; }; - UMAP.prototype.computeMembershipStrengths = function (knnIndices, knnDistances, sigmas, rhos) { - var nSamples = knnIndices.length; - var nNeighbors = knnIndices[0].length; - var rows = utils.zeros(nSamples * nNeighbors); - var cols = utils.zeros(nSamples * nNeighbors); - var vals = utils.zeros(nSamples * nNeighbors); - for (var i = 0; i < nSamples; i++) { - for (var j = 0; j < nNeighbors; j++) { - var val = 0; - if (knnIndices[i][j] === -1) { - continue; - } - if (knnIndices[i][j] === i) { - val = 0.0; - } - else if (knnDistances[i][j] - rhos[i] <= 0.0) { - val = 1.0; - } - else { - val = Math.exp(-((knnDistances[i][j] - rhos[i]) / sigmas[i])); - } - rows[i * nNeighbors + j] = i; - cols[i * nNeighbors + j] = knnIndices[i][j]; - vals[i * nNeighbors + j] = val; - } + SparseMatrix.prototype.map = function (fn) { + var vals = []; + for (var i = 0; i < this.values.length; i++) { + vals.push(fn(this.values[i], this.rows[i], this.cols[i])); } - return { rows: rows, cols: cols, vals: vals }; + var dims = [this.nRows, this.nCols]; + return new SparseMatrix(this.rows, this.cols, vals, dims); }; - UMAP.prototype.initializeSimplicialSetEmbedding = function () { + SparseMatrix.prototype.toArray = function () { var _this = this; - var nEpochs = this.getNEpochs(); - var nComponents = this.nComponents; - var graphValues = this.graph.getValues(); - var graphMax = 0; - for (var i = 0; i < graphValues.length; i++) { - var value = graphValues[i]; - if (graphMax < graphValues[i]) { - graphMax = value; - } - } - var graph = this.graph.map(function (value) { - if (value < graphMax / nEpochs) { - return 0; - } - else 
{ - return value; - } - }); - this.embedding = utils.zeros(graph.nRows).map(function () { - return utils.zeros(nComponents).map(function () { - return utils.tauRand(_this.random) * 20 + -10; - }); + var rows = utils.empty(this.nRows); + var output = rows.map(function () { + return utils.zeros(_this.nCols); }); - var weights = []; - var head = []; - var tail = []; - for (var i = 0; i < graph.nRows; i++) { - for (var j = 0; j < graph.nCols; j++) { - var value = graph.get(i, j); - if (value) { - weights.push(value); - tail.push(i); - head.push(j); - } - } + for (var i = 0; i < this.values.length; i++) { + output[this.rows[i]][this.cols[i]] = this.values[i]; } - var epochsPerSample = this.makeEpochsPerSample(weights, nEpochs); - return { head: head, tail: tail, epochsPerSample: epochsPerSample }; + return output; }; - UMAP.prototype.makeEpochsPerSample = function (weights, nEpochs) { - var result = utils.filled(weights.length, -1.0); - var max = utils.max(weights); - var nSamples = weights.map(function (w) { return (w / max) * nEpochs; }); - nSamples.forEach(function (n, i) { - if (n > 0) - result[i] = nEpochs / nSamples[i]; - }); - return result; + return SparseMatrix; +}()); +exports.SparseMatrix = SparseMatrix; +function transpose(matrix) { + var cols = []; + var rows = []; + var vals = []; + matrix.forEach(function (value, row, col) { + cols.push(row); + rows.push(col); + vals.push(value); + }); + var dims = [matrix.nCols, matrix.nRows]; + return new SparseMatrix(rows, cols, vals, dims); +} +exports.transpose = transpose; +function identity(size) { + var _a = __read(size, 1), rows = _a[0]; + var matrix = new SparseMatrix([], [], [], size); + for (var i = 0; i < rows; i++) { + matrix.set(i, i, 1); + } + return matrix; +} +exports.identity = identity; +function pairwiseMultiply(a, b) { + return elementWise(a, b, function (x, y) { return x * y; }); +} +exports.pairwiseMultiply = pairwiseMultiply; +function add(a, b) { + return elementWise(a, b, function (x, y) { 
/**
 * Returns a new matrix holding only the entries of `m` whose stored value is
 * not strictly equal to 0. The dimensions are taken from `m.getDims()`.
 *
 * @param m Source matrix; it is read through its accessor methods only.
 * @returns A new `SparseMatrix` without the explicit zero entries.
 */
function eliminateZeros(m) {
    var values = m.getValues();
    var rows = m.getRows();
    var cols = m.getCols();
    // Mask of positions whose value is exactly 0; the same mask filters all
    // three parallel arrays so they stay aligned.
    var isZero = values.map(function (value) {
        return value === 0;
    });
    var keep = function (_, position) {
        return !isZero[position];
    };
    var keptValues = values.filter(keep);
    var keptRows = rows.filter(keep);
    var keptCols = cols.filter(keep);
    return new SparseMatrix(keptRows, keptCols, keptValues, m.getDims());
}
nVertices = this.graph.nCols; - var _b = findABParams(this.spread, this.minDist), a = _b.a, b = _b.b; - var dim = headEmbedding[0].length; - var moveOther = headEmbedding.length === tailEmbedding.length; - var alpha = initialAlpha; - var epochsPerNegativeSample = epochsPerSample.map(function (e) { return e / negativeSampleRate; }); - var epochOfNextNegativeSample = __spread(epochsPerNegativeSample); - var epochOfNextSample = __spread(epochsPerSample); - Object.assign(this.optimizationState, { - isInitialized: true, - headEmbedding: headEmbedding, - tailEmbedding: tailEmbedding, - head: head, - tail: tail, - epochsPerSample: epochsPerSample, - epochOfNextSample: epochOfNextSample, - epochOfNextNegativeSample: epochOfNextNegativeSample, - epochsPerNegativeSample: epochsPerNegativeSample, - moveOther: moveOther, - initialAlpha: initialAlpha, - alpha: alpha, - gamma: gamma, - a: a, - b: b, - dim: dim, - nEpochs: nEpochs, - nVertices: nVertices, - }); + try { + for (var _b = __values(colsByRow.keys()), _c = _b.next(); !_c.done; _c = _b.next()) { + var row = _c.value; + _loop_1(row); + } + } + catch (e_1_1) { e_1 = { error: e_1_1 }; } + finally { + try { + if (_c && !_c.done && (_a = _b.return)) _a.call(_b); + } + finally { if (e_1) throw e_1.error; } + } + return nextMatrix; +} +exports.normalize = normalize; +var normFns = (_a = {}, + _a["max"] = function (xs) { + var max = -Infinity; + for (var i = 0; i < xs.length; i++) { + max = xs[i] > max ? 
xs[i] : max; + } + return xs.map(function (x) { return x / max; }); + }, + _a["l1"] = function (xs) { + var sum = 0; + for (var i = 0; i < xs.length; i++) { + sum += xs[i]; + } + return xs.map(function (x) { return x / sum; }); + }, + _a["l2"] = function (xs) { + var sum = 0; + for (var i = 0; i < xs.length; i++) { + sum += Math.pow(xs[i], 2); + } + return xs.map(function (x) { return Math.sqrt(Math.pow(x, 2) / sum); }); + }, + _a); +function elementWise(a, b, op) { + var visited = new Set(); + var rows = []; + var cols = []; + var vals = []; + var operate = function (row, col) { + rows.push(row); + cols.push(col); + var nextValue = op(a.get(row, col), b.get(row, col)); + vals.push(nextValue); }; - UMAP.prototype.optimizeLayoutStep = function (n) { - var optimizationState = this.optimizationState; - var head = optimizationState.head, tail = optimizationState.tail, headEmbedding = optimizationState.headEmbedding, tailEmbedding = optimizationState.tailEmbedding, epochsPerSample = optimizationState.epochsPerSample, epochOfNextSample = optimizationState.epochOfNextSample, epochOfNextNegativeSample = optimizationState.epochOfNextNegativeSample, epochsPerNegativeSample = optimizationState.epochsPerNegativeSample, moveOther = optimizationState.moveOther, initialAlpha = optimizationState.initialAlpha, alpha = optimizationState.alpha, gamma = optimizationState.gamma, a = optimizationState.a, b = optimizationState.b, dim = optimizationState.dim, nEpochs = optimizationState.nEpochs, nVertices = optimizationState.nVertices; - var clipValue = 4.0; - for (var i = 0; i < epochsPerSample.length; i++) { - if (epochOfNextSample[i] > n) { - continue; - } - var j = head[i]; - var k = tail[i]; - var current = headEmbedding[j]; - var other = tailEmbedding[k]; - var distSquared = rDist(current, other); - var gradCoeff = 0; - if (distSquared > 0) { - gradCoeff = -2.0 * a * b * Math.pow(distSquared, b - 1.0); - gradCoeff /= a * Math.pow(distSquared, b) + 1.0; - } - for (var d = 0; d < dim; 
d++) { - var gradD = clip(gradCoeff * (current[d] - other[d]), clipValue); - current[d] += gradD * alpha; - if (moveOther) { - other[d] += -gradD * alpha; - } - } - epochOfNextSample[i] += epochsPerSample[i]; - var nNegSamples = Math.floor((n - epochOfNextNegativeSample[i]) / epochsPerNegativeSample[i]); - for (var p = 0; p < nNegSamples; p++) { - var k_1 = utils.tauRandInt(nVertices, this.random); - var other_1 = tailEmbedding[k_1]; - var distSquared_1 = rDist(current, other_1); - var gradCoeff_1 = 0.0; - if (distSquared_1 > 0.0) { - gradCoeff_1 = 2.0 * gamma * b; - gradCoeff_1 /= - (0.001 + distSquared_1) * (a * Math.pow(distSquared_1, b) + 1); - } - else if (j === k_1) { - continue; - } - for (var d = 0; d < dim; d++) { - var gradD = 4.0; - if (gradCoeff_1 > 0.0) { - gradD = clip(gradCoeff_1 * (current[d] - other_1[d]), clipValue); - } - current[d] += gradD * alpha; - } - } - epochOfNextNegativeSample[i] += nNegSamples * epochsPerNegativeSample[i]; + var valuesA = a.getValues(); + var rowsA = a.getRows(); + var colsA = a.getCols(); + for (var i = 0; i < valuesA.length; i++) { + var row = rowsA[i]; + var col = colsA[i]; + var key = row + ":" + col; + visited.add(key); + operate(row, col); + } + var valuesB = b.getValues(); + var rowsB = b.getRows(); + var colsB = b.getCols(); + for (var i = 0; i < valuesB.length; i++) { + var row = rowsB[i]; + var col = colsB[i]; + var key = row + ":" + col; + if (visited.has(key)) + continue; + operate(row, col); + } + var dims = [a.nRows, a.nCols]; + return new SparseMatrix(rows, cols, vals, dims); +} +function getCSR(x) { + var entries = []; + x.forEach(function (value, row, col) { + entries.push({ value: value, row: row, col: col }); + }); + entries.sort(function (a, b) { + if (a.row === b.row) { + return a.col - b.col; + } + else { + return a.row - b.col; + } + }); + var indices = []; + var values = []; + var indptr = []; + var currentRow = -1; + for (var i = 0; i < entries.length; i++) { + var _a = entries[i], row = _a.row, 
col = _a.col, value = _a.value; + if (row !== currentRow) { + currentRow = row; + indptr.push(i); + } + indices.push(col); + values.push(value); + } + return { indices: indices, values: values, indptr: indptr }; +} +exports.getCSR = getCSR; + + +/***/ }), +/* 4 */ +/***/ (function(module, exports, __webpack_require__) { + +"use strict"; + +var __read = (this && this.__read) || function (o, n) { + var m = typeof Symbol === "function" && o[Symbol.iterator]; + if (!m) return o; + var i = m.call(o), r, ar = [], e; + try { + while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value); + } + catch (error) { e = { error: error }; } + finally { + try { + if (r && !r.done && (m = i["return"])) m.call(i); + } + finally { if (e) throw e.error; } + } + return ar; +}; +var __spread = (this && this.__spread) || function () { + for (var ar = [], i = 0; i < arguments.length; i++) ar = ar.concat(__read(arguments[i])); + return ar; +}; +var __values = (this && this.__values) || function (o) { + var m = typeof Symbol === "function" && o[Symbol.iterator], i = 0; + if (m) return m.call(o); + return { + next: function () { + if (o && i >= o.length) o = void 0; + return { value: o && o[i++], done: !o }; } - optimizationState.alpha = initialAlpha * (1.0 - n / nEpochs); - optimizationState.currentEpoch += 1; - this.embedding = headEmbedding; - return optimizationState.currentEpoch; }; - UMAP.prototype.optimizeLayout = function (epochCallback) { - var _this = this; - if (epochCallback === void 0) { epochCallback = function () { return true; }; } - if (!this.optimizationState.isInitialized) { - this.initializeOptimization(); +}; +Object.defineProperty(exports, "__esModule", { value: true }); +var utils = __webpack_require__(1); +var FlatTree = (function () { + function FlatTree(hyperplanes, offsets, children, indices) { + this.hyperplanes = hyperplanes; + this.offsets = offsets; + this.children = children; + this.indices = indices; + } + return FlatTree; +}()); 
+exports.FlatTree = FlatTree; +function makeForest(data, nNeighbors, nTrees, random) { + var leafSize = Math.max(10, nNeighbors); + var trees = utils + .range(nTrees) + .map(function (_, i) { return makeTree(data, leafSize, i, random); }); + var forest = trees.map(function (tree) { return flattenTree(tree, leafSize); }); + return forest; +} +exports.makeForest = makeForest; +function makeTree(data, leafSize, n, random) { + if (leafSize === void 0) { leafSize = 30; } + var indices = utils.range(data.length); + var tree = makeEuclideanTree(data, indices, leafSize, n, random); + return tree; +} +function makeEuclideanTree(data, indices, leafSize, q, random) { + if (leafSize === void 0) { leafSize = 30; } + if (indices.length > leafSize) { + var splitResults = euclideanRandomProjectionSplit(data, indices, random); + var indicesLeft = splitResults.indicesLeft, indicesRight = splitResults.indicesRight, hyperplane = splitResults.hyperplane, offset = splitResults.offset; + var leftChild = makeEuclideanTree(data, indicesLeft, leafSize, q + 1, random); + var rightChild = makeEuclideanTree(data, indicesRight, leafSize, q + 1, random); + var node = { leftChild: leftChild, rightChild: rightChild, isLeaf: false, hyperplane: hyperplane, offset: offset }; + return node; + } + else { + var node = { indices: indices, isLeaf: true }; + return node; + } +} +function euclideanRandomProjectionSplit(data, indices, random) { + var dim = data[0].length; + var leftIndex = utils.tauRandInt(indices.length, random); + var rightIndex = utils.tauRandInt(indices.length, random); + rightIndex += leftIndex === rightIndex ? 
1 : 0; + rightIndex = rightIndex % indices.length; + var left = indices[leftIndex]; + var right = indices[rightIndex]; + var hyperplaneOffset = 0; + var hyperplaneVector = utils.zeros(dim); + for (var i = 0; i < hyperplaneVector.length; i++) { + hyperplaneVector[i] = data[left][i] - data[right][i]; + hyperplaneOffset -= + (hyperplaneVector[i] * (data[left][i] + data[right][i])) / 2.0; + } + var nLeft = 0; + var nRight = 0; + var side = utils.zeros(indices.length); + for (var i = 0; i < indices.length; i++) { + var margin = hyperplaneOffset; + for (var d = 0; d < dim; d++) { + margin += hyperplaneVector[d] * data[indices[i]][d]; } - return new Promise(function (resolve, reject) { - var step = function () { return __awaiter(_this, void 0, void 0, function () { - var _a, nEpochs, currentEpoch, epochCompleted, shouldStop, isFinished; - return __generator(this, function (_b) { - try { - _a = this.optimizationState, nEpochs = _a.nEpochs, currentEpoch = _a.currentEpoch; - epochCompleted = this.optimizeLayoutStep(currentEpoch); - shouldStop = epochCallback(epochCompleted) === false; - isFinished = epochCompleted === nEpochs; - if (!shouldStop && !isFinished) { - step(); - } - else { - return [2, resolve(isFinished)]; - } - } - catch (err) { - reject(err); - } - return [2]; - }); - }); }; - step(); - }); - }; - UMAP.prototype.getNEpochs = function () { - var graph = this.graph; - if (this.nEpochs > 0) { - return this.nEpochs; + if (margin === 0) { + side[i] = utils.tauRandInt(2, random); + if (side[i] === 0) { + nLeft += 1; + } + else { + nRight += 1; + } } - var length = graph.nRows; - if (length <= 2500) { - return 500; + else if (margin > 0) { + side[i] = 0; + nLeft += 1; } - else if (length <= 5000) { - return 400; + else { + side[i] = 1; + nRight += 1; } - else if (length <= 7500) { - return 300; + } + var indicesLeft = utils.zeros(nLeft); + var indicesRight = utils.zeros(nRight); + nLeft = 0; + nRight = 0; + for (var i in utils.range(side.length)) { + if (side[i] === 
0) { + indicesLeft[nLeft] = indices[i]; + nLeft += 1; } else { - return 200; + indicesRight[nRight] = indices[i]; + nRight += 1; } - }; - return UMAP; -}()); -exports.UMAP = UMAP; -function euclidean(x, y) { - var result = 0; - for (var i = 0; i < x.length; i++) { - result += Math.pow((x[i] - y[i]), 2); } - return Math.sqrt(result); + return { + indicesLeft: indicesLeft, + indicesRight: indicesRight, + hyperplane: hyperplaneVector, + offset: hyperplaneOffset, + }; } -exports.euclidean = euclidean; -function cosine(x, y) { - var result = 0.0; - var normX = 0.0; - var normY = 0.0; - for (var i = 0; i < x.length; i++) { - result += x[i] * y[i]; - normX += Math.pow(x[i], 2); - normY += Math.pow(y[i], 2); +function flattenTree(tree, leafSize) { + var nNodes = numNodes(tree); + var nLeaves = numLeaves(tree); + var hyperplanes = utils + .range(nNodes) + .map(function () { return utils.zeros(tree.hyperplane.length); }); + var offsets = utils.zeros(nNodes); + var children = utils.range(nNodes).map(function () { return [-1, -1]; }); + var indices = utils + .range(nLeaves) + .map(function () { return utils.range(leafSize).map(function () { return -1; }); }); + recursiveFlatten(tree, hyperplanes, offsets, children, indices, 0, 0); + return new FlatTree(hyperplanes, offsets, children, indices); +} +function recursiveFlatten(tree, hyperplanes, offsets, children, indices, nodeNum, leafNum) { + var _a; + if (tree.isLeaf) { + children[nodeNum][0] = -leafNum; + (_a = indices[leafNum]).splice.apply(_a, __spread([0, tree.indices.length], tree.indices)); + leafNum += 1; + return { nodeNum: nodeNum, leafNum: leafNum }; } - if (normX === 0 && normY === 0) { - return 0; + else { + hyperplanes[nodeNum] = tree.hyperplane; + offsets[nodeNum] = tree.offset; + children[nodeNum][0] = nodeNum + 1; + var oldNodeNum = nodeNum; + var res = recursiveFlatten(tree.leftChild, hyperplanes, offsets, children, indices, nodeNum + 1, leafNum); + nodeNum = res.nodeNum; + leafNum = res.leafNum; + 
children[oldNodeNum][1] = nodeNum + 1; + res = recursiveFlatten(tree.rightChild, hyperplanes, offsets, children, indices, nodeNum + 1, leafNum); + return { nodeNum: res.nodeNum, leafNum: res.leafNum }; } - else if (normX === 0 || normY === 0) { - return 1.0; +} +function numNodes(tree) { + if (tree.isLeaf) { + return 1; } else { - return 1.0 - result / Math.sqrt(normX * normY); + return 1 + numNodes(tree.leftChild) + numNodes(tree.rightChild); } } -exports.cosine = cosine; -var OptimizationState = (function () { - function OptimizationState() { - this.currentEpoch = 0; - this.isInitialized = false; - this.headEmbedding = []; - this.tailEmbedding = []; - this.head = []; - this.tail = []; - this.epochsPerSample = []; - this.epochOfNextSample = []; - this.epochOfNextNegativeSample = []; - this.epochsPerNegativeSample = []; - this.moveOther = true; - this.initialAlpha = 1.0; - this.alpha = 1.0; - this.gamma = 1.0; - this.a = 1.5769434603113077; - this.b = 0.8950608779109733; - this.dim = 2; - this.nEpochs = 500; - this.nVertices = 0; +function numLeaves(tree) { + if (tree.isLeaf) { + return 1; } - return OptimizationState; -}()); -function clip(x, clipValue) { - if (x > clipValue) - return clipValue; - else if (x < -clipValue) - return -clipValue; - else - return x; -} -function rDist(x, y) { - var result = 0.0; - for (var i = 0; i < x.length; i++) { - result += Math.pow(x[i] - y[i], 2); + else { + return numLeaves(tree.leftChild) + numLeaves(tree.rightChild); } - return result; -} -function findABParams(spread, minDist) { - var curve = function (_a) { - var _b = __read(_a, 2), a = _b[0], b = _b[1]; - return function (x) { - return 1.0 / (1.0 + a * Math.pow(x, (2 * b))); - }; - }; - var xv = utils - .linear(0, spread * 3, 300) - .map(function (val) { return (val < minDist ? 1.0 : val); }); - var yv = utils.zeros(xv.length).map(function (val, index) { - var gte = xv[index] >= minDist; - return gte ? 
Math.exp(-(xv[index] - minDist) / spread) : val; - }); - var initialValues = [0.5, 0.5]; - var data = { x: xv, y: yv }; - var options = { - damping: 1.5, - initialValues: initialValues, - gradientDifference: 10e-2, - maxIterations: 100, - errorTolerance: 10e-3, - }; - var parameterValues = LM(data, curve, options).parameterValues; - var _a = __read(parameterValues, 2), a = _a[0], b = _a[1]; - return { a: a, b: b }; } -exports.findABParams = findABParams; -function fastIntersection(graph, target, unknownDist, farDist) { - if (unknownDist === void 0) { unknownDist = 1.0; } - if (farDist === void 0) { farDist = 5.0; } - return graph.map(function (value, row, col) { - if (target[row] === -1 || target[col] === -1) { - return value * Math.exp(-unknownDist); - } - else if (target[row] !== target[col]) { - return value * Math.exp(-farDist); +function makeLeafArray(rpForest) { + var e_1, _a; + if (rpForest.length > 0) { + var output = []; + try { + for (var rpForest_1 = __values(rpForest), rpForest_1_1 = rpForest_1.next(); !rpForest_1_1.done; rpForest_1_1 = rpForest_1.next()) { + var tree = rpForest_1_1.value; + output.push.apply(output, __spread(tree.indices)); + } } - else { - return value; + catch (e_1_1) { e_1 = { error: e_1_1 }; } + finally { + try { + if (rpForest_1_1 && !rpForest_1_1.done && (_a = rpForest_1.return)) _a.call(rpForest_1); + } + finally { if (e_1) throw e_1.error; } } - }); + return output; + } + else { + return [[-1]]; + } } -exports.fastIntersection = fastIntersection; -function resetLocalConnectivity(simplicialSet) { - simplicialSet = matrix.normalize(simplicialSet, "max"); - var transpose = matrix.transpose(simplicialSet); - var prodMatrix = matrix.pairwiseMultiply(transpose, simplicialSet); - simplicialSet = matrix.add(simplicialSet, matrix.subtract(transpose, prodMatrix)); - return matrix.eliminateZeros(simplicialSet); +exports.makeLeafArray = makeLeafArray; +function selectSide(hyperplane, offset, point) { + var margin = offset; + for (var d = 
0; d < point.length; d++) { + margin += hyperplane[d] * point[d]; + } + if (margin === 0) { + var side = utils.tauRandInt(2); + return side; + } + else if (margin > 0) { + return 0; + } + else { + return 1; + } } -exports.resetLocalConnectivity = resetLocalConnectivity; +function searchFlatTree(point, tree) { + var node = 0; + while (tree.children[node][0] > 0) { + var side = selectSide(tree.hyperplanes[node], tree.offsets[node], point); + if (side === 0) { + node = tree.children[node][0]; + } + else { + node = tree.children[node][1]; + } + } + var index = -1 * tree.children[node][0]; + return tree.indices[index]; +} +exports.searchFlatTree = searchFlatTree; /***/ }), -/* 4 */ +/* 5 */ +/***/ (function(module, exports, __webpack_require__) { + +"use strict"; + +Object.defineProperty(exports, "__esModule", { value: true }); +var umap_1 = __webpack_require__(6); +window.UMAP = umap_1.UMAP; + + +/***/ }), +/* 6 */ /***/ (function(module, exports, __webpack_require__) { "use strict"; +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? 
resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +var __generator = (this && this.__generator) || function (thisArg, body) { + var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; + return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; + function verb(n) { return function (v) { return step([n, v]); }; } + function step(op) { + if (f) throw new TypeError("Generator is already executing."); + while (_) try { + if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; + if (y = 0, t) op = [op[0] & 2, t.value]; + switch (op[0]) { + case 0: case 1: t = op; break; + case 4: _.label++; return { value: op[1], done: false }; + case 5: _.label++; y = op[1]; op = [0]; continue; + case 7: op = _.ops.pop(); _.trys.pop(); continue; + default: + if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } + if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } + if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } + if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } + if (t[2]) _.ops.pop(); + _.trys.pop(); continue; + } + op = body.call(thisArg, _); + } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } + if (op[0] & 5) throw op[1]; return { value: op[0] ? 
op[1] : void 0, done: true }; + } +}; var __read = (this && this.__read) || function (o, n) { var m = typeof Symbol === "function" && o[Symbol.iterator]; if (!m) return o; @@ -885,531 +1099,695 @@ var __spread = (this && this.__spread) || function () { for (var ar = [], i = 0; i < arguments.length; i++) ar = ar.concat(__read(arguments[i])); return ar; }; -var __values = (this && this.__values) || function (o) { - var m = typeof Symbol === "function" && o[Symbol.iterator], i = 0; - if (m) return m.call(o); - return { - next: function () { - if (o && i >= o.length) o = void 0; - return { value: o && o[i++], done: !o }; - } - }; -}; Object.defineProperty(exports, "__esModule", { value: true }); -var _a; +var heap = __webpack_require__(2); +var matrix = __webpack_require__(3); +var nnDescent = __webpack_require__(7); +var tree = __webpack_require__(4); var utils = __webpack_require__(1); -var SparseMatrix = (function () { - function SparseMatrix(rows, cols, values, dims) { - this.entries = new Map(); - this.nRows = 0; - this.nCols = 0; - this.rows = __spread(rows); - this.cols = __spread(cols); - this.values = __spread(values); - for (var i = 0; i < values.length; i++) { - var key = this.makeKey(this.rows[i], this.cols[i]); - this.entries.set(key, i); - } - this.nRows = dims[0]; - this.nCols = dims[0]; - } - SparseMatrix.prototype.makeKey = function (row, col) { - return row + ":" + col; - }; - SparseMatrix.prototype.checkDims = function (row, col) { - var withinBounds = row < this.nRows && col < this.nCols; - if (!withinBounds) { - throw new Error('array index out of bounds'); - } - }; - SparseMatrix.prototype.set = function (row, col, value) { - this.checkDims(row, col); - var key = this.makeKey(row, col); - if (!this.entries.has(key)) { - this.rows.push(row); - this.cols.push(col); - this.values.push(value); - this.entries.set(key, this.values.length - 1); - } - else { - var index = this.entries.get(key); - this.values[index] = value; - } - }; - 
SparseMatrix.prototype.get = function (row, col, defaultValue) { - if (defaultValue === void 0) { defaultValue = 0; } - this.checkDims(row, col); - var key = this.makeKey(row, col); - if (this.entries.has(key)) { - var index = this.entries.get(key); - return this.values[index]; - } - else { - return defaultValue; - } - }; - SparseMatrix.prototype.getDims = function () { - return [this.nRows, this.nCols]; - }; - SparseMatrix.prototype.getRows = function () { - return __spread(this.rows); - }; - SparseMatrix.prototype.getCols = function () { - return __spread(this.cols); - }; - SparseMatrix.prototype.getValues = function () { - return __spread(this.values); - }; - SparseMatrix.prototype.forEach = function (fn) { - for (var i = 0; i < this.values.length; i++) { - fn(this.values[i], this.rows[i], this.cols[i]); - } - }; - SparseMatrix.prototype.map = function (fn) { - var vals = []; - for (var i = 0; i < this.values.length; i++) { - vals.push(fn(this.values[i], this.rows[i], this.cols[i])); - } - var dims = [this.nRows, this.nCols]; - return new SparseMatrix(this.rows, this.cols, vals, dims); - }; - SparseMatrix.prototype.toArray = function () { +var LM = __webpack_require__(8); +var SMOOTH_K_TOLERANCE = 1e-5; +var MIN_K_DIST_SCALE = 1e-3; +var UMAP = (function () { + function UMAP(params) { + if (params === void 0) { params = {}; } var _this = this; - var rows = utils.empty(this.nRows); - var output = rows.map(function () { - return utils.zeros(_this.nCols); - }); - for (var i = 0; i < this.values.length; i++) { - output[this.rows[i]][this.cols[i]] = this.values[i]; - } - return output; - }; - return SparseMatrix; -}()); -exports.SparseMatrix = SparseMatrix; -function transpose(matrix) { - var cols = []; - var rows = []; - var vals = []; - matrix.forEach(function (value, row, col) { - cols.push(row); - rows.push(col); - vals.push(value); - }); - var dims = [matrix.nCols, matrix.nRows]; - return new SparseMatrix(rows, cols, vals, dims); -} -exports.transpose = 
transpose; -function identity(size) { - var _a = __read(size, 1), rows = _a[0]; - var matrix = new SparseMatrix([], [], [], size); - for (var i = 0; i < rows; i++) { - matrix.set(i, i, 1); - } - return matrix; -} -exports.identity = identity; -function pairwiseMultiply(a, b) { - return elementWise(a, b, function (x, y) { return x * y; }); -} -exports.pairwiseMultiply = pairwiseMultiply; -function add(a, b) { - return elementWise(a, b, function (x, y) { return x + y; }); -} -exports.add = add; -function subtract(a, b) { - return elementWise(a, b, function (x, y) { return x - y; }); -} -exports.subtract = subtract; -function multiplyScalar(a, scalar) { - return a.map(function (value) { - return value * scalar; - }); -} -exports.multiplyScalar = multiplyScalar; -function eliminateZeros(m) { - var zeroIndices = new Set(); - var values = m.getValues(); - var rows = m.getRows(); - var cols = m.getCols(); - for (var i = 0; i < values.length; i++) { - if (values[i] === 0) { - zeroIndices.add(i); - } + this.learningRate = 1.0; + this.localConnectivity = 1.0; + this.minDist = 0.1; + this.nComponents = 2; + this.nEpochs = 0; + this.nNeighbors = 15; + this.negativeSampleRate = 5; + this.random = Math.random; + this.repulsionStrength = 1.0; + this.setOpMixRatio = 1.0; + this.spread = 1.0; + this.transformQueueSize = 4.0; + this.targetMetric = "categorical"; + this.targetWeight = 0.5; + this.targetNNeighbors = this.nNeighbors; + this.distanceFn = euclidean; + this.isInitialized = false; + this.rpForest = []; + this.embedding = []; + this.optimizationState = new OptimizationState(); + var setParam = function (key) { + if (params[key] !== undefined) + _this[key] = params[key]; + }; + setParam('learningRate'); + setParam('localConnectivity'); + setParam('minDist'); + setParam('nComponents'); + setParam('nEpochs'); + setParam('nNeighbors'); + setParam('negativeSampleRate'); + setParam('random'); + setParam('repulsionStrength'); + setParam('setOpMixRatio'); + setParam('spread'); + 
setParam('transformQueueSize'); } - var removeByZeroIndex = function (_, index) { return !zeroIndices.has(index); }; - var nextValues = values.filter(removeByZeroIndex); - var nextRows = rows.filter(removeByZeroIndex); - var nextCols = cols.filter(removeByZeroIndex); - return new SparseMatrix(nextRows, nextCols, nextValues, m.getDims()); -} -exports.eliminateZeros = eliminateZeros; -function normalize(m, normType) { - if (normType === void 0) { normType = "l2"; } - var e_1, _a; - var normFn = normFns[normType]; - var colsByRow = new Map(); - m.forEach(function (_, row, col) { - var cols = colsByRow.get(row) || []; - cols.push(col); - colsByRow.set(row, cols); - }); - var nextMatrix = new SparseMatrix([], [], [], m.getDims()); - var _loop_1 = function (row) { - var cols = colsByRow.get(row).sort(); - var vals = cols.map(function (col) { return m.get(row, col); }); - var norm = normFn(vals); - for (var i = 0; i < norm.length; i++) { - nextMatrix.set(row, cols[i], norm[i]); - } + UMAP.prototype.fit = function (X) { + this.initializeFit(X); + this.optimizeLayout(); + return this.embedding; + }; + UMAP.prototype.fitAsync = function (X, callback) { + if (callback === void 0) { callback = function () { return true; }; } + return __awaiter(this, void 0, void 0, function () { + return __generator(this, function (_a) { + switch (_a.label) { + case 0: + this.initializeFit(X); + return [4, this.optimizeLayoutAsync(callback)]; + case 1: + _a.sent(); + return [2, this.embedding]; + } + }); + }); }; - try { - for (var _b = __values(colsByRow.keys()), _c = _b.next(); !_c.done; _c = _b.next()) { - var row = _c.value; - _loop_1(row); + UMAP.prototype.setSupervisedProjection = function (Y, params) { + if (params === void 0) { params = {}; } + this.Y = Y; + this.targetMetric = params.targetMetric || this.targetMetric; + this.targetWeight = params.targetWeight || this.targetWeight; + this.targetNNeighbors = params.targetNNeighbors || this.targetNNeighbors; + }; + 
UMAP.prototype.setPrecomputedKNN = function (knnIndices, knnDistances) { + this.knnIndices = knnIndices; + this.knnDistances = knnDistances; + }; + UMAP.prototype.initializeFit = function (X) { + if (this.X === X && this.isInitialized) { + return this.getNEpochs(); } - } - catch (e_1_1) { e_1 = { error: e_1_1 }; } - finally { - try { - if (_c && !_c.done && (_a = _b.return)) _a.call(_b); + this.X = X; + if (!this.knnIndices && !this.knnDistances) { + var knnResults = this.nearestNeighbors(X); + this.knnIndices = knnResults.knnIndices; + this.knnDistances = knnResults.knnDistances; } - finally { if (e_1) throw e_1.error; } - } - return nextMatrix; -} -exports.normalize = normalize; -var normFns = (_a = {}, - _a["max"] = function (xs) { - var max = -Infinity; - for (var i = 0; i < xs.length; i++) { - max = xs[i] > max ? xs[i] : max; + this.graph = this.fuzzySimplicialSet(X, this.nNeighbors, this.setOpMixRatio); + this.makeSearchFns(); + this.searchGraph = this.makeSearchGraph(X); + this.processGraphForSupervisedProjection(); + var _a = this.initializeSimplicialSetEmbedding(), head = _a.head, tail = _a.tail, epochsPerSample = _a.epochsPerSample; + this.optimizationState.head = head; + this.optimizationState.tail = tail; + this.optimizationState.epochsPerSample = epochsPerSample; + this.initializeOptimization(); + this.prepareForOptimizationLoop(); + this.isInitialized = true; + return this.getNEpochs(); + }; + UMAP.prototype.makeSearchFns = function () { + var _a = nnDescent.makeInitializations(this.distanceFn), initFromTree = _a.initFromTree, initFromRandom = _a.initFromRandom; + this.initFromTree = initFromTree; + this.initFromRandom = initFromRandom; + this.search = nnDescent.makeInitializedNNSearch(this.distanceFn); + }; + UMAP.prototype.makeSearchGraph = function (X) { + var knnIndices = this.knnIndices; + var knnDistances = this.knnDistances; + var dims = [X.length, X.length]; + var searchGraph = new matrix.SparseMatrix([], [], [], dims); + for (var i = 0; i < 
knnIndices.length; i++) { + var knn = knnIndices[i]; + var distances = knnDistances[i]; + for (var j = 0; j < knn.length; j++) { + var neighbor = knn[j]; + var distance = distances[j]; + if (distance > 0) { + searchGraph.set(i, neighbor, distance); + } + } } - return xs.map(function (x) { return x / max; }); - }, - _a["l1"] = function (xs) { - var sum = 0; - for (var i = 0; i < xs.length; i++) { - sum += xs[i]; + var transpose = matrix.transpose(searchGraph); + return matrix.maximum(searchGraph, transpose); + }; + UMAP.prototype.transform = function (toTransform) { + var _this = this; + var rawData = this.X; + if (rawData === undefined || rawData.length === 0) { + throw new Error('No data has been fit.'); + } + var nNeighbors = Math.floor(this.nNeighbors * this.transformQueueSize); + var init = nnDescent.initializeSearch(this.rpForest, rawData, toTransform, nNeighbors, this.initFromRandom, this.initFromTree); + var result = this.search(rawData, this.searchGraph, init, toTransform); + var _a = heap.deheapSort(result), indices = _a.indices, distances = _a.weights; + indices = indices.map(function (x) { return x.slice(0, _this.nNeighbors); }); + distances = distances.map(function (x) { return x.slice(0, _this.nNeighbors); }); + var adjustedLocalConnectivity = Math.max(0, this.localConnectivity - 1); + var _b = this.smoothKNNDistance(distances, this.nNeighbors, adjustedLocalConnectivity), sigmas = _b.sigmas, rhos = _b.rhos; + var _c = this.computeMembershipStrengths(indices, distances, sigmas, rhos), rows = _c.rows, cols = _c.cols, vals = _c.vals; + var size = [toTransform.length, rawData.length]; + var graph = new matrix.SparseMatrix(rows, cols, vals, size); + var normed = matrix.normalize(graph, "l1"); + var csrMatrix = matrix.getCSR(normed); + var nPoints = toTransform.length; + var eIndices = utils.reshape2d(csrMatrix.indices, nPoints, this.nNeighbors); + var eWeights = utils.reshape2d(csrMatrix.values, nPoints, this.nNeighbors); + var embedding = 
initTransform(eIndices, eWeights, this.embedding); + var nEpochs = this.nEpochs + ? this.nEpochs / 3 + : graph.nRows <= 10000 + ? 100 + : 30; + var graphMax = graph + .getValues() + .reduce(function (max, val) { return (val > max ? val : max); }, 0); + graph = graph.map(function (value) { return (value < graphMax / nEpochs ? 0 : value); }); + graph = matrix.eliminateZeros(graph); + var epochsPerSample = this.makeEpochsPerSample(graph.getValues(), nEpochs); + var head = graph.getRows(); + var tail = graph.getCols(); + this.assignOptimizationStateParameters({ + headEmbedding: embedding, + tailEmbedding: this.embedding, + head: head, + tail: tail, + currentEpoch: 0, + nEpochs: nEpochs, + nVertices: graph.getDims()[1], + epochsPerSample: epochsPerSample, + }); + this.prepareForOptimizationLoop(); + return this.optimizeLayout(); + }; + UMAP.prototype.processGraphForSupervisedProjection = function () { + var _a = this, Y = _a.Y, X = _a.X; + if (Y) { + if (Y.length !== X.length) { + throw new Error('Length of X and y must be equal'); + } + if (this.targetMetric === "categorical") { + var lt = this.targetWeight < 1.0; + var farDist = lt ? 
2.5 * (1.0 / (1.0 - this.targetWeight)) : 1.0e12; + this.graph = this.categoricalSimplicialSetIntersection(this.graph, Y, farDist); + } } - return xs.map(function (x) { return x / sum; }); - }, - _a["l2"] = function (xs) { - var sum = 0; - for (var i = 0; i < xs.length; i++) { - sum += Math.pow(xs[i], 2); + }; + UMAP.prototype.step = function () { + var currentEpoch = this.optimizationState.currentEpoch; + if (currentEpoch < this.getNEpochs()) { + this.optimizeLayoutStep(currentEpoch); } - return xs.map(function (x) { return Math.sqrt(Math.pow(x, 2) / sum); }); - }, - _a); -function elementWise(a, b, op) { - var visited = new Set(); - var rows = []; - var cols = []; - var vals = []; - var operate = function (row, col) { - rows.push(row); - cols.push(col); - var nextValue = op(a.get(row, col), b.get(row, col)); - vals.push(nextValue); + return this.optimizationState.currentEpoch; }; - var valuesA = a.getValues(); - var rowsA = a.getRows(); - var colsA = a.getCols(); - for (var i = 0; i < valuesA.length; i++) { - var row = rowsA[i]; - var col = colsA[i]; - var key = row + ":" + col; - visited.add(key); - operate(row, col); - } - var valuesB = b.getValues(); - var rowsB = b.getRows(); - var colsB = b.getCols(); - for (var i = 0; i < valuesB.length; i++) { - var row = rowsB[i]; - var col = colsB[i]; - var key = row + ":" + col; - if (visited.has(key)) - continue; - operate(row, col); - } - var dims = [a.nRows, a.nCols]; - return new SparseMatrix(rows, cols, vals, dims); -} - - -/***/ }), -/* 5 */ -/***/ (function(module, exports, __webpack_require__) { - -"use strict"; - -Object.defineProperty(exports, "__esModule", { value: true }); -var heap = __webpack_require__(6); -var utils = __webpack_require__(1); -function makeNNDescent(distanceFn, random) { - return function nNDescent(data, leafArray, nNeighbors, nIters, maxCandidates, delta, rho, rpTreeInit) { - if (nIters === void 0) { nIters = 10; } - if (maxCandidates === void 0) { maxCandidates = 50; } - if (delta === 
void 0) { delta = 0.001; } - if (rho === void 0) { rho = 0.5; } - if (rpTreeInit === void 0) { rpTreeInit = true; } - var nVertices = data.length; - var currentGraph = heap.makeHeap(data.length, nNeighbors); - for (var i = 0; i < data.length; i++) { - var indices = heap.rejectionSample(nNeighbors, data.length, random); - for (var j = 0; j < indices.length; j++) { - var d = distanceFn(data[i], data[indices[j]]); - heap.heapPush(currentGraph, i, d, indices[j], 1); - heap.heapPush(currentGraph, indices[j], d, i, 1); + UMAP.prototype.getEmbedding = function () { + return this.embedding; + }; + UMAP.prototype.nearestNeighbors = function (X) { + var _a = this, distanceFn = _a.distanceFn, nNeighbors = _a.nNeighbors; + var log2 = function (n) { return Math.log(n) / Math.log(2); }; + var metricNNDescent = nnDescent.makeNNDescent(distanceFn, this.random); + var round = function (n) { + return n === 0.5 ? 0 : Math.round(n); + }; + var nTrees = 5 + Math.floor(round(Math.pow(X.length, 0.5) / 20.0)); + var nIters = Math.max(5, Math.floor(Math.round(log2(X.length)))); + this.rpForest = tree.makeForest(X, nNeighbors, nTrees, this.random); + var leafArray = tree.makeLeafArray(this.rpForest); + var _b = metricNNDescent(X, leafArray, nNeighbors, nIters), indices = _b.indices, weights = _b.weights; + return { knnIndices: indices, knnDistances: weights }; + }; + UMAP.prototype.fuzzySimplicialSet = function (X, nNeighbors, setOpMixRatio) { + if (setOpMixRatio === void 0) { setOpMixRatio = 1.0; } + var _a = this, _b = _a.knnIndices, knnIndices = _b === void 0 ? [] : _b, _c = _a.knnDistances, knnDistances = _c === void 0 ? 
[] : _c, localConnectivity = _a.localConnectivity; + var _d = this.smoothKNNDistance(knnDistances, nNeighbors, localConnectivity), sigmas = _d.sigmas, rhos = _d.rhos; + var _e = this.computeMembershipStrengths(knnIndices, knnDistances, sigmas, rhos), rows = _e.rows, cols = _e.cols, vals = _e.vals; + var size = [X.length, X.length]; + var sparseMatrix = new matrix.SparseMatrix(rows, cols, vals, size); + var transpose = matrix.transpose(sparseMatrix); + var prodMatrix = matrix.pairwiseMultiply(sparseMatrix, transpose); + var a = matrix.subtract(matrix.add(sparseMatrix, transpose), prodMatrix); + var b = matrix.multiplyScalar(a, setOpMixRatio); + var c = matrix.multiplyScalar(prodMatrix, 1.0 - setOpMixRatio); + var result = matrix.add(b, c); + return result; + }; + UMAP.prototype.categoricalSimplicialSetIntersection = function (simplicialSet, target, farDist, unknownDist) { + if (unknownDist === void 0) { unknownDist = 1.0; } + var intersection = fastIntersection(simplicialSet, target, unknownDist, farDist); + intersection = matrix.eliminateZeros(intersection); + return resetLocalConnectivity(intersection); + }; + UMAP.prototype.smoothKNNDistance = function (distances, k, localConnectivity, nIter, bandwidth) { + if (localConnectivity === void 0) { localConnectivity = 1.0; } + if (nIter === void 0) { nIter = 64; } + if (bandwidth === void 0) { bandwidth = 1.0; } + var target = (Math.log(k) / Math.log(2)) * bandwidth; + var rho = utils.zeros(distances.length); + var result = utils.zeros(distances.length); + for (var i = 0; i < distances.length; i++) { + var lo = 0.0; + var hi = Infinity; + var mid = 1.0; + var ithDistances = distances[i]; + var nonZeroDists = ithDistances.filter(function (d) { return d > 0.0; }); + if (nonZeroDists.length >= localConnectivity) { + var index = Math.floor(localConnectivity); + var interpolation = localConnectivity - index; + if (index > 0) { + rho[i] = nonZeroDists[index - 1]; + if (interpolation > SMOOTH_K_TOLERANCE) { + rho[i] += + 
interpolation * (nonZeroDists[index] - nonZeroDists[index - 1]); + } + } + else { + rho[i] = interpolation * nonZeroDists[0]; + } } - } - if (rpTreeInit) { - for (var n = 0; n < leafArray.length; n++) { - for (var i = 0; i < leafArray[n].length; i++) { - if (leafArray[n][i] < 0) { - break; + else if (nonZeroDists.length > 0) { + rho[i] = utils.max(nonZeroDists); + } + for (var n = 0; n < nIter; n++) { + var psum = 0.0; + for (var j = 1; j < distances[i].length; j++) { + var d = distances[i][j] - rho[i]; + if (d > 0) { + psum += Math.exp(-(d / mid)); } - for (var j = i + 1; j < leafArray[n].length; j++) { - if (leafArray[n][j] < 0) { - break; - } - var d = distanceFn(data[leafArray[n][i]], data[leafArray[n][j]]); - heap.heapPush(currentGraph, leafArray[n][i], d, leafArray[n][j], 1); - heap.heapPush(currentGraph, leafArray[n][j], d, leafArray[n][i], 1); + else { + psum += 1.0; + } + } + if (Math.abs(psum - target) < SMOOTH_K_TOLERANCE) { + break; + } + if (psum > target) { + hi = mid; + mid = (lo + hi) / 2.0; + } + else { + lo = mid; + if (hi === Infinity) { + mid *= 2; } + else { + mid = (lo + hi) / 2.0; + } + } + } + result[i] = mid; + if (rho[i] > 0.0) { + var meanIthDistances = utils.mean(ithDistances); + if (result[i] < MIN_K_DIST_SCALE * meanIthDistances) { + result[i] = MIN_K_DIST_SCALE * meanIthDistances; + } + } + else { + var meanDistances = utils.mean(distances.map(utils.mean)); + if (result[i] < MIN_K_DIST_SCALE * meanDistances) { + result[i] = MIN_K_DIST_SCALE * meanDistances; } } } - for (var n = 0; n < nIters; n++) { - var candidateNeighbors = heap.buildCandidates(currentGraph, nVertices, nNeighbors, maxCandidates, random); - var c = 0; - for (var i = 0; i < nVertices; i++) { - for (var j = 0; j < maxCandidates; j++) { - var p = Math.floor(candidateNeighbors[0][i][j]); - if (p < 0 || utils.tauRand(random) < rho) { - continue; - } - for (var k = 0; k < maxCandidates; k++) { - var q = Math.floor(candidateNeighbors[0][i][k]); - var cj = 
candidateNeighbors[2][i][j]; - var ck = candidateNeighbors[2][i][k]; - if (q < 0 || (!cj && !ck)) { - continue; - } - var d = distanceFn(data[p], data[q]); - c += heap.heapPush(currentGraph, p, d, q, 1); - c += heap.heapPush(currentGraph, q, d, p, 1); - } + return { sigmas: result, rhos: rho }; + }; + UMAP.prototype.computeMembershipStrengths = function (knnIndices, knnDistances, sigmas, rhos) { + var nSamples = knnIndices.length; + var nNeighbors = knnIndices[0].length; + var rows = utils.zeros(nSamples * nNeighbors); + var cols = utils.zeros(nSamples * nNeighbors); + var vals = utils.zeros(nSamples * nNeighbors); + for (var i = 0; i < nSamples; i++) { + for (var j = 0; j < nNeighbors; j++) { + var val = 0; + if (knnIndices[i][j] === -1) { + continue; + } + if (knnIndices[i][j] === i) { + val = 0.0; } + else if (knnDistances[i][j] - rhos[i] <= 0.0) { + val = 1.0; + } + else { + val = Math.exp(-((knnDistances[i][j] - rhos[i]) / sigmas[i])); + } + rows[i * nNeighbors + j] = i; + cols[i * nNeighbors + j] = knnIndices[i][j]; + vals[i * nNeighbors + j] = val; } - if (c <= delta * nNeighbors * data.length) { - break; + } + return { rows: rows, cols: cols, vals: vals }; + }; + UMAP.prototype.initializeSimplicialSetEmbedding = function () { + var _this = this; + var nEpochs = this.getNEpochs(); + var nComponents = this.nComponents; + var graphValues = this.graph.getValues(); + var graphMax = 0; + for (var i = 0; i < graphValues.length; i++) { + var value = graphValues[i]; + if (graphMax < graphValues[i]) { + graphMax = value; } } - var sorted = heap.deheapSort(currentGraph); - return sorted; + var graph = this.graph.map(function (value) { + if (value < graphMax / nEpochs) { + return 0; + } + else { + return value; + } + }); + this.embedding = utils.zeros(graph.nRows).map(function () { + return utils.zeros(nComponents).map(function () { + return utils.tauRand(_this.random) * 20 + -10; + }); + }); + var weights = []; + var head = []; + var tail = []; + for (var i = 0; i < 
graph.nRows; i++) { + for (var j = 0; j < graph.nCols; j++) { + var value = graph.get(i, j); + if (value) { + weights.push(value); + tail.push(i); + head.push(j); + } + } + } + var epochsPerSample = this.makeEpochsPerSample(weights, nEpochs); + return { head: head, tail: tail, epochsPerSample: epochsPerSample }; }; -} -exports.makeNNDescent = makeNNDescent; - - -/***/ }), -/* 6 */ -/***/ (function(module, exports, __webpack_require__) { - -"use strict"; - -Object.defineProperty(exports, "__esModule", { value: true }); -var utils = __webpack_require__(1); -function makeHeap(nPoints, size) { - var makeArrays = function (fillValue) { - return utils.empty(nPoints).map(function () { - return utils.filled(size, fillValue); + UMAP.prototype.makeEpochsPerSample = function (weights, nEpochs) { + var result = utils.filled(weights.length, -1.0); + var max = utils.max(weights); + var nSamples = weights.map(function (w) { return (w / max) * nEpochs; }); + nSamples.forEach(function (n, i) { + if (n > 0) + result[i] = nEpochs / nSamples[i]; }); + return result; }; - var heap = []; - heap.push(makeArrays(-1)); - heap.push(makeArrays(Infinity)); - heap.push(makeArrays(0)); - return heap; -} -exports.makeHeap = makeHeap; -function rejectionSample(nSamples, poolSize, random) { - var result = utils.zeros(nSamples); - for (var i = 0; i < nSamples; i++) { - var rejectSample = true; - var j = 0; - while (rejectSample) { - j = utils.tauRandInt(poolSize, random); - var broken = false; - for (var k = 0; k < i; k++) { - if (j === result[k]) { - broken = true; - break; + UMAP.prototype.assignOptimizationStateParameters = function (state) { + Object.assign(this.optimizationState, state); + }; + UMAP.prototype.prepareForOptimizationLoop = function () { + var _a = this, repulsionStrength = _a.repulsionStrength, learningRate = _a.learningRate, negativeSampleRate = _a.negativeSampleRate; + var _b = this.optimizationState, epochsPerSample = _b.epochsPerSample, headEmbedding = _b.headEmbedding, 
tailEmbedding = _b.tailEmbedding; + var dim = headEmbedding[0].length; + var moveOther = headEmbedding.length === tailEmbedding.length; + var epochsPerNegativeSample = epochsPerSample.map(function (e) { return e / negativeSampleRate; }); + var epochOfNextNegativeSample = __spread(epochsPerNegativeSample); + var epochOfNextSample = __spread(epochsPerSample); + this.assignOptimizationStateParameters({ + epochOfNextSample: epochOfNextSample, + epochOfNextNegativeSample: epochOfNextNegativeSample, + epochsPerNegativeSample: epochsPerNegativeSample, + moveOther: moveOther, + initialAlpha: learningRate, + alpha: learningRate, + gamma: repulsionStrength, + dim: dim, + }); + }; + UMAP.prototype.initializeOptimization = function () { + var headEmbedding = this.embedding; + var tailEmbedding = this.embedding; + var _a = this.optimizationState, head = _a.head, tail = _a.tail, epochsPerSample = _a.epochsPerSample; + var nEpochs = this.getNEpochs(); + var nVertices = this.graph.nCols; + var _b = findABParams(this.spread, this.minDist), a = _b.a, b = _b.b; + this.assignOptimizationStateParameters({ + headEmbedding: headEmbedding, + tailEmbedding: tailEmbedding, + head: head, + tail: tail, + epochsPerSample: epochsPerSample, + a: a, + b: b, + nEpochs: nEpochs, + nVertices: nVertices, + }); + }; + UMAP.prototype.optimizeLayoutStep = function (n) { + var optimizationState = this.optimizationState; + var head = optimizationState.head, tail = optimizationState.tail, headEmbedding = optimizationState.headEmbedding, tailEmbedding = optimizationState.tailEmbedding, epochsPerSample = optimizationState.epochsPerSample, epochOfNextSample = optimizationState.epochOfNextSample, epochOfNextNegativeSample = optimizationState.epochOfNextNegativeSample, epochsPerNegativeSample = optimizationState.epochsPerNegativeSample, moveOther = optimizationState.moveOther, initialAlpha = optimizationState.initialAlpha, alpha = optimizationState.alpha, gamma = optimizationState.gamma, a = 
optimizationState.a, b = optimizationState.b, dim = optimizationState.dim, nEpochs = optimizationState.nEpochs, nVertices = optimizationState.nVertices; + var clipValue = 4.0; + for (var i = 0; i < epochsPerSample.length; i++) { + if (epochOfNextSample[i] > n) { + continue; + } + var j = head[i]; + var k = tail[i]; + var current = headEmbedding[j]; + var other = tailEmbedding[k]; + var distSquared = rDist(current, other); + var gradCoeff = 0; + if (distSquared > 0) { + gradCoeff = -2.0 * a * b * Math.pow(distSquared, b - 1.0); + gradCoeff /= a * Math.pow(distSquared, b) + 1.0; + } + for (var d = 0; d < dim; d++) { + var gradD = clip(gradCoeff * (current[d] - other[d]), clipValue); + current[d] += gradD * alpha; + if (moveOther) { + other[d] += -gradD * alpha; } } - if (!broken) - rejectSample = false; + epochOfNextSample[i] += epochsPerSample[i]; + var nNegSamples = Math.floor((n - epochOfNextNegativeSample[i]) / epochsPerNegativeSample[i]); + for (var p = 0; p < nNegSamples; p++) { + var k_1 = utils.tauRandInt(nVertices, this.random); + var other_1 = tailEmbedding[k_1]; + var distSquared_1 = rDist(current, other_1); + var gradCoeff_1 = 0.0; + if (distSquared_1 > 0.0) { + gradCoeff_1 = 2.0 * gamma * b; + gradCoeff_1 /= + (0.001 + distSquared_1) * (a * Math.pow(distSquared_1, b) + 1); + } + else if (j === k_1) { + continue; + } + for (var d = 0; d < dim; d++) { + var gradD = 4.0; + if (gradCoeff_1 > 0.0) { + gradD = clip(gradCoeff_1 * (current[d] - other_1[d]), clipValue); + } + current[d] += gradD * alpha; + } + } + epochOfNextNegativeSample[i] += nNegSamples * epochsPerNegativeSample[i]; } - result[i] = j; - } - return result; -} -exports.rejectionSample = rejectionSample; -function heapPush(heap, row, weight, index, flag) { - row = Math.floor(row); - var indices = heap[0][row]; - var weights = heap[1][row]; - var isNew = heap[2][row]; - if (weight >= weights[0]) { - return 0; - } - for (var i_1 = 0; i_1 < indices.length; i_1++) { - if (index === indices[i_1]) { - 
return 0; + optimizationState.alpha = initialAlpha * (1.0 - n / nEpochs); + optimizationState.currentEpoch += 1; + return headEmbedding; + }; + UMAP.prototype.optimizeLayoutAsync = function (epochCallback) { + var _this = this; + if (epochCallback === void 0) { epochCallback = function () { return true; }; } + return new Promise(function (resolve, reject) { + var step = function () { return __awaiter(_this, void 0, void 0, function () { + var _a, nEpochs, currentEpoch, epochCompleted, shouldStop, isFinished; + return __generator(this, function (_b) { + try { + _a = this.optimizationState, nEpochs = _a.nEpochs, currentEpoch = _a.currentEpoch; + this.embedding = this.optimizeLayoutStep(currentEpoch); + epochCompleted = this.optimizationState.currentEpoch; + shouldStop = epochCallback(epochCompleted) === false; + isFinished = epochCompleted === nEpochs; + if (!shouldStop && !isFinished) { + step(); + } + else { + return [2, resolve(isFinished)]; + } + } + catch (err) { + reject(err); + } + return [2]; + }); + }); }; + step(); + }); + }; + UMAP.prototype.optimizeLayout = function (epochCallback) { + if (epochCallback === void 0) { epochCallback = function () { return true; }; } + var isFinished = false; + var embedding = []; + while (!isFinished) { + var _a = this.optimizationState, nEpochs = _a.nEpochs, currentEpoch = _a.currentEpoch; + embedding = this.optimizeLayoutStep(currentEpoch); + var epochCompleted = this.optimizationState.currentEpoch; + var shouldStop = epochCallback(epochCompleted) === false; + isFinished = epochCompleted === nEpochs || shouldStop; + } + return embedding; + }; + UMAP.prototype.getNEpochs = function () { + var graph = this.graph; + if (this.nEpochs > 0) { + return this.nEpochs; } - } - weights[0] = weight; - indices[0] = index; - isNew[0] = flag; - var i = 0; - var iSwap = 0; - while (true) { - var ic1 = 2 * i + 1; - var ic2 = ic1 + 1; - var heapShape2 = heap[0][0].length; - if (ic1 >= heapShape2) { - break; + var length = graph.nRows; + if 
(length <= 2500) { + return 500; } - else if (ic2 >= heapShape2) { - if (weights[ic1] > weight) { - iSwap = ic1; - } - else { - break; - } + else if (length <= 5000) { + return 400; } - else if (weights[ic1] >= weights[ic2]) { - if (weight < weights[ic1]) { - iSwap = ic1; - } - else { - break; - } + else if (length <= 7500) { + return 300; } else { - if (weight < weights[ic2]) { - iSwap = ic2; - } - else { - break; - } + return 200; } - weights[i] = weights[iSwap]; - indices[i] = indices[iSwap]; - isNew[i] = isNew[iSwap]; - i = iSwap; + }; + return UMAP; +}()); +exports.UMAP = UMAP; +function euclidean(x, y) { + var result = 0; + for (var i = 0; i < x.length; i++) { + result += Math.pow((x[i] - y[i]), 2); } - weights[i] = weight; - indices[i] = index; - isNew[i] = flag; - return 1; + return Math.sqrt(result); } -exports.heapPush = heapPush; -function buildCandidates(currentGraph, nVertices, nNeighbors, maxCandidates, random) { - var candidateNeighbors = makeHeap(nVertices, maxCandidates); - for (var i = 0; i < nVertices; i++) { - for (var j = 0; j < nNeighbors; j++) { - if (currentGraph[0][i][j] < 0) { - continue; - } - var idx = currentGraph[0][i][j]; - var isn = currentGraph[2][i][j]; - var d = utils.tauRand(random); - heapPush(candidateNeighbors, i, d, idx, isn); - heapPush(candidateNeighbors, idx, d, i, isn); - currentGraph[2][i][j] = 0; - } +exports.euclidean = euclidean; +function cosine(x, y) { + var result = 0.0; + var normX = 0.0; + var normY = 0.0; + for (var i = 0; i < x.length; i++) { + result += x[i] * y[i]; + normX += Math.pow(x[i], 2); + normY += Math.pow(y[i], 2); + } + if (normX === 0 && normY === 0) { + return 0; + } + else if (normX === 0 || normY === 0) { + return 1.0; + } + else { + return 1.0 - result / Math.sqrt(normX * normY); } - return candidateNeighbors; } -exports.buildCandidates = buildCandidates; -function deheapSort(heap) { - var indices = heap[0]; - var weights = heap[1]; - for (var i = 0; i < indices.length; i++) { - var indHeap = 
indices[i]; - var distHeap = weights[i]; - for (var j = 0; j < indHeap.length - 1; j++) { - var indHeapIndex = indHeap.length - j - 1; - var distHeapIndex = distHeap.length - j - 1; - var temp1 = indHeap[0]; - indHeap[0] = indHeap[indHeapIndex]; - indHeap[indHeapIndex] = temp1; - var temp2 = distHeap[0]; - distHeap[0] = distHeap[distHeapIndex]; - distHeap[distHeapIndex] = temp2; - siftDown(distHeap, indHeap, distHeapIndex, 0); - } +exports.cosine = cosine; +var OptimizationState = (function () { + function OptimizationState() { + this.currentEpoch = 0; + this.headEmbedding = []; + this.tailEmbedding = []; + this.head = []; + this.tail = []; + this.epochsPerSample = []; + this.epochOfNextSample = []; + this.epochOfNextNegativeSample = []; + this.epochsPerNegativeSample = []; + this.moveOther = true; + this.initialAlpha = 1.0; + this.alpha = 1.0; + this.gamma = 1.0; + this.a = 1.5769434603113077; + this.b = 0.8950608779109733; + this.dim = 2; + this.nEpochs = 500; + this.nVertices = 0; } - return { indices: indices, weights: weights }; + return OptimizationState; +}()); +function clip(x, clipValue) { + if (x > clipValue) + return clipValue; + else if (x < -clipValue) + return -clipValue; + else + return x; } -exports.deheapSort = deheapSort; -function siftDown(heap1, heap2, ceiling, elt) { - while (elt * 2 + 1 < ceiling) { - var leftChild = elt * 2 + 1; - var rightChild = leftChild + 1; - var swap = elt; - if (heap1[swap] < heap1[leftChild]) { - swap = leftChild; - } - if (rightChild < ceiling && heap1[swap] < heap1[rightChild]) { - swap = rightChild; +function rDist(x, y) { + var result = 0.0; + for (var i = 0; i < x.length; i++) { + result += Math.pow(x[i] - y[i], 2); + } + return result; +} +function findABParams(spread, minDist) { + var curve = function (_a) { + var _b = __read(_a, 2), a = _b[0], b = _b[1]; + return function (x) { + return 1.0 / (1.0 + a * Math.pow(x, (2 * b))); + }; + }; + var xv = utils + .linear(0, spread * 3, 300) + .map(function (val) { 
return (val < minDist ? 1.0 : val); }); + var yv = utils.zeros(xv.length).map(function (val, index) { + var gte = xv[index] >= minDist; + return gte ? Math.exp(-(xv[index] - minDist) / spread) : val; + }); + var initialValues = [0.5, 0.5]; + var data = { x: xv, y: yv }; + var options = { + damping: 1.5, + initialValues: initialValues, + gradientDifference: 10e-2, + maxIterations: 100, + errorTolerance: 10e-3, + }; + var parameterValues = LM(data, curve, options).parameterValues; + var _a = __read(parameterValues, 2), a = _a[0], b = _a[1]; + return { a: a, b: b }; +} +exports.findABParams = findABParams; +function fastIntersection(graph, target, unknownDist, farDist) { + if (unknownDist === void 0) { unknownDist = 1.0; } + if (farDist === void 0) { farDist = 5.0; } + return graph.map(function (value, row, col) { + if (target[row] === -1 || target[col] === -1) { + return value * Math.exp(-unknownDist); } - if (swap === elt) { - break; + else if (target[row] !== target[col]) { + return value * Math.exp(-farDist); } else { - var temp1 = heap1[elt]; - heap1[elt] = heap1[swap]; - heap1[swap] = temp1; - var temp2 = heap2[elt]; - heap2[elt] = heap2[swap]; - heap2[swap] = temp2; - elt = swap; + return value; } - } + }); } - - -/***/ }), -/* 7 */ -/***/ (function(module, exports, __webpack_require__) { - -"use strict"; - -var __read = (this && this.__read) || function (o, n) { - var m = typeof Symbol === "function" && o[Symbol.iterator]; - if (!m) return o; - var i = m.call(o), r, ar = [], e; - try { - while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value); - } - catch (error) { e = { error: error }; } - finally { - try { - if (r && !r.done && (m = i["return"])) m.call(i); +exports.fastIntersection = fastIntersection; +function resetLocalConnectivity(simplicialSet) { + simplicialSet = matrix.normalize(simplicialSet, "max"); + var transpose = matrix.transpose(simplicialSet); + var prodMatrix = matrix.pairwiseMultiply(transpose, simplicialSet); + 
simplicialSet = matrix.add(simplicialSet, matrix.subtract(transpose, prodMatrix)); + return matrix.eliminateZeros(simplicialSet); +} +exports.resetLocalConnectivity = resetLocalConnectivity; +function initTransform(indices, weights, embedding) { + var result = utils + .zeros(indices.length) + .map(function (z) { return utils.zeros(embedding[0].length); }); + for (var i = 0; i < indices.length; i++) { + for (var j = 0; j < indices[0].length; j++) { + for (var d = 0; d < embedding[0].length; d++) { + var a = indices[i][j]; + result[i][d] += weights[i][j] * embedding[a][d]; + } } - finally { if (e) throw e.error; } } - return ar; -}; -var __spread = (this && this.__spread) || function () { - for (var ar = [], i = 0; i < arguments.length; i++) ar = ar.concat(__read(arguments[i])); - return ar; -}; + return result; +} +exports.initTransform = initTransform; + + +/***/ }), +/* 7 */ +/***/ (function(module, exports, __webpack_require__) { + +"use strict"; + var __values = (this && this.__values) || function (o) { var m = typeof Symbol === "function" && o[Symbol.iterator], i = 0; if (m) return m.call(o); @@ -1421,184 +1799,164 @@ var __values = (this && this.__values) || function (o) { }; }; Object.defineProperty(exports, "__esModule", { value: true }); +var heap = __webpack_require__(2); +var matrix = __webpack_require__(3); +var tree = __webpack_require__(4); var utils = __webpack_require__(1); -var FlatTree = (function () { - function FlatTree(hyperplanes, offsets, children, indices) { - this.hyperplanes = hyperplanes; - this.offsets = offsets; - this.children = children; - this.indices = indices; - } - return FlatTree; -}()); -exports.FlatTree = FlatTree; -function makeForest(data, nNeighbors, nTrees, random) { - var leafSize = Math.max(10, nNeighbors); - var trees = utils - .range(nTrees) - .map(function (_, i) { return makeTree(data, leafSize, i, random); }); - var forest = trees.map(function (tree) { return flattenTree(tree, leafSize); }); - return forest; -} 
-exports.makeForest = makeForest; -function makeTree(data, leafSize, n, random) { - if (leafSize === void 0) { leafSize = 30; } - var indices = utils.range(data.length); - var tree = makeEuclideanTree(data, indices, leafSize, n, random); - return tree; -} -function makeEuclideanTree(data, indices, leafSize, q, random) { - if (leafSize === void 0) { leafSize = 30; } - if (indices.length > leafSize) { - var splitResults = euclideanRandomProjectionSplit(data, indices, random); - var indicesLeft = splitResults.indicesLeft, indicesRight = splitResults.indicesRight, hyperplane = splitResults.hyperplane, offset = splitResults.offset; - var leftChild = makeEuclideanTree(data, indicesLeft, leafSize, q + 1, random); - var rightChild = makeEuclideanTree(data, indicesRight, leafSize, q + 1, random); - var node = { leftChild: leftChild, rightChild: rightChild, isLeaf: false, hyperplane: hyperplane, offset: offset }; - return node; - } - else { - var node = { indices: indices, isLeaf: true }; - return node; - } -} -function euclideanRandomProjectionSplit(data, indices, random) { - var dim = data[0].length; - var leftIndex = utils.tauRandInt(indices.length, random); - var rightIndex = utils.tauRandInt(indices.length, random); - rightIndex += leftIndex === rightIndex ? 
1 : 0; - rightIndex = rightIndex % indices.length; - var left = indices[leftIndex]; - var right = indices[rightIndex]; - var hyperplaneOffset = 0; - var hyperplaneVector = utils.zeros(dim); - for (var i = 0; i < hyperplaneVector.length; i++) { - hyperplaneVector[i] = data[left][i] - data[right][i]; - hyperplaneOffset -= - (hyperplaneVector[i] * (data[left][i] + data[right][i])) / 2.0; - } - var nLeft = 0; - var nRight = 0; - var side = utils.zeros(indices.length); - for (var i = 0; i < indices.length; i++) { - var margin = hyperplaneOffset; - for (var d = 0; d < dim; d++) { - margin += hyperplaneVector[d] * data[indices[i]][d]; - } - if (margin === 0) { - side[i] = utils.tauRandInt(2, random); - if (side[i] === 0) { - nLeft += 1; - } - else { - nRight += 1; +function makeNNDescent(distanceFn, random) { + return function nNDescent(data, leafArray, nNeighbors, nIters, maxCandidates, delta, rho, rpTreeInit) { + if (nIters === void 0) { nIters = 10; } + if (maxCandidates === void 0) { maxCandidates = 50; } + if (delta === void 0) { delta = 0.001; } + if (rho === void 0) { rho = 0.5; } + if (rpTreeInit === void 0) { rpTreeInit = true; } + var nVertices = data.length; + var currentGraph = heap.makeHeap(data.length, nNeighbors); + for (var i = 0; i < data.length; i++) { + var indices = heap.rejectionSample(nNeighbors, data.length, random); + for (var j = 0; j < indices.length; j++) { + var d = distanceFn(data[i], data[indices[j]]); + heap.heapPush(currentGraph, i, d, indices[j], 1); + heap.heapPush(currentGraph, indices[j], d, i, 1); } } - else if (margin > 0) { - side[i] = 0; - nLeft += 1; + if (rpTreeInit) { + for (var n = 0; n < leafArray.length; n++) { + for (var i = 0; i < leafArray[n].length; i++) { + if (leafArray[n][i] < 0) { + break; + } + for (var j = i + 1; j < leafArray[n].length; j++) { + if (leafArray[n][j] < 0) { + break; + } + var d = distanceFn(data[leafArray[n][i]], data[leafArray[n][j]]); + heap.heapPush(currentGraph, leafArray[n][i], d, 
leafArray[n][j], 1); + heap.heapPush(currentGraph, leafArray[n][j], d, leafArray[n][i], 1); + } + } + } } - else { - side[i] = 1; - nRight += 1; + for (var n = 0; n < nIters; n++) { + var candidateNeighbors = heap.buildCandidates(currentGraph, nVertices, nNeighbors, maxCandidates, random); + var c = 0; + for (var i = 0; i < nVertices; i++) { + for (var j = 0; j < maxCandidates; j++) { + var p = Math.floor(candidateNeighbors[0][i][j]); + if (p < 0 || utils.tauRand(random) < rho) { + continue; + } + for (var k = 0; k < maxCandidates; k++) { + var q = Math.floor(candidateNeighbors[0][i][k]); + var cj = candidateNeighbors[2][i][j]; + var ck = candidateNeighbors[2][i][k]; + if (q < 0 || (!cj && !ck)) { + continue; + } + var d = distanceFn(data[p], data[q]); + c += heap.heapPush(currentGraph, p, d, q, 1); + c += heap.heapPush(currentGraph, q, d, p, 1); + } + } + } + if (c <= delta * nNeighbors * data.length) { + break; + } } - } - var indicesLeft = utils.zeros(nLeft); - var indicesRight = utils.zeros(nRight); - nLeft = 0; - nRight = 0; - for (var i in utils.range(side.length)) { - if (side[i] === 0) { - indicesLeft[nLeft] = indices[i]; - nLeft += 1; + var sorted = heap.deheapSort(currentGraph); + return sorted; + }; +} +exports.makeNNDescent = makeNNDescent; +function makeInitializations(distanceFn) { + var initFromRandom = function (nNeighbors, data, queryPoints, _heap) { + for (var i = 0; i < queryPoints.length; i++) { + var indices = utils.rejectionSample(nNeighbors, data.length); + for (var j = 0; j < indices.length; j++) { + if (indices[j] < 0) { + continue; + } + var d = distanceFn(data[indices[j]], queryPoints[i]); + heap.heapPush(_heap, i, d, indices[j], 1); + } } - else { - indicesRight[nRight] = indices[i]; - nRight += 1; + }; + var initFromTree = function (_tree, data, queryPoints, _heap) { + for (var i = 0; i < queryPoints.length; i++) { + var indices = tree.searchFlatTree(queryPoints[i], _tree); + for (var j = 0; j < indices.length; j++) { + if (indices[j] < 
0) { + return; + } + var d = distanceFn(data[indices[j]], queryPoints[i]); + heap.heapPush(_heap, i, d, indices[j], 1); + } } - } - return { - indicesLeft: indicesLeft, - indicesRight: indicesRight, - hyperplane: hyperplaneVector, - offset: hyperplaneOffset, + return; }; + return { initFromRandom: initFromRandom, initFromTree: initFromTree }; } -function flattenTree(tree, leafSize) { - var nNodes = numNodes(tree); - var nLeaves = numLeaves(tree); - var hyperplanes = utils - .range(nNodes) - .map(function () { return utils.zeros(tree.hyperplane.length); }); - var offsets = utils.zeros(nNodes); - var children = utils.range(nNodes).map(function () { return [-1, -1]; }); - var indices = utils - .range(nLeaves) - .map(function () { return utils.range(leafSize).map(function () { return -1; }); }); - recursiveFlatten(tree, hyperplanes, offsets, children, indices, 0, 0); - return new FlatTree(hyperplanes, offsets, children, indices); -} -function recursiveFlatten(tree, hyperplanes, offsets, children, indices, nodeNum, leafNum) { - var _a; - if (tree.isLeaf) { - children[nodeNum][0] = -leafNum; - (_a = indices[leafNum]).splice.apply(_a, __spread([0, tree.indices.length], tree.indices)); - leafNum += 1; - return { nodeNum: nodeNum, leafNum: leafNum }; - } - else { - hyperplanes[nodeNum] = tree.hyperplane; - offsets[nodeNum] = tree.offset; - children[nodeNum][0] = nodeNum + 1; - var oldNodeNum = nodeNum; - var res = recursiveFlatten(tree.leftChild, hyperplanes, offsets, children, indices, nodeNum + 1, leafNum); - nodeNum = res.nodeNum; - leafNum = res.leafNum; - children[oldNodeNum][1] = nodeNum + 1; - res = recursiveFlatten(tree.rightChild, hyperplanes, offsets, children, indices, nodeNum + 1, leafNum); - return { nodeNum: res.nodeNum, leafNum: res.leafNum }; - } -} -function numNodes(tree) { - if (tree.isLeaf) { - return 1; - } - else { - return 1 + numNodes(tree.leftChild) + numNodes(tree.rightChild); - } -} -function numLeaves(tree) { - if (tree.isLeaf) { - return 1; - } 
- else { - return numLeaves(tree.leftChild) + numLeaves(tree.rightChild); - } +exports.makeInitializations = makeInitializations; +function makeInitializedNNSearch(distanceFn) { + return function (data, graph, initialization, queryPoints) { + var e_1, _a; + var _b = matrix.getCSR(graph), indices = _b.indices, indptr = _b.indptr; + for (var i = 0; i < queryPoints.length; i++) { + var tried = new Set(initialization[0][i]); + while (true) { + var vertex = heap.smallestFlagged(initialization, i); + if (vertex === -1) { + break; + } + var candidates = indices.slice(indptr[vertex], indptr[vertex + 1]); + try { + for (var candidates_1 = __values(candidates), candidates_1_1 = candidates_1.next(); !candidates_1_1.done; candidates_1_1 = candidates_1.next()) { + var candidate = candidates_1_1.value; + if (candidate === vertex || + candidate === -1 || + tried.has(candidate)) { + continue; + } + var d = distanceFn(data[candidate], queryPoints[i]); + heap.uncheckedHeapPush(initialization, i, d, candidate, 1); + tried.add(candidate); + } + } + catch (e_1_1) { e_1 = { error: e_1_1 }; } + finally { + try { + if (candidates_1_1 && !candidates_1_1.done && (_a = candidates_1.return)) _a.call(candidates_1); + } + finally { if (e_1) throw e_1.error; } + } + } + } + return initialization; + }; } -function makeLeafArray(rpForest) { - var e_1, _a; - if (rpForest.length > 0) { - var output = []; +exports.makeInitializedNNSearch = makeInitializedNNSearch; +function initializeSearch(forest, data, queryPoints, nNeighbors, initFromRandom, initFromTree) { + var e_2, _a; + var results = heap.makeHeap(queryPoints.length, nNeighbors); + initFromRandom(nNeighbors, data, queryPoints, results); + if (forest) { try { - for (var rpForest_1 = __values(rpForest), rpForest_1_1 = rpForest_1.next(); !rpForest_1_1.done; rpForest_1_1 = rpForest_1.next()) { - var tree = rpForest_1_1.value; - output.push.apply(output, __spread(tree.indices)); + for (var forest_1 = __values(forest), forest_1_1 = forest_1.next(); 
!forest_1_1.done; forest_1_1 = forest_1.next()) { + var tree_1 = forest_1_1.value; + initFromTree(tree_1, data, queryPoints, results); } } - catch (e_1_1) { e_1 = { error: e_1_1 }; } + catch (e_2_1) { e_2 = { error: e_2_1 }; } finally { try { - if (rpForest_1_1 && !rpForest_1_1.done && (_a = rpForest_1.return)) _a.call(rpForest_1); + if (forest_1_1 && !forest_1_1.done && (_a = forest_1.return)) _a.call(forest_1); } - finally { if (e_1) throw e_1.error; } + finally { if (e_2) throw e_2.error; } } - return output; - } - else { - return [[-1]]; } + return results; } -exports.makeLeafArray = makeLeafArray; +exports.initializeSearch = initializeSearch; /***/ }), diff --git a/lib/umap-js.min.js b/lib/umap-js.min.js index 6494325..767059c 100644 --- a/lib/umap-js.min.js +++ b/lib/umap-js.min.js @@ -1 +1 @@ -!function(t){var r={};function e(n){if(r[n])return r[n].exports;var i=r[n]={i:n,l:!1,exports:{}};return t[n].call(i.exports,i,i.exports,e),i.l=!0,i.exports}e.m=t,e.c=r,e.d=function(t,r,n){e.o(t,r)||Object.defineProperty(t,r,{enumerable:!0,get:n})},e.r=function(t){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})},e.t=function(t,r){if(1&r&&(t=e(t)),8&r)return t;if(4&r&&"object"==typeof t&&t&&t.__esModule)return t;var n=Object.create(null);if(e.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:t}),2&r&&"string"!=typeof t)for(var i in t)e.d(n,i,function(r){return t[r]}.bind(null,i));return n},e.n=function(t){var r=t&&t.__esModule?function(){return t.default}:function(){return t};return e.d(r,"a",r),r},e.o=function(t,r){return Object.prototype.hasOwnProperty.call(t,r)},e.p="",e(e.s=2)}([function(t,r,e){"use strict";const n=Object.prototype.toString;t.exports=function(t){return n.call(t).endsWith("Array]")}},function(t,r,e){"use strict";var n=this&&this.__values||function(t){var r="function"==typeof Symbol&&t[Symbol.iterator],e=0;return 
r?r.call(t):{next:function(){return t&&e>=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};function i(t){return void 0===t&&(t=Math.random),t()}function o(t){for(var r=[],e=0;er?t[e]:r;return r},r.max2d=function(t){for(var r=0,e=0;er?t[e][n]:r;return r}},function(t,r,e){"use strict";Object.defineProperty(r,"__esModule",{value:!0});var n=e(3);window.UMAP=n.UMAP},function(t,r,e){"use strict";var n=this&&this.__awaiter||function(t,r,e,n){return new(e||(e=Promise))(function(i,o){function s(t){try{h(n.next(t))}catch(t){o(t)}}function a(t){try{h(n.throw(t))}catch(t){o(t)}}function h(t){t.done?i(t.value):new e(function(r){r(t.value)}).then(s,a)}h((n=n.apply(t,r||[])).next())})},i=this&&this.__generator||function(t,r){var e,n,i,o,s={label:0,sent:function(){if(1&i[0])throw i[1];return i[1]},trys:[],ops:[]};return o={next:a(0),throw:a(1),return:a(2)},"function"==typeof Symbol&&(o[Symbol.iterator]=function(){return this}),o;function a(o){return function(a){return function(o){if(e)throw new TypeError("Generator is already executing.");for(;s;)try{if(e=1,n&&(i=2&o[0]?n.return:o[0]?n.throw||((i=n.return)&&i.call(n),0):n.next)&&!(i=i.call(n,o[1])).done)return i;switch(n=0,i&&(o=[2&o[0],i.value]),o[0]){case 0:case 1:i=o;break;case 4:return s.label++,{value:o[1],done:!1};case 5:s.label++,n=o[1],o=[0];continue;case 7:o=s.ops.pop(),s.trys.pop();continue;default:if(!(i=(i=s.trys).length>0&&i[i.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!i||o[1]>i[0]&&o[1]0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},s=this&&this.__spread||function(){for(var t=[],r=0;r0});if(g.length>=e){var v=Math.floor(e),p=e-v;v>0?(s[h]=g[v-1],p>1e-5&&(s[h]+=p*(g[v]-g[v-1]))):s[h]=p*g[0]}else g.length>0&&(s[h]=l.max(g));for(var w=0;w0?Math.exp(-b/c):1}if(Math.abs(d-o)<1e-5)break;d>o?c=(u+(f=c))/2:(u=c,f===1/0?c*=2:c=(u+f)/2)}if(a[h]=c,s[h]>0){var M=l.mean(m);a[h]<.001*M&&(a[h]=.001*M)}else{var 
x=l.mean(t.map(l.mean));a[h]<.001*x&&(a[h]=.001*x)}}return{sigmas:a,rhos:s}},t.prototype.computeMembershipStrengths=function(t,r,e,n){for(var i=t.length,o=t[0].length,s=l.zeros(i*o),a=l.zeros(i*o),h=l.zeros(i*o),u=0;u0&&(e[n]=r/i[n])}),e},t.prototype.initializeOptimization=function(){var t=this.embedding,r=this.embedding,e=this.optimizationState,n=e.head,i=e.tail,o=e.epochsPerSample,a=this.getNEpochs(),h=this.graph.nCols,u=w(this.spread,this.minDist),l=u.a,f=u.b,c=t[0].length,m=t.length===r.length,g=o.map(function(t){return t/5}),v=s(g),p=s(o);Object.assign(this.optimizationState,{isInitialized:!0,headEmbedding:t,tailEmbedding:r,head:n,tail:i,epochsPerSample:o,epochOfNextSample:p,epochOfNextNegativeSample:v,epochsPerNegativeSample:g,moveOther:m,initialAlpha:1,alpha:1,gamma:1,a:l,b:f,dim:c,nEpochs:a,nVertices:h})},t.prototype.optimizeLayoutStep=function(t){for(var r=this.optimizationState,e=r.head,n=r.tail,i=r.headEmbedding,o=r.tailEmbedding,s=r.epochsPerSample,a=r.epochOfNextSample,h=r.epochOfNextNegativeSample,u=r.epochsPerNegativeSample,f=r.moveOther,c=r.initialAlpha,m=r.alpha,g=r.gamma,w=r.a,d=r.b,y=r.dim,b=r.nEpochs,M=r.nVertices,x=0;xt)){var S=e[x],E=n[x],R=i[S],k=o[E],A=p(R,k),N=0;A>0&&(N=-2*w*d*Math.pow(A,d-1),N/=w*Math.pow(A,d)+1);for(var V=0;V0)_=2*g*d,_/=(.001+D)*(w*Math.pow(D,d)+1);else if(S===j)continue;for(V=0;V0&&(z=v(_*(R[V]-P[V]),4)),R[V]+=z*m}}h[x]+=C*u[x]}return r.alpha=c*(1-t/b),r.currentEpoch+=1,this.embedding=i,r.currentEpoch},t.prototype.optimizeLayout=function(t){var r=this;return void 0===t&&(t=function(){return!0}),this.optimizationState.isInitialized||this.initializeOptimization(),new Promise(function(e,o){var s=function(){return n(r,void 0,void 0,function(){var r,n,a,h,u,l;return i(this,function(i){try{if(r=this.optimizationState,n=r.nEpochs,a=r.currentEpoch,h=this.optimizeLayoutStep(a),u=!1===t(h),l=h===n,u||l)return[2,e(l)];s()}catch(t){o(t)}return[2]})})};s()})},t.prototype.getNEpochs=function(){var 
t=this.graph;if(this.nEpochs>0)return this.nEpochs;var r=t.nRows;return r<=2500?500:r<=5e3?400:r<=7500?300:200},t}();function m(t,r){for(var e=0,n=0;nr?r:t<-r?-r:t}function p(t,r){for(var e=0,n=0;n=r?Math.exp(-(e[i]-r)/t):n}),i=f({x:e,y:n},function(t){var r=o(t,2),e=r[0],n=r[1];return function(t){return 1/(1+e*Math.pow(t,2*n))}},{damping:1.5,initialValues:[.5,.5],gradientDifference:.1,maxIterations:100,errorTolerance:.01}).parameterValues,s=o(i,2);return{a:s[0],b:s[1]}}function d(t,r,e,n){return void 0===e&&(e=1),void 0===n&&(n=5),t.map(function(t,i,o){return-1===r[i]||-1===r[o]?t*Math.exp(-e):r[i]!==r[o]?t*Math.exp(-n):t})}function y(t){t=a.normalize(t,"max");var r=a.transpose(t),e=a.pairwiseMultiply(r,t);return t=a.add(t,a.subtract(r,e)),a.eliminateZeros(t)}r.findABParams=w,r.fastIntersection=d,r.resetLocalConnectivity=y},function(t,r,e){"use strict";var n,i=this&&this.__read||function(t,r){var e="function"==typeof Symbol&&t[Symbol.iterator];if(!e)return t;var n,i,o=e.call(t),s=[];try{for(;(void 0===r||r-- >0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},o=this&&this.__spread||function(){for(var t=[],r=0;r=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};Object.defineProperty(r,"__esModule",{value:!0});var a=e(1),h=function(){function t(t,r,e,n){this.entries=new Map,this.nRows=0,this.nCols=0,this.rows=o(t),this.cols=o(r),this.values=o(e);for(var i=0;ir?t[e]:r;return t.map(function(t){return t/r})},n.l1=function(t){for(var r=0,e=0;e=s[0])return 0;for(var h=0;h=m)break;if(c>=m){if(!(s[f]>e))break;l=f}else if(s[f]>=s[c]){if(!(e0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},i=this&&this.__spread||function(){for(var t=[],r=0;r=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};Object.defineProperty(r,"__esModule",{value:!0});var s=e(1),a=function(){return 
function(t,r,e,n){this.hyperplanes=t,this.offsets=r,this.children=e,this.indices=n}}();r.FlatTree=a,r.makeForest=function(t,r,e,n){var o=Math.max(10,r);return s.range(e).map(function(r,e){return function(t,r,e,n){void 0===r&&(r=30);var i=s.range(t.length);return function t(r,e,n,i,o){if(void 0===n&&(n=30),e.length>n){var a=function(t,r,e){var n=t[0].length,i=s.tauRandInt(r.length,e),o=s.tauRandInt(r.length,e);o=(o+=i===o?1:0)%r.length;for(var a=r[i],h=r[o],u=0,l=s.zeros(n),f=0;f0?(g[f]=0,c+=1):(g[f]=1,m+=1)}var w=s.zeros(c),d=s.zeros(m);for(var f in c=0,m=0,s.range(g.length))0===g[f]?(w[c]=r[f],c+=1):(d[m]=r[f],m+=1);return{indicesLeft:w,indicesRight:d,hyperplane:l,offset:u}}(r,e,o),h=a.indicesLeft,u=a.indicesRight,l=a.hyperplane,f=a.offset,c=t(r,h,n,i+1,o),m=t(r,u,n,i+1,o),g={leftChild:c,rightChild:m,isLeaf:!1,hyperplane:l,offset:f};return g}var g={indices:e,isLeaf:!0};return g}(t,i,r,e,n)}(t,o,e,n)}).map(function(t){return function(t,r){var e=function t(r){return r.isLeaf?1:1+t(r.leftChild)+t(r.rightChild)}(t),n=function t(r){return r.isLeaf?1:t(r.leftChild)+t(r.rightChild)}(t),o=s.range(e).map(function(){return s.zeros(t.hyperplane.length)}),h=s.zeros(e),u=s.range(e).map(function(){return[-1,-1]}),l=s.range(n).map(function(){return s.range(r).map(function(){return-1})});return function t(r,e,n,o,s,a,h){var u;if(r.isLeaf)return o[a][0]=-h,(u=s[h]).splice.apply(u,i([0,r.indices.length],r.indices)),{nodeNum:a,leafNum:h+=1};e[a]=r.hyperplane,n[a]=r.offset,o[a][0]=a+1;var l=a,f=t(r.leftChild,e,n,o,s,a+1,h);return a=f.nodeNum,h=f.leafNum,o[l][1]=a+1,{nodeNum:(f=t(r.rightChild,e,n,o,s,a+1,h)).nodeNum,leafNum:f.leafNum}}(t,o,h,u,l,0,0),new a(o,h,u,l)}(t,o)})},r.makeLeafArray=function(t){var r,e;if(t.length>0){var n=[];try{for(var s=o(t),a=s.next();!a.done;a=s.next()){var h=a.value;n.push.apply(n,i(h.indices))}}catch(t){r={error:t}}finally{try{a&&!a.done&&(e=s.return)&&e.call(s)}finally{if(r)throw r.error}}return n}return[[-1]]}},function(t,r,e){"use strict";var 
n=e(9);function i(t,r,e){var n=0;const i=e(r);for(var o=0;or&&(r=t[e]);return r};var s=function(t){if(!i()(t))throw new TypeError("input must be an array");if(0===t.length)throw new TypeError("input must not be empty");for(var r=t[0],e=1;e1&&void 0!==arguments[1]?arguments[1]:{};if(!i()(t))throw new TypeError("input must be an array");if(0===t.length)throw new TypeError("input must not be empty");if(void 0!==e.output){if(!i()(e.output))throw new TypeError("output option must be an array if specified");r=e.output}else r=new Array(t.length);var n=s(t),a=o(t);if(n===a)throw new RangeError("minimum and maximum input values are equal. Cannot rescale a constant array");var h=e.min,u=void 0===h?e.autoMinMax?n:0:h,l=e.max,f=void 0===l?e.autoMinMax?a:1:l;if(u>=f)throw new RangeError("min option must be smaller than max option");for(var c=(f-u)/(a-n),m=0;mMath.abs(h[i])&&(i=r);if(i!==e){for(n=0;n=0;i--){for(n=0;no?t[i][o]:i===o?1:0;return n}get upperTriangularMatrix(){for(var t=this.LU,r=t.rows,e=t.columns,n=new I(r,e),i=0;iMath.abs(r)?(e=r/t,Math.abs(t)*Math.sqrt(1+e*e)):0!==r?(e=t/r,Math.abs(r)*Math.sqrt(1+e*e)):0}function l(t,r,e){for(var n=new Array(t),i=0;i=0;t--)if(0!==p[t]){for(let r=t+1;r=0;t--){if(t0;){let t,r;for(t=R-2;t>=-1&&-1!==t;t--){const r=Number.MIN_VALUE+A*Math.abs(p[t]+Math.abs(p[t+1]));if(Math.abs(y[t])<=r||Number.isNaN(y[t])){y[t]=0;break}}if(t===R-2)r=4;else{let e;for(e=R-1;e>=t&&e!==t;e--){let r=(e!==R?Math.abs(y[e]):0)+(e!==t+1?Math.abs(y[e-1]):0);if(Math.abs(p[e])<=A*r){p[e]=0;break}}e===t?r=3:e===R-1?r=1:(r=2,t=e)}switch(t++,r){case 1:{let r=y[R-2];y[R-2]=0;for(let e=R-2;e>=t;e--){let i=u(p[e],r),o=p[e]/i,s=r/i;if(p[e]=i,e!==t&&(r=-s*y[e-1],y[e-1]=o*y[e-1]),f)for(let t=0;t=p[t+1]);){let r=p[t];if(p[t]=p[t+1],p[t+1]=r,f&&tr?i[o][e]=t[o][e]/this.s[e]:i[o][e]=0;var o=this.U,s=o.length,a=o[0].length,h=new I(e,s);for(let t=0;tt&&r++;return r}get diagonal(){return this.s}get threshold(){return Number.EPSILON/2*Math.max(this.m,this.n)*this.s[0]}get 
leftSingularVectors(){return I.isMatrix(this.U)||(this.U=new I(this.U)),this.U}get rightSingularVectors(){return I.isMatrix(this.V)||(this.V=new I(this.V)),this.V}get diagonalMatrix(){return I.diag(this.s)}}function c(t,r,e){var n=e?t.rows:t.rows-1;if(r<0||r>n)throw new RangeError("Row index out of range")}function m(t,r,e){var n=e?t.columns:t.columns-1;if(r<0||r>n)throw new RangeError("Column index out of range")}function g(t,r){if(r.to1DArray&&(r=r.to1DArray()),r.length!==t.columns)throw new RangeError("vector size must be the same as the number of columns");return r}function v(t,r){if(r.to1DArray&&(r=r.to1DArray()),r.length!==t.rows)throw new RangeError("vector size must be the same as the number of rows");return r}function p(t,r,e){return{row:w(t,r),column:d(t,e)}}function w(t,r){if("object"!=typeof r)throw new TypeError("unexpected type for row indices");if(r.some(r=>r<0||r>=t.rows))throw new RangeError("row indices are out of range");return Array.isArray(r)||(r=Array.from(r)),r}function d(t,r){if("object"!=typeof r)throw new TypeError("unexpected type for column indices");if(r.some(r=>r<0||r>=t.columns))throw new RangeError("column indices are out of range");return Array.isArray(r)||(r=Array.from(r)),r}function y(t,r,e,n,i){if(5!==arguments.length)throw new RangeError("expected 4 arguments");if(b("startRow",r),b("endRow",e),b("startColumn",n),b("endColumn",i),r>e||n>i||r<0||r>=t.rows||e<0||e>=t.rows||n<0||n>=t.columns||i<0||i>=t.columns)throw new RangeError("Submatrix indices are out of range")}function b(t,r){if("number"!=typeof r)throw new TypeError(`${t} must be a number`)}class M extends(C()){constructor(t,r,e){super(),this.matrix=t,this.rows=r,this.columns=e}static get[Symbol.species](){return I}}class x extends M{constructor(t){super(t,t.columns,t.rows)}set(t,r,e){return this.matrix.set(r,t,e),this}get(t,r){return this.matrix.get(r,t)}}class S extends M{constructor(t,r){super(t,1,t.columns),this.row=r}set(t,r,e){return 
this.matrix.set(this.row,r,e),this}get(t,r){return this.matrix.get(this.row,r)}}class E extends M{constructor(t,r,e,n,i){y(t,r,e,n,i),super(t,e-r+1,i-n+1),this.startRow=r,this.startColumn=n}set(t,r,e){return this.matrix.set(this.startRow+t,this.startColumn+r,e),this}get(t,r){return this.matrix.get(this.startRow+t,this.startColumn+r)}}class R extends M{constructor(t,r,e){var n=p(t,r,e);super(t,n.row.length,n.column.length),this.rowIndices=n.row,this.columnIndices=n.column}set(t,r,e){return this.matrix.set(this.rowIndices[t],this.columnIndices[r],e),this}get(t,r){return this.matrix.get(this.rowIndices[t],this.columnIndices[r])}}class k extends M{constructor(t,r){super(t,(r=w(t,r)).length,t.columns),this.rowIndices=r}set(t,r,e){return this.matrix.set(this.rowIndices[t],r,e),this}get(t,r){return this.matrix.get(this.rowIndices[t],r)}}class A extends M{constructor(t,r){r=d(t,r),super(t,t.rows,r.length),this.columnIndices=r}set(t,r,e){return this.matrix.set(t,this.columnIndices[r],e),this}get(t,r){return this.matrix.get(t,this.columnIndices[r])}}class N extends M{constructor(t,r){super(t,t.rows,1),this.column=r}set(t,r,e){return this.matrix.set(t,this.column,e),this}get(t){return this.matrix.get(t,this.column)}}class V extends M{constructor(t){super(t,t.rows,t.columns)}set(t,r,e){return this.matrix.set(this.rows-t-1,r,e),this}get(t,r){return this.matrix.get(this.rows-t-1,r)}}class z extends M{constructor(t){super(t,t.rows,t.columns)}set(t,r,e){return this.matrix.set(t,this.columns-r-1,e),this}get(t,r){return this.matrix.get(t,this.columns-r-1)}}function C(t){void 0===t&&(t=Object);class r extends t{static get[Symbol.species](){return this}static from1DArray(t,r,e){if(t*r!==e.length)throw new RangeError("Data length does not match given dimensions");for(var n=new this(t,r),i=0;it&&(t=this.get(r,e));return t}maxIndex(){for(var t=this.get(0,0),r=[0,0],e=0;et&&(t=this.get(e,n),r[0]=e,r[1]=n);return r}min(){for(var t=this.get(0,0),r=0;rr&&(r=this.get(t,e));return 
r}maxRowIndex(t){c(this,t);for(var r=this.get(t,0),e=[t,0],n=1;nr&&(r=this.get(t,n),e[1]=n);return e}minRow(t){c(this,t);for(var r=this.get(t,0),e=1;er&&(r=this.get(e,t));return r}maxColumnIndex(t){m(this,t);for(var r=this.get(0,t),e=[0,t],n=1;nr&&(r=this.get(n,t),e[0]=n);return e}minColumn(t){m(this,t);for(var r=this.get(0,t),e=1;e=(r=void 0===r?1:r))throw new RangeError("min should be strictly smaller than max");for(var e=this.constructor.empty(this.rows,this.columns),n=0;n=(r=void 0===r?1:r))throw new RangeError("min should be strictly smaller than max");for(var e=this.constructor.empty(this.rows,this.columns),n=0;ne||r<0||r>=this.columns||e<0||e>=this.columns)throw new RangeError("Argument out of range");for(var n=new this.constructor[Symbol.species](t.length,e-r+1),i=0;i=this.rows)throw new RangeError(`Row index out of range: ${t[i]}`);n.set(i,o-r,this.get(t[i],o))}return n}subMatrixColumn(t,r,e){if(void 0===r&&(r=0),void 0===e&&(e=this.rows-1),r>e||r<0||r>=this.rows||e<0||e>=this.rows)throw new RangeError("Argument out of range");for(var n=new this.constructor[Symbol.species](e-r+1,t.length),i=0;i=this.columns)throw new RangeError(`Column index out of range: ${t[i]}`);n.set(o-r,i,this.get(o,t[i]))}return n}setSubMatrix(t,r,e){y(this,r,r+(t=this.constructor.checkMatrix(t)).rows-1,e,e+t.columns-1);for(var n=0;nt?i[o]=1/i[o]:i[o]=0;return i=this.constructor[Symbol.species].diag(i),n.mmul(i.mmul(e.transposeView()))}clone(){for(var t=new this.constructor[Symbol.species](this.rows,this.columns),r=0;r>","signPropagatingRightShift"],[">>>","rightShift","zeroFillRightShift"]]){var u=o(F("\n(function %name%(value) {\n if (typeof value === 'number') return this.%name%S(value);\n return this.%name%M(value);\n})\n",{name:s[1],op:s[0]})),l=o(F("\n(function %name%S(value) {\n for (var i = 0; i < this.rows; i++) {\n for (var j = 0; j < this.columns; j++) {\n this.set(i, j, this.get(i, j) %op% value);\n }\n }\n return 
this;\n})\n",{name:`${s[1]}S`,op:s[0]})),w=o(F("\n(function %name%M(matrix) {\n matrix = this.constructor.checkMatrix(matrix);\n if (this.rows !== matrix.rows ||\n this.columns !== matrix.columns) {\n throw new RangeError('Matrices dimensions must be equal');\n }\n for (var i = 0; i < this.rows; i++) {\n for (var j = 0; j < this.columns; j++) {\n this.set(i, j, this.get(i, j) %op% matrix.get(i, j));\n }\n }\n return this;\n})\n",{name:`${s[1]}M`,op:s[0]})),d=o(F("\n(function %name%(matrix, value) {\n var newMatrix = new this[Symbol.species](matrix);\n return newMatrix.%name%(value);\n})\n",{name:s[1]}));for(n=1;n0){if(super(t),!(Number.isInteger(r)&&r>0))throw new TypeError("nColumns must be a positive integer");for(e=0;e=0;o--){for(i=0;i=0;e--){for(t=0;ti)return new Array(r.rows+1).fill(0);for(var o=r.addRow(e,[0]),s=0;s0;a--){for(f=0,s=0,u=0;u0&&(o=-o),r[a]=f*o,s-=i*o,e[a-1]=i-o,h=0;hl){0;do{for(1,i=e[l],c=(e[l+1]-i)/(2*r[l]),m=u(c,1),c<0&&(m=-m),e[l]=r[l]/(c+m),e[l+1]=r[l]*(c+m),g=e[l+1],o=i-e[l],s=l+2;s=l;s--)for(w=p,p=v,b=y,i=v*r[s],o=v*c,m=u(c,r[s]),r[s+1]=y*m,y=r[s]/m,c=(v=c/m)*e[s]-y*i,e[s+1]=o+y*(v*i+y*e[s]),h=0;hS*x)}e[l]=e[l]+M,r[l]=0}for(s=0;s=u;a--)e[a]=r[a][u-1]/l,s+=e[a]*e[a];for(o=Math.sqrt(s),e[u]>0&&(o=-o),s-=e[u]*o,e[u]=e[u]-o,h=u;h=u;a--)i+=e[a]*r[a][h];for(i/=s,a=u;a<=f;a++)r[a][h]-=i*e[a]}for(a=0;a<=f;a++){for(i=0,h=f;h>=u;h--)i+=e[h]*r[a][h];for(i/=s,h=u;h<=f;h++)r[a][h]-=i*e[h]}e[u]=l*e[u],r[u][u-1]=l*o}}for(a=0;a=1;u--)if(0!==r[u][u-1]){for(a=u+1;a<=f;a++)e[a]=r[a][u-1];for(h=u;h<=f;h++){for(o=0,a=u;a<=f;a++)o+=e[a]*n[a][h];for(o=o/e[u]/r[u][u-1],a=u;a<=f;a++)n[a][h]+=o*e[a]}}}(o,c,m,s),function(t,r,e,n,i){var 
o,s,a,h,u,l,f,c,m,g,v,p,w,d,y,b=t-1,M=t-1,x=Number.EPSILON,S=0,E=0,R=0,k=0,A=0,N=0,V=0,z=0;for(o=0;oM)&&(e[o]=i[o][o],r[o]=0),s=Math.max(o-1,0);s=0;){for(h=b;h>0&&(0===(N=Math.abs(i[h-1][h-1])+Math.abs(i[h][h]))&&(N=E),!(Math.abs(i[h][h-1])=0){for(V=R>=0?R+V:R-V,e[b-1]=c+V,e[b]=e[b-1],0!==V&&(e[b]=c-f/V),r[b-1]=0,r[b]=0,c=i[b][b-1],N=Math.abs(c)+Math.abs(V),R=c/N,k=V/N,A=Math.sqrt(R*R+k*k),R/=A,k/=A,s=b-1;s0){for(N=Math.sqrt(N),m=h&&(V=i[u][u],R=((A=c-V)*(N=m-V)-f)/i[u+1][u]+i[u][u+1],k=i[u+1][u+1]-V-A-N,A=i[u+2][u+1],N=Math.abs(R)+Math.abs(k)+Math.abs(A),R/=N,k/=N,A/=N,u!==h)&&!(Math.abs(i[u][u-1])*(Math.abs(k)+Math.abs(A))u+2&&(i[o][o-3]=0);for(a=u;a<=b-1&&(d=a!==b-1,a!==u&&(R=i[a][a-1],k=i[a+1][a-1],A=d?i[a+2][a-1]:0,0!==(c=Math.abs(R)+Math.abs(k)+Math.abs(A))&&(R/=c,k/=c,A/=c)),0!==c);a++)if(N=Math.sqrt(R*R+k*k+A*A),R<0&&(N=-N),0!==N){for(a!==u?i[a][a-1]=-N*c:h!==u&&(i[a][a-1]=-i[a][a-1]),c=(R+=N)/N,m=k/N,V=A/N,k/=R,A/=R,s=a;s=0;b--)if(R=e[b],0===(k=r[b]))for(h=b,i[b][b]=1,o=b-1;o>=0;o--){for(f=i[o][o]-R,A=0,s=h;s<=b;s++)A+=i[o][s]*i[s][b];if(r[o]<0)V=f,N=A;else if(h=o,0===r[o]?i[o][b]=0!==f?-A/f:-A/(x*E):(c=i[o][o+1],m=i[o+1][o],k=(e[o]-R)*(e[o]-R)+r[o]*r[o],l=(c*N-V*A)/k,i[o][b]=l,i[o+1][b]=Math.abs(c)>Math.abs(V)?(-A-f*l)/c:(-N-m*l)/V),l=Math.abs(i[o][b]),x*l*l>1)for(s=o;s<=b;s++)i[s][b]=i[s][b]/l}else if(k<0)for(h=b-1,Math.abs(i[b][b-1])>Math.abs(i[b-1][b])?(i[b-1][b-1]=k/i[b][b-1],i[b-1][b]=-(i[b][b]-R)/i[b][b-1]):(y=F(0,-i[b-1][b],i[b-1][b-1]-R,k),i[b-1][b-1]=y[0],i[b-1][b]=y[1]),i[b][b-1]=0,i[b][b]=1,o=b-2;o>=0;o--){for(g=0,v=0,s=h;s<=b;s++)g+=i[o][s]*i[s][b-1],v+=i[o][s]*i[s][b];if(f=i[o][o]-R,r[o]<0)V=f,A=g,N=v;else 
if(h=o,0===r[o]?(y=F(-g,-v,f,k),i[o][b-1]=y[0],i[o][b]=y[1]):(c=i[o][o+1],m=i[o+1][o],p=(e[o]-R)*(e[o]-R)+r[o]*r[o]-k*k,w=2*(e[o]-R)*k,0===p&&0===w&&(p=x*E*(Math.abs(f)+Math.abs(k)+Math.abs(c)+Math.abs(m)+Math.abs(V))),y=F(c*A-V*g+k*v,c*N-V*v-k*g,p,w),i[o][b-1]=y[0],i[o][b]=y[1],Math.abs(c)>Math.abs(V)+Math.abs(k)?(i[o+1][b-1]=(-g-f*i[o][b-1]+k*i[o][b])/c,i[o+1][b]=(-v-f*i[o][b]-k*i[o][b-1])/c):(y=F(-A-m*i[o][b-1],-N-m*i[o][b],V,k),i[o+1][b-1]=y[0],i[o+1][b]=y[1])),l=Math.max(Math.abs(i[o][b-1]),Math.abs(i[o][b])),x*l*l>1)for(s=o;s<=b;s++)i[s][b-1]=i[s][b-1]/l,i[s][b]=i[s][b]/l}for(o=0;oM)for(s=o;s=0;s--)for(o=0;o<=M;o++){for(V=0,a=0;a<=Math.min(s,M);a++)V+=n[o][a]*i[a][s];n[o][s]=V}}(o,h,a,s,c)}this.n=o,this.e=h,this.d=a,this.V=s}get realEigenvalues(){return this.d}get imaginaryEigenvalues(){return this.e}get eigenvectorMatrix(){return I.isMatrix(this.V)||(this.V=new I(this.V)),this.V}get diagonalMatrix(){var t,r,e=this.n,n=this.e,i=this.d,o=new I(e,e);for(t=0;t0?o[t][t+1]=n[t]:n[t]<0&&(o[t][t-1]=n[t])}return o}}function F(t,r,e,n){var i,o;return Math.abs(e)>Math.abs(n)?[(t+(i=n/e)*r)/(o=e+i*n),(r-i*t)/o]:[((i=e/n)*t+r)/(o=n+i*e),(i*r-t)/o]}class W{constructor(t){if(!(t=P.checkMatrix(t)).isSymmetric())throw new Error("Matrix is not symmetric");var r,e,n,i=t,o=i.rows,s=new I(o,o),a=!0;for(e=0;e0,s[e][e]=Math.sqrt(Math.max(u,0)),n=e+1;n=0;o--)for(i=0;i=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};function i(t,r){return void 0===r&&(r=Math.random),Math.floor(r()*t)}function o(t){return void 0===t&&(t=Math.random),t()}function s(t){for(var r=[],e=0;er?t[e]:r;return r},r.max2d=function(t){for(var r=0,e=0;er?t[e][n]:r;return r},r.rejectionSample=function(t,r){for(var e=h(t),n=0;n=a[0])return 0;for(var h=0;h=s[0])return 0;s[0]=e,o[0]=n,a[0]=i;for(var h=0,u=0;;){var l=2*h+1,c=l+1,f=t[0][0].length;if(l>=f)break;if(c>=f){if(!(s[l]>e))break;u=l}else if(s[l]>=s[c]){if(!(ee.length;a++)1===i[a]&&n[a]=0?(i[s]=0,Math.floor(e[s])):-1}},function(t,r,e){"use strict";var 
n,i=this&&this.__read||function(t,r){var e="function"==typeof Symbol&&t[Symbol.iterator];if(!e)return t;var n,i,o=e.call(t),s=[];try{for(;(void 0===r||r-- >0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},o=this&&this.__spread||function(){for(var t=[],r=0;r=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};Object.defineProperty(r,"__esModule",{value:!0});var a=e(1),h=function(){function t(t,r,e,n){this.entries=new Map,this.nRows=0,this.nCols=0,this.rows=o(t),this.cols=o(r),this.values=o(e);for(var i=0;ir?t:r})},r.multiplyScalar=function(t,r){return t.map(function(t){return t*r})},r.eliminateZeros=function(t){for(var r=new Set,e=t.getValues(),n=t.getRows(),i=t.getCols(),o=0;or?t[e]:r;return t.map(function(t){return t/r})},n.l1=function(t){for(var r=0,e=0;e0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},i=this&&this.__spread||function(){for(var t=[],r=0;r=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};Object.defineProperty(r,"__esModule",{value:!0});var s=e(1),a=function(){return function(t,r,e,n){this.hyperplanes=t,this.offsets=r,this.children=e,this.indices=n}}();function h(t,r,e){for(var n=r,i=0;i0?0:1}r.FlatTree=a,r.makeForest=function(t,r,e,n){var o=Math.max(10,r);return s.range(e).map(function(r,e){return function(t,r,e,n){void 0===r&&(r=30);var i=s.range(t.length);return function t(r,e,n,i,o){if(void 0===n&&(n=30),e.length>n){var a=function(t,r,e){var n=t[0].length,i=s.tauRandInt(r.length,e),o=s.tauRandInt(r.length,e);o=(o+=i===o?1:0)%r.length;for(var a=r[i],h=r[o],u=0,l=s.zeros(n),c=0;c0?(g[c]=0,f+=1):(g[c]=1,m+=1)}var d=s.zeros(f),w=s.zeros(m);for(var c in 
f=0,m=0,s.range(g.length))0===g[c]?(d[f]=r[c],f+=1):(w[m]=r[c],m+=1);return{indicesLeft:d,indicesRight:w,hyperplane:l,offset:u}}(r,e,o),h=a.indicesLeft,u=a.indicesRight,l=a.hyperplane,c=a.offset,f=t(r,h,n,i+1,o),m=t(r,u,n,i+1,o),g={leftChild:f,rightChild:m,isLeaf:!1,hyperplane:l,offset:c};return g}var g={indices:e,isLeaf:!0};return g}(t,i,r,e,n)}(t,o,e,n)}).map(function(t){return function(t,r){var e=function t(r){return r.isLeaf?1:1+t(r.leftChild)+t(r.rightChild)}(t),n=function t(r){return r.isLeaf?1:t(r.leftChild)+t(r.rightChild)}(t),o=s.range(e).map(function(){return s.zeros(t.hyperplane.length)}),h=s.zeros(e),u=s.range(e).map(function(){return[-1,-1]}),l=s.range(n).map(function(){return s.range(r).map(function(){return-1})});return function t(r,e,n,o,s,a,h){var u;if(r.isLeaf)return o[a][0]=-h,(u=s[h]).splice.apply(u,i([0,r.indices.length],r.indices)),{nodeNum:a,leafNum:h+=1};e[a]=r.hyperplane,n[a]=r.offset,o[a][0]=a+1;var l=a,c=t(r.leftChild,e,n,o,s,a+1,h);return a=c.nodeNum,h=c.leafNum,o[l][1]=a+1,{nodeNum:(c=t(r.rightChild,e,n,o,s,a+1,h)).nodeNum,leafNum:c.leafNum}}(t,o,h,u,l,0,0),new a(o,h,u,l)}(t,o)})},r.makeLeafArray=function(t){var r,e;if(t.length>0){var n=[];try{for(var s=o(t),a=s.next();!a.done;a=s.next()){var h=a.value;n.push.apply(n,i(h.indices))}}catch(t){r={error:t}}finally{try{a&&!a.done&&(e=s.return)&&e.call(s)}finally{if(r)throw r.error}}return n}return[[-1]]},r.searchFlatTree=function(t,r){for(var e=0;r.children[e][0]>0;)e=0===h(r.hyperplanes[e],r.offsets[e],t)?r.children[e][0]:r.children[e][1];var n=-1*r.children[e][0];return r.indices[n]}},function(t,r,e){"use strict";Object.defineProperty(r,"__esModule",{value:!0});var n=e(6);window.UMAP=n.UMAP},function(t,r,e){"use strict";var n=this&&this.__awaiter||function(t,r,e,n){return new(e||(e=Promise))(function(i,o){function s(t){try{h(n.next(t))}catch(t){o(t)}}function a(t){try{h(n.throw(t))}catch(t){o(t)}}function h(t){t.done?i(t.value):new 
e(function(r){r(t.value)}).then(s,a)}h((n=n.apply(t,r||[])).next())})},i=this&&this.__generator||function(t,r){var e,n,i,o,s={label:0,sent:function(){if(1&i[0])throw i[1];return i[1]},trys:[],ops:[]};return o={next:a(0),throw:a(1),return:a(2)},"function"==typeof Symbol&&(o[Symbol.iterator]=function(){return this}),o;function a(o){return function(a){return function(o){if(e)throw new TypeError("Generator is already executing.");for(;s;)try{if(e=1,n&&(i=2&o[0]?n.return:o[0]?n.throw||((i=n.return)&&i.call(n),0):n.next)&&!(i=i.call(n,o[1])).done)return i;switch(n=0,i&&(o=[2&o[0],i.value]),o[0]){case 0:case 1:i=o;break;case 4:return s.label++,{value:o[1],done:!1};case 5:s.label++,n=o[1],o=[0];continue;case 7:o=s.ops.pop(),s.trys.pop();continue;default:if(!(i=(i=s.trys).length>0&&i[i.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!i||o[1]>i[0]&&o[1]0)&&!(n=o.next()).done;)s.push(n.value)}catch(t){i={error:t}}finally{try{n&&!n.done&&(e=o.return)&&e.call(o)}finally{if(i)throw i.error}}return s},s=this&&this.__spread||function(){for(var t=[],r=0;r0&&i.set(o,l,c)}var f=h.transpose(i);return h.maximum(i,f)},t.prototype.transform=function(t){var r=this,e=this.X;if(void 0===e||0===e.length)throw new Error("No data has been fit.");var n=Math.floor(this.nNeighbors*this.transformQueueSize),i=u.initializeSearch(this.rpForest,e,t,n,this.initFromRandom,this.initFromTree),o=this.search(e,this.searchGraph,i,t),s=a.deheapSort(o),l=s.indices,f=s.weights;l=l.map(function(t){return t.slice(0,r.nNeighbors)}),f=f.map(function(t){return t.slice(0,r.nNeighbors)});var m=Math.max(0,this.localConnectivity-1),g=this.smoothKNNDistance(f,this.nNeighbors,m),p=g.sigmas,v=g.rhos,d=this.computeMembershipStrengths(l,f,p,v),w=d.rows,y=d.cols,b=d.vals,x=[t.length,e.length],S=new 
h.SparseMatrix(w,y,b,x),E=h.normalize(S,"l1"),R=h.getCSR(E),k=t.length,z=M(c.reshape2d(R.indices,k,this.nNeighbors),c.reshape2d(R.values,k,this.nNeighbors),this.embedding),N=this.nEpochs?this.nEpochs/3:S.nRows<=1e4?100:30,A=S.getValues().reduce(function(t,r){return r>t?r:t},0);S=S.map(function(t){return t0});if(g.length>=e){var p=Math.floor(e),v=e-p;p>0?(s[h]=g[p-1],v>1e-5&&(s[h]+=v*(g[p]-g[p-1]))):s[h]=v*g[0]}else g.length>0&&(s[h]=c.max(g));for(var d=0;d0?Math.exp(-b/f):1}if(Math.abs(w-o)<1e-5)break;w>o?f=(u+(l=f))/2:(u=f,l===1/0?f*=2:f=(u+l)/2)}if(a[h]=f,s[h]>0){var M=c.mean(m);a[h]<.001*M&&(a[h]=.001*M)}else{var x=c.mean(t.map(c.mean));a[h]<.001*x&&(a[h]=.001*x)}}return{sigmas:a,rhos:s}},t.prototype.computeMembershipStrengths=function(t,r,e,n){for(var i=t.length,o=t[0].length,s=c.zeros(i*o),a=c.zeros(i*o),h=c.zeros(i*o),u=0;u0&&(e[n]=r/i[n])}),e},t.prototype.assignOptimizationStateParameters=function(t){Object.assign(this.optimizationState,t)},t.prototype.prepareForOptimizationLoop=function(){var t=this.repulsionStrength,r=this.learningRate,e=this.negativeSampleRate,n=this.optimizationState,i=n.epochsPerSample,o=n.headEmbedding,a=n.tailEmbedding,h=o[0].length,u=o.length===a.length,l=i.map(function(t){return t/e}),c=s(l),f=s(i);this.assignOptimizationStateParameters({epochOfNextSample:f,epochOfNextNegativeSample:c,epochsPerNegativeSample:l,moveOther:u,initialAlpha:r,alpha:r,gamma:t,dim:h})},t.prototype.initializeOptimization=function(){var t=this.embedding,r=this.embedding,e=this.optimizationState,n=e.head,i=e.tail,o=e.epochsPerSample,s=this.getNEpochs(),a=this.graph.nCols,h=w(this.spread,this.minDist),u=h.a,l=h.b;this.assignOptimizationStateParameters({headEmbedding:t,tailEmbedding:r,head:n,tail:i,epochsPerSample:o,a:u,b:l,nEpochs:s,nVertices:a})},t.prototype.optimizeLayoutStep=function(t){for(var 
r=this.optimizationState,e=r.head,n=r.tail,i=r.headEmbedding,o=r.tailEmbedding,s=r.epochsPerSample,a=r.epochOfNextSample,h=r.epochOfNextNegativeSample,u=r.epochsPerNegativeSample,l=r.moveOther,f=r.initialAlpha,m=r.alpha,g=r.gamma,p=r.a,w=r.b,y=r.dim,b=r.nEpochs,M=r.nVertices,x=0;xt)){var S=e[x],E=n[x],R=i[S],k=o[E],z=d(R,k),N=0;z>0&&(N=-2*p*w*Math.pow(z,w-1),N/=p*Math.pow(z,w)+1);for(var A=0;A0)_=2*g*w,_/=(.001+D)*(p*Math.pow(D,w)+1);else if(S===P)continue;for(A=0;A0&&(V=v(_*(R[A]-j[A]),4)),R[A]+=V*m}}h[x]+=C*u[x]}return r.alpha=f*(1-t/b),r.currentEpoch+=1,i},t.prototype.optimizeLayoutAsync=function(t){var r=this;return void 0===t&&(t=function(){return!0}),new Promise(function(e,o){var s=function(){return n(r,void 0,void 0,function(){var r,n,a,h,u,l;return i(this,function(i){try{if(r=this.optimizationState,n=r.nEpochs,a=r.currentEpoch,this.embedding=this.optimizeLayoutStep(a),h=this.optimizationState.currentEpoch,u=!1===t(h),l=h===n,u||l)return[2,e(l)];s()}catch(t){o(t)}return[2]})})};s()})},t.prototype.optimizeLayout=function(t){void 0===t&&(t=function(){return!0});for(var r=!1,e=[];!r;){var n=this.optimizationState,i=n.nEpochs,o=n.currentEpoch;e=this.optimizeLayoutStep(o);var s=this.optimizationState.currentEpoch,a=!1===t(s);r=s===i||a}return e},t.prototype.getNEpochs=function(){var t=this.graph;if(this.nEpochs>0)return this.nEpochs;var r=t.nRows;return r<=2500?500:r<=5e3?400:r<=7500?300:200},t}();function g(t,r){for(var e=0,n=0;nr?r:t<-r?-r:t}function d(t,r){for(var e=0,n=0;n=r?Math.exp(-(e[i]-r)/t):n}),i=f({x:e,y:n},function(t){var r=o(t,2),e=r[0],n=r[1];return function(t){return 1/(1+e*Math.pow(t,2*n))}},{damping:1.5,initialValues:[.5,.5],gradientDifference:.1,maxIterations:100,errorTolerance:.01}).parameterValues,s=o(i,2);return{a:s[0],b:s[1]}}function y(t,r,e,n){return void 0===e&&(e=1),void 0===n&&(n=5),t.map(function(t,i,o){return-1===r[i]||-1===r[o]?t*Math.exp(-e):r[i]!==r[o]?t*Math.exp(-n):t})}function b(t){t=h.normalize(t,"max");var 
r=h.transpose(t),e=h.pairwiseMultiply(r,t);return t=h.add(t,h.subtract(r,e)),h.eliminateZeros(t)}function M(t,r,e){for(var n=c.zeros(t.length).map(function(t){return c.zeros(e[0].length)}),i=0;i=t.length&&(t=void 0),{value:t&&t[e++],done:!t}}}};Object.defineProperty(r,"__esModule",{value:!0});var i=e(2),o=e(3),s=e(4),a=e(1);r.makeNNDescent=function(t,r){return function(e,n,o,s,h,u,l,c){void 0===s&&(s=10),void 0===h&&(h=50),void 0===u&&(u=.001),void 0===l&&(l=.5),void 0===c&&(c=!0);for(var f=e.length,m=i.makeHeap(e.length,o),g=0;gr&&(r=t[e]);return r};var s=function(t){if(!i()(t))throw new TypeError("input must be an array");if(0===t.length)throw new TypeError("input must not be empty");for(var r=t[0],e=1;e1&&void 0!==arguments[1]?arguments[1]:{};if(!i()(t))throw new TypeError("input must be an array");if(0===t.length)throw new TypeError("input must not be empty");if(void 0!==e.output){if(!i()(e.output))throw new TypeError("output option must be an array if specified");r=e.output}else r=new Array(t.length);var n=s(t),a=o(t);if(n===a)throw new RangeError("minimum and maximum input values are equal. 
Cannot rescale a constant array");var h=e.min,u=void 0===h?e.autoMinMax?n:0:h,l=e.max,c=void 0===l?e.autoMinMax?a:1:l;if(u>=c)throw new RangeError("min option must be smaller than max option");for(var f=(c-u)/(a-n),m=0;mMath.abs(h[i])&&(i=r);if(i!==e){for(n=0;n=0;i--){for(n=0;no?t[i][o]:i===o?1:0;return n}get upperTriangularMatrix(){for(var t=this.LU,r=t.rows,e=t.columns,n=new I(r,e),i=0;iMath.abs(r)?(e=r/t,Math.abs(t)*Math.sqrt(1+e*e)):0!==r?(e=t/r,Math.abs(r)*Math.sqrt(1+e*e)):0}function l(t,r,e){for(var n=new Array(t),i=0;i=0;t--)if(0!==v[t]){for(let r=t+1;r=0;t--){if(t0;){let t,r;for(t=R-2;t>=-1&&-1!==t;t--){const r=Number.MIN_VALUE+z*Math.abs(v[t]+Math.abs(v[t+1]));if(Math.abs(y[t])<=r||Number.isNaN(y[t])){y[t]=0;break}}if(t===R-2)r=4;else{let e;for(e=R-1;e>=t&&e!==t;e--){let r=(e!==R?Math.abs(y[e]):0)+(e!==t+1?Math.abs(y[e-1]):0);if(Math.abs(v[e])<=z*r){v[e]=0;break}}e===t?r=3:e===R-1?r=1:(r=2,t=e)}switch(t++,r){case 1:{let r=y[R-2];y[R-2]=0;for(let e=R-2;e>=t;e--){let i=u(v[e],r),o=v[e]/i,s=r/i;if(v[e]=i,e!==t&&(r=-s*y[e-1],y[e-1]=o*y[e-1]),c)for(let t=0;t=v[t+1]);){let r=v[t];if(v[t]=v[t+1],v[t+1]=r,c&&tr?i[o][e]=t[o][e]/this.s[e]:i[o][e]=0;var o=this.U,s=o.length,a=o[0].length,h=new I(e,s);for(let t=0;tt&&r++;return r}get diagonal(){return this.s}get threshold(){return Number.EPSILON/2*Math.max(this.m,this.n)*this.s[0]}get leftSingularVectors(){return I.isMatrix(this.U)||(this.U=new I(this.U)),this.U}get rightSingularVectors(){return I.isMatrix(this.V)||(this.V=new I(this.V)),this.V}get diagonalMatrix(){return I.diag(this.s)}}function f(t,r,e){var n=e?t.rows:t.rows-1;if(r<0||r>n)throw new RangeError("Row index out of range")}function m(t,r,e){var n=e?t.columns:t.columns-1;if(r<0||r>n)throw new RangeError("Column index out of range")}function g(t,r){if(r.to1DArray&&(r=r.to1DArray()),r.length!==t.columns)throw new RangeError("vector size must be the same as the number of columns");return r}function 
p(t,r){if(r.to1DArray&&(r=r.to1DArray()),r.length!==t.rows)throw new RangeError("vector size must be the same as the number of rows");return r}function v(t,r,e){return{row:d(t,r),column:w(t,e)}}function d(t,r){if("object"!=typeof r)throw new TypeError("unexpected type for row indices");if(r.some(r=>r<0||r>=t.rows))throw new RangeError("row indices are out of range");return Array.isArray(r)||(r=Array.from(r)),r}function w(t,r){if("object"!=typeof r)throw new TypeError("unexpected type for column indices");if(r.some(r=>r<0||r>=t.columns))throw new RangeError("column indices are out of range");return Array.isArray(r)||(r=Array.from(r)),r}function y(t,r,e,n,i){if(5!==arguments.length)throw new RangeError("expected 4 arguments");if(b("startRow",r),b("endRow",e),b("startColumn",n),b("endColumn",i),r>e||n>i||r<0||r>=t.rows||e<0||e>=t.rows||n<0||n>=t.columns||i<0||i>=t.columns)throw new RangeError("Submatrix indices are out of range")}function b(t,r){if("number"!=typeof r)throw new TypeError(`${t} must be a number`)}class M extends(C()){constructor(t,r,e){super(),this.matrix=t,this.rows=r,this.columns=e}static get[Symbol.species](){return I}}class x extends M{constructor(t){super(t,t.columns,t.rows)}set(t,r,e){return this.matrix.set(r,t,e),this}get(t,r){return this.matrix.get(r,t)}}class S extends M{constructor(t,r){super(t,1,t.columns),this.row=r}set(t,r,e){return this.matrix.set(this.row,r,e),this}get(t,r){return this.matrix.get(this.row,r)}}class E extends M{constructor(t,r,e,n,i){y(t,r,e,n,i),super(t,e-r+1,i-n+1),this.startRow=r,this.startColumn=n}set(t,r,e){return this.matrix.set(this.startRow+t,this.startColumn+r,e),this}get(t,r){return this.matrix.get(this.startRow+t,this.startColumn+r)}}class R extends M{constructor(t,r,e){var n=v(t,r,e);super(t,n.row.length,n.column.length),this.rowIndices=n.row,this.columnIndices=n.column}set(t,r,e){return this.matrix.set(this.rowIndices[t],this.columnIndices[r],e),this}get(t,r){return 
this.matrix.get(this.rowIndices[t],this.columnIndices[r])}}class k extends M{constructor(t,r){super(t,(r=d(t,r)).length,t.columns),this.rowIndices=r}set(t,r,e){return this.matrix.set(this.rowIndices[t],r,e),this}get(t,r){return this.matrix.get(this.rowIndices[t],r)}}class z extends M{constructor(t,r){r=w(t,r),super(t,t.rows,r.length),this.columnIndices=r}set(t,r,e){return this.matrix.set(t,this.columnIndices[r],e),this}get(t,r){return this.matrix.get(t,this.columnIndices[r])}}class N extends M{constructor(t,r){super(t,t.rows,1),this.column=r}set(t,r,e){return this.matrix.set(t,this.column,e),this}get(t){return this.matrix.get(t,this.column)}}class A extends M{constructor(t){super(t,t.rows,t.columns)}set(t,r,e){return this.matrix.set(this.rows-t-1,r,e),this}get(t,r){return this.matrix.get(this.rows-t-1,r)}}class V extends M{constructor(t){super(t,t.rows,t.columns)}set(t,r,e){return this.matrix.set(t,this.columns-r-1,e),this}get(t,r){return this.matrix.get(t,this.columns-r-1)}}function C(t){void 0===t&&(t=Object);class r extends t{static get[Symbol.species](){return this}static from1DArray(t,r,e){if(t*r!==e.length)throw new RangeError("Data length does not match given dimensions");for(var n=new this(t,r),i=0;it&&(t=this.get(r,e));return t}maxIndex(){for(var t=this.get(0,0),r=[0,0],e=0;et&&(t=this.get(e,n),r[0]=e,r[1]=n);return r}min(){for(var t=this.get(0,0),r=0;rr&&(r=this.get(t,e));return r}maxRowIndex(t){f(this,t);for(var r=this.get(t,0),e=[t,0],n=1;nr&&(r=this.get(t,n),e[1]=n);return e}minRow(t){f(this,t);for(var r=this.get(t,0),e=1;er&&(r=this.get(e,t));return r}maxColumnIndex(t){m(this,t);for(var r=this.get(0,t),e=[0,t],n=1;nr&&(r=this.get(n,t),e[0]=n);return e}minColumn(t){m(this,t);for(var r=this.get(0,t),e=1;e=(r=void 0===r?1:r))throw new RangeError("min should be strictly smaller than max");for(var e=this.constructor.empty(this.rows,this.columns),n=0;n=(r=void 0===r?1:r))throw new RangeError("min should be strictly smaller than max");for(var 
e=this.constructor.empty(this.rows,this.columns),n=0;ne||r<0||r>=this.columns||e<0||e>=this.columns)throw new RangeError("Argument out of range");for(var n=new this.constructor[Symbol.species](t.length,e-r+1),i=0;i=this.rows)throw new RangeError(`Row index out of range: ${t[i]}`);n.set(i,o-r,this.get(t[i],o))}return n}subMatrixColumn(t,r,e){if(void 0===r&&(r=0),void 0===e&&(e=this.rows-1),r>e||r<0||r>=this.rows||e<0||e>=this.rows)throw new RangeError("Argument out of range");for(var n=new this.constructor[Symbol.species](e-r+1,t.length),i=0;i=this.columns)throw new RangeError(`Column index out of range: ${t[i]}`);n.set(o-r,i,this.get(o,t[i]))}return n}setSubMatrix(t,r,e){y(this,r,r+(t=this.constructor.checkMatrix(t)).rows-1,e,e+t.columns-1);for(var n=0;nt?i[o]=1/i[o]:i[o]=0;return i=this.constructor[Symbol.species].diag(i),n.mmul(i.mmul(e.transposeView()))}clone(){for(var t=new this.constructor[Symbol.species](this.rows,this.columns),r=0;r>","signPropagatingRightShift"],[">>>","rightShift","zeroFillRightShift"]]){var u=o($("\n(function %name%(value) {\n if (typeof value === 'number') return this.%name%S(value);\n return this.%name%M(value);\n})\n",{name:s[1],op:s[0]})),l=o($("\n(function %name%S(value) {\n for (var i = 0; i < this.rows; i++) {\n for (var j = 0; j < this.columns; j++) {\n this.set(i, j, this.get(i, j) %op% value);\n }\n }\n return this;\n})\n",{name:`${s[1]}S`,op:s[0]})),d=o($("\n(function %name%M(matrix) {\n matrix = this.constructor.checkMatrix(matrix);\n if (this.rows !== matrix.rows ||\n this.columns !== matrix.columns) {\n throw new RangeError('Matrices dimensions must be equal');\n }\n for (var i = 0; i < this.rows; i++) {\n for (var j = 0; j < this.columns; j++) {\n this.set(i, j, this.get(i, j) %op% matrix.get(i, j));\n }\n }\n return this;\n})\n",{name:`${s[1]}M`,op:s[0]})),w=o($("\n(function %name%(matrix, value) {\n var newMatrix = new this[Symbol.species](matrix);\n return 
newMatrix.%name%(value);\n})\n",{name:s[1]}));for(n=1;n0){if(super(t),!(Number.isInteger(r)&&r>0))throw new TypeError("nColumns must be a positive integer");for(e=0;e=0;o--){for(i=0;i=0;e--){for(t=0;ti)return new Array(r.rows+1).fill(0);for(var o=r.addRow(e,[0]),s=0;s0;a--){for(c=0,s=0,u=0;u0&&(o=-o),r[a]=c*o,s-=i*o,e[a-1]=i-o,h=0;hl){0;do{for(1,i=e[l],f=(e[l+1]-i)/(2*r[l]),m=u(f,1),f<0&&(m=-m),e[l]=r[l]/(f+m),e[l+1]=r[l]*(f+m),g=e[l+1],o=i-e[l],s=l+2;s=l;s--)for(d=v,v=p,b=y,i=p*r[s],o=p*f,m=u(f,r[s]),r[s+1]=y*m,y=r[s]/m,f=(p=f/m)*e[s]-y*i,e[s+1]=o+y*(p*i+y*e[s]),h=0;hS*x)}e[l]=e[l]+M,r[l]=0}for(s=0;s=u;a--)e[a]=r[a][u-1]/l,s+=e[a]*e[a];for(o=Math.sqrt(s),e[u]>0&&(o=-o),s-=e[u]*o,e[u]=e[u]-o,h=u;h=u;a--)i+=e[a]*r[a][h];for(i/=s,a=u;a<=c;a++)r[a][h]-=i*e[a]}for(a=0;a<=c;a++){for(i=0,h=c;h>=u;h--)i+=e[h]*r[a][h];for(i/=s,h=u;h<=c;h++)r[a][h]-=i*e[h]}e[u]=l*e[u],r[u][u-1]=l*o}}for(a=0;a=1;u--)if(0!==r[u][u-1]){for(a=u+1;a<=c;a++)e[a]=r[a][u-1];for(h=u;h<=c;h++){for(o=0,a=u;a<=c;a++)o+=e[a]*n[a][h];for(o=o/e[u]/r[u][u-1],a=u;a<=c;a++)n[a][h]+=o*e[a]}}}(o,f,m,s),function(t,r,e,n,i){var 
o,s,a,h,u,l,c,f,m,g,p,v,d,w,y,b=t-1,M=t-1,x=Number.EPSILON,S=0,E=0,R=0,k=0,z=0,N=0,A=0,V=0;for(o=0;oM)&&(e[o]=i[o][o],r[o]=0),s=Math.max(o-1,0);s=0;){for(h=b;h>0&&(0===(N=Math.abs(i[h-1][h-1])+Math.abs(i[h][h]))&&(N=E),!(Math.abs(i[h][h-1])=0){for(A=R>=0?R+A:R-A,e[b-1]=f+A,e[b]=e[b-1],0!==A&&(e[b]=f-c/A),r[b-1]=0,r[b]=0,f=i[b][b-1],N=Math.abs(f)+Math.abs(A),R=f/N,k=A/N,z=Math.sqrt(R*R+k*k),R/=z,k/=z,s=b-1;s0){for(N=Math.sqrt(N),m=h&&(A=i[u][u],R=((z=f-A)*(N=m-A)-c)/i[u+1][u]+i[u][u+1],k=i[u+1][u+1]-A-z-N,z=i[u+2][u+1],N=Math.abs(R)+Math.abs(k)+Math.abs(z),R/=N,k/=N,z/=N,u!==h)&&!(Math.abs(i[u][u-1])*(Math.abs(k)+Math.abs(z))u+2&&(i[o][o-3]=0);for(a=u;a<=b-1&&(w=a!==b-1,a!==u&&(R=i[a][a-1],k=i[a+1][a-1],z=w?i[a+2][a-1]:0,0!==(f=Math.abs(R)+Math.abs(k)+Math.abs(z))&&(R/=f,k/=f,z/=f)),0!==f);a++)if(N=Math.sqrt(R*R+k*k+z*z),R<0&&(N=-N),0!==N){for(a!==u?i[a][a-1]=-N*f:h!==u&&(i[a][a-1]=-i[a][a-1]),f=(R+=N)/N,m=k/N,A=z/N,k/=R,z/=R,s=a;s=0;b--)if(R=e[b],0===(k=r[b]))for(h=b,i[b][b]=1,o=b-1;o>=0;o--){for(c=i[o][o]-R,z=0,s=h;s<=b;s++)z+=i[o][s]*i[s][b];if(r[o]<0)A=c,N=z;else if(h=o,0===r[o]?i[o][b]=0!==c?-z/c:-z/(x*E):(f=i[o][o+1],m=i[o+1][o],k=(e[o]-R)*(e[o]-R)+r[o]*r[o],l=(f*N-A*z)/k,i[o][b]=l,i[o+1][b]=Math.abs(f)>Math.abs(A)?(-z-c*l)/f:(-N-m*l)/A),l=Math.abs(i[o][b]),x*l*l>1)for(s=o;s<=b;s++)i[s][b]=i[s][b]/l}else if(k<0)for(h=b-1,Math.abs(i[b][b-1])>Math.abs(i[b-1][b])?(i[b-1][b-1]=k/i[b][b-1],i[b-1][b]=-(i[b][b]-R)/i[b][b-1]):(y=$(0,-i[b-1][b],i[b-1][b-1]-R,k),i[b-1][b-1]=y[0],i[b-1][b]=y[1]),i[b][b-1]=0,i[b][b]=1,o=b-2;o>=0;o--){for(g=0,p=0,s=h;s<=b;s++)g+=i[o][s]*i[s][b-1],p+=i[o][s]*i[s][b];if(c=i[o][o]-R,r[o]<0)A=c,z=g,N=p;else 
if(h=o,0===r[o]?(y=$(-g,-p,c,k),i[o][b-1]=y[0],i[o][b]=y[1]):(f=i[o][o+1],m=i[o+1][o],v=(e[o]-R)*(e[o]-R)+r[o]*r[o]-k*k,d=2*(e[o]-R)*k,0===v&&0===d&&(v=x*E*(Math.abs(c)+Math.abs(k)+Math.abs(f)+Math.abs(m)+Math.abs(A))),y=$(f*z-A*g+k*p,f*N-A*p-k*g,v,d),i[o][b-1]=y[0],i[o][b]=y[1],Math.abs(f)>Math.abs(A)+Math.abs(k)?(i[o+1][b-1]=(-g-c*i[o][b-1]+k*i[o][b])/f,i[o+1][b]=(-p-c*i[o][b]-k*i[o][b-1])/f):(y=$(-z-m*i[o][b-1],-N-m*i[o][b],A,k),i[o+1][b-1]=y[0],i[o+1][b]=y[1])),l=Math.max(Math.abs(i[o][b-1]),Math.abs(i[o][b])),x*l*l>1)for(s=o;s<=b;s++)i[s][b-1]=i[s][b-1]/l,i[s][b]=i[s][b]/l}for(o=0;oM)for(s=o;s=0;s--)for(o=0;o<=M;o++){for(A=0,a=0;a<=Math.min(s,M);a++)A+=n[o][a]*i[a][s];n[o][s]=A}}(o,h,a,s,f)}this.n=o,this.e=h,this.d=a,this.V=s}get realEigenvalues(){return this.d}get imaginaryEigenvalues(){return this.e}get eigenvectorMatrix(){return I.isMatrix(this.V)||(this.V=new I(this.V)),this.V}get diagonalMatrix(){var t,r,e=this.n,n=this.e,i=this.d,o=new I(e,e);for(t=0;t0?o[t][t+1]=n[t]:n[t]<0&&(o[t][t-1]=n[t])}return o}}function $(t,r,e,n){var i,o;return Math.abs(e)>Math.abs(n)?[(t+(i=n/e)*r)/(o=e+i*n),(r-i*t)/o]:[((i=e/n)*t+r)/(o=n+i*e),(i*r-t)/o]}class Q{constructor(t){if(!(t=j.checkMatrix(t)).isSymmetric())throw new Error("Matrix is not symmetric");var r,e,n,i=t,o=i.rows,s=new I(o,o),a=!0;for(e=0;e0,s[e][e]=Math.sqrt(Math.max(u,0)),n=e+1;n=0;o--)for(i=0;i= weights[0]) { + return 0; + } + // Insert val at position zero weights[0] = weight; indices[0] = index; @@ -153,8 +178,8 @@ export function heapPush( let i = 0; let iSwap = 0; while (true) { - let ic1 = 2 * i + 1; - let ic2 = ic1 + 1; + const ic1 = 2 * i + 1; + const ic2 = ic1 + 1; const heapShape2 = heap[0][0].length; if (ic1 >= heapShape2) { @@ -290,3 +315,29 @@ function siftDown( } } } + +/** + * Search the heap for the smallest element that is still flagged. 
+ */ +export function smallestFlagged(heap: Heap, row: number) { + const ind = heap[0][row]; + const dist = heap[1][row]; + const flag = heap[2][row]; + + let minDist = Infinity; + let resultIndex = -1; + + for (let i = 0; i > ind.length; i++) { + if (flag[i] === 1 && dist[i] < minDist) { + minDist = dist[i]; + resultIndex = i; + } + } + + if (resultIndex >= 0) { + flag[resultIndex] = 0; + return Math.floor(ind[resultIndex]); + } else { + return -1; + } +} diff --git a/src/matrix.ts b/src/matrix.ts index 6a59c0f..da384ef 100644 --- a/src/matrix.ts +++ b/src/matrix.ts @@ -50,7 +50,7 @@ export class SparseMatrix { // TODO: Assert that dims are legit. this.nRows = dims[0]; - this.nCols = dims[0]; + this.nCols = dims[1]; } private makeKey(row: number, col: number): string { @@ -186,6 +186,13 @@ export function subtract(a: SparseMatrix, b: SparseMatrix): SparseMatrix { return elementWise(a, b, (x, y) => x - y); } +/** + * Element-wise maximum of two matrices + */ +export function maximum(a: SparseMatrix, b: SparseMatrix): SparseMatrix { + return elementWise(a, b, (x, y) => (x > y ? x : y)); +} + /** * Scalar multiplication of two matrices */ @@ -323,3 +330,43 @@ function elementWise( const dims = [a.nRows, a.nCols]; return new SparseMatrix(rows, cols, vals, dims); } + +/** + * Helper function for getting data, indices, and inptr arrays from a sparse + * matrix to follow csr matrix conventions. Super inefficient (and kind of + * defeats the purpose of this convention) but a lot of the ported python tree + * search logic depends on this data format. 
+ */ +export function getCSR(x: SparseMatrix) { + type Entry = { value: number; row: number; col: number }; + const entries: Entry[] = []; + + x.forEach((value, row, col) => { + entries.push({ value, row, col }); + }); + + entries.sort((a, b) => { + if (a.row === b.row) { + return a.col - b.col; + } else { + return a.row - b.col; + } + }); + + const indices: number[] = []; + const values: number[] = []; + const indptr: number[] = []; + + let currentRow = -1; + for (let i = 0; i < entries.length; i++) { + const { row, col, value } = entries[i]; + if (row !== currentRow) { + currentRow = row; + indptr.push(i); + } + indices.push(col); + values.push(value); + } + + return { indices, values, indptr }; +} diff --git a/src/nn_descent.ts b/src/nn_descent.ts index 4cb16ed..af162b0 100644 --- a/src/nn_descent.ts +++ b/src/nn_descent.ts @@ -58,6 +58,8 @@ */ import * as heap from './heap'; +import * as matrix from './matrix'; +import * as tree from './tree'; import * as utils from './utils'; import { Vectors, DistanceFn } from './umap'; @@ -143,3 +145,121 @@ export function makeNNDescent(distanceFn: DistanceFn, random: () => number) { return sorted; }; } + +export type InitFromRandomFn = ( + nNeighbors: number, + data: Vectors, + queryPoints: Vectors, + _heap: heap.Heap +) => void; + +export type InitFromTreeFn = ( + _tree: tree.FlatTree, + data: Vectors, + queryPoints: Vectors, + _heap: heap.Heap +) => void; + +export function makeInitializations(distanceFn: DistanceFn) { + function initFromRandom( + nNeighbors: number, + data: Vectors, + queryPoints: Vectors, + _heap: heap.Heap + ) { + for (let i = 0; i < queryPoints.length; i++) { + const indices = utils.rejectionSample(nNeighbors, data.length); + for (let j = 0; j < indices.length; j++) { + if (indices[j] < 0) { + continue; + } + const d = distanceFn(data[indices[j]], queryPoints[i]); + heap.heapPush(_heap, i, d, indices[j], 1); + } + } + } + + function initFromTree( + _tree: tree.FlatTree, + data: Vectors, + queryPoints: 
Vectors, + _heap: heap.Heap + ) { + for (let i = 0; i < queryPoints.length; i++) { + const indices = tree.searchFlatTree(queryPoints[i], _tree); + + for (let j = 0; j < indices.length; j++) { + if (indices[j] < 0) { + return; + } + const d = distanceFn(data[indices[j]], queryPoints[i]); + heap.heapPush(_heap, i, d, indices[j], 1); + } + } + return; + } + + return { initFromRandom, initFromTree }; +} + +export type SearchFn = ( + data: Vectors, + graph: matrix.SparseMatrix, + initialization: heap.Heap, + queryPoints: Vectors +) => heap.Heap; + +export function makeInitializedNNSearch(distanceFn: DistanceFn) { + return function nnSearchFn( + data: Vectors, + graph: matrix.SparseMatrix, + initialization: heap.Heap, + queryPoints: Vectors + ) { + const { indices, indptr } = matrix.getCSR(graph); + + for (let i = 0; i < queryPoints.length; i++) { + const tried = new Set(initialization[0][i]); + while (true) { + // Find smallest flagged vertex + const vertex = heap.smallestFlagged(initialization, i); + + if (vertex === -1) { + break; + } + const candidates = indices.slice(indptr[vertex], indptr[vertex + 1]); + for (const candidate of candidates) { + if ( + candidate === vertex || + candidate === -1 || + tried.has(candidate) + ) { + continue; + } + const d = distanceFn(data[candidate], queryPoints[i]); + heap.uncheckedHeapPush(initialization, i, d, candidate, 1); + tried.add(candidate); + } + } + } + return initialization; + }; +} + +export function initializeSearch( + forest: tree.FlatTree[], + data: Vectors, + queryPoints: Vectors, + nNeighbors: number, + initFromRandom: InitFromRandomFn, + initFromTree: InitFromTreeFn +) { + const results = heap.makeHeap(queryPoints.length, nNeighbors); + initFromRandom(nNeighbors, data, queryPoints, results); + if (forest) { + for (let tree of forest) { + initFromTree(tree, data, queryPoints, results); + } + } + return results; +} diff --git a/src/tree.ts b/src/tree.ts index c2a3e66..c73c9df 100644 --- a/src/tree.ts +++ b/src/tree.ts 
@@ -58,7 +58,7 @@ */ import * as utils from './utils'; -import { Vectors } from './umap'; +import { Vector, Vectors } from './umap'; /** * Tree functionality for approximating nearest neighbors @@ -339,3 +339,40 @@ export function makeLeafArray(rpForest: FlatTree[]): number[][] { return [[-1]]; } } + +/** + * Selects the side of the tree to search during flat tree search. + */ +function selectSide(hyperplane: number[], offset: number, point: Vector) { + let margin = offset; + for (let d = 0; d < point.length; d++) { + margin += hyperplane[d] * point[d]; + } + + if (margin === 0) { + const side = utils.tauRandInt(2); + return side; + } else if (margin > 0) { + return 0; + } else { + return 1; + } +} + +/** + * Searches a flattened rp-tree for a point. + */ +export function searchFlatTree(point: Vector, tree: FlatTree) { + let node = 0; + while (tree.children[node][0] > 0) { + const side = selectSide(tree.hyperplanes[node], tree.offsets[node], point); + if (side === 0) { + node = tree.children[node][0]; + } else { + node = tree.children[node][1]; + } + } + + const index = -1 * tree.children[node][0]; + return tree.indices[index]; +} diff --git a/src/umap.ts b/src/umap.ts index 3080670..503cee9 100644 --- a/src/umap.ts +++ b/src/umap.ts @@ -57,6 +57,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +import * as heap from './heap'; import * as matrix from './matrix'; import * as nnDescent from './nn_descent'; import * as tree from './tree'; @@ -77,6 +78,18 @@ const SMOOTH_K_TOLERANCE = 1e-5; const MIN_K_DIST_SCALE = 1e-3; export interface UMAPParameters { + /** + * The initial learning rate for the embedding optimization. + */ + learningRate?: number; + /** + * The local connectivity required -- i.e. the number of nearest + * neighbors that should be assumed to be connected at a local level. + * The higher this value the more connected the manifold becomes + * locally. 
In practice this should be not more than the local intrinsic + * dimension of the manifold. + */ + localConnectivity?: number; /** * The effective minimum distance between embedded points. Smaller values * will result in a more clustered/clumped embedding where nearby points @@ -91,7 +104,6 @@ export interface UMAPParameters { * provide easy visualization, but can reasonably be set to any * integer value in the range 2 to 100. */ - nComponents?: number; /** * The number of training epochs to be used in optimizing the @@ -99,7 +111,6 @@ export interface UMAPParameters { * embeddings. If None is specified a value will be selected based on * the size of the input dataset (200 for large datasets, 500 for small). */ - nEpochs?: number; /** * The size of local neighborhood (in terms of number of neighboring @@ -108,18 +119,46 @@ export interface UMAPParameters { * values result in more local data being preserved. In general * values should be in the range 2 to 100. */ - nNeighbors?: number; + /** + * The number of negative samples to select per positive sample + * in the optimization process. Increasing this value will result + * in greater repulsive force being applied, greater optimization + * cost, but slightly more accuracy. + */ + negativeSampleRate?: number; + /** + * Weighting applied to negative samples in low dimensional embedding + * optimization. Values higher than one will result in greater weight + * being given to negative samples. + */ + repulsionStrength?: number; /** * The pseudo-random number generator used by the stochastic parts of the * algorithm. */ random?: () => number; + /** + * Interpolate between (fuzzy) union and intersection as the set operation + * used to combine local fuzzy simplicial sets to obtain a global fuzzy + * simplicial sets. Both fuzzy set operations use the product t-norm. 
+ * The value of this parameter should be between 0.0 and 1.0; a value of + * 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy + * intersection. + */ + setOpMixRatio?: number; /** * The effective scale of embedded points. In combination with ``min_dist`` * this determines how clustered/clumped the embedded points are. */ spread?: number; + /** + * For transform operations (embedding new points using a trained model) + * this will control how aggressively to search for nearest neighbors. + * Larger values will result in slower performance but more accurate + * nearest neighbor evaluation. + */ + transformQueueSize?: number; } export interface UMAPSupervisedParams { @@ -157,9 +196,7 @@ export interface UMAPSupervisedParams { * computationally intensive matrix eigen computations that aren't easily * ported to JavaScript. * b) A lot of "extra" functionality has been omitted from this implementation, - * most notably a great deal of alternate distance functions, the ability - * to do supervised projection, and the ability to transform additional data - * into an existing embedding space. + * most notably a great deal of alternate distance functions. 
* * This implementation provides three methods of reducing dimensionality: * 1) fit: fit the data synchronously @@ -169,12 +206,18 @@ export interface UMAPSupervisedParams { * step through each epoch of the SGD optimization */ export class UMAP { + private learningRate = 1.0; + private localConnectivity = 1.0; private minDist = 0.1; private nComponents = 2; private nEpochs = 0; private nNeighbors = 15; + private negativeSampleRate = 5; private random = Math.random; + private repulsionStrength = 1.0; + private setOpMixRatio = 1.0; private spread = 1.0; + private transformQueueSize = 4.0; // Supervised projection params private targetMetric = TargetMetric.categorical; @@ -191,6 +234,11 @@ export class UMAP { private graph!: matrix.SparseMatrix; private X!: Vectors; private isInitialized = false; + private rpForest: tree.FlatTree[] = []; + private initFromRandom!: nnDescent.InitFromRandomFn; + private initFromTree!: nnDescent.InitFromTreeFn; + private search!: nnDescent.SearchFn; + private searchGraph!: matrix.SparseMatrix; // Supervised projection labels / targets private Y?: number[]; @@ -200,12 +248,22 @@ export class UMAP { private optimizationState = new OptimizationState(); constructor(params: UMAPParameters = {}) { - this.minDist = params.minDist || this.minDist; - this.nComponents = params.nComponents || this.nComponents; - this.nEpochs = params.nEpochs || this.nEpochs; - this.nNeighbors = params.nNeighbors || this.nNeighbors; - this.random = params.random || this.random; - this.spread = params.spread || this.spread; + const setParam = (key: string) => { + if (params[key] !== undefined) this[key] = params[key]; + }; + + setParam('learningRate'); + setParam('localConnectivity'); + setParam('minDist'); + setParam('nComponents'); + setParam('nEpochs'); + setParam('nNeighbors'); + setParam('negativeSampleRate'); + setParam('random'); + setParam('repulsionStrength'); + setParam('setOpMixRatio'); + setParam('spread'); + setParam('transformQueueSize'); } /** @@ 
-228,7 +286,7 @@ export class UMAP { ) { this.initializeFit(X); - await this.optimizeLayout(callback); + await this.optimizeLayoutAsync(callback); return this.embedding; } @@ -270,7 +328,15 @@ export class UMAP { this.knnDistances = knnResults.knnDistances; } - this.graph = this.fuzzySimplicialSet(X, this.nNeighbors); + this.graph = this.fuzzySimplicialSet( + X, + this.nNeighbors, + this.setOpMixRatio + ); + + // Set up the search graph for subsequent transformation. + this.makeSearchFns(); + this.searchGraph = this.makeSearchGraph(X); // Check if supervised projection, then adjust the graph. this.processGraphForSupervisedProjection(); @@ -286,10 +352,146 @@ export class UMAP { this.optimizationState.tail = tail; this.optimizationState.epochsPerSample = epochsPerSample; + // Now, initialize the optimization steps + this.initializeOptimization(); + this.prepareForOptimizationLoop(); this.isInitialized = true; + return this.getNEpochs(); } + private makeSearchFns() { + const { initFromTree, initFromRandom } = nnDescent.makeInitializations( + this.distanceFn + ); + this.initFromTree = initFromTree; + this.initFromRandom = initFromRandom; + this.search = nnDescent.makeInitializedNNSearch(this.distanceFn); + } + + private makeSearchGraph(X: Vectors) { + const knnIndices = this.knnIndices!; + const knnDistances = this.knnDistances!; + const dims = [X.length, X.length]; + const searchGraph = new matrix.SparseMatrix([], [], [], dims); + for (let i = 0; i < knnIndices.length; i++) { + const knn = knnIndices[i]; + const distances = knnDistances[i]; + for (let j = 0; j < knn.length; j++) { + const neighbor = knn[j]; + const distance = distances[j]; + if (distance > 0) { + searchGraph.set(i, neighbor, distance); + } + } + } + + const transpose = matrix.transpose(searchGraph); + return matrix.maximum(searchGraph, transpose); + } + + /** + * Transforms data to the existing embedding space. 
+ */ + transform(toTransform: Vectors) { + // Use the previous rawData + const rawData = this.X; + if (rawData === undefined || rawData.length === 0) { + throw new Error('No data has been fit.'); + } + + const nNeighbors = Math.floor(this.nNeighbors * this.transformQueueSize); + const init = nnDescent.initializeSearch( + this.rpForest, + rawData, + toTransform, + nNeighbors, + this.initFromRandom, + this.initFromTree + ); + + const result = this.search(rawData, this.searchGraph, init, toTransform); + + let { indices, weights: distances } = heap.deheapSort(result); + + indices = indices.map(x => x.slice(0, this.nNeighbors)); + distances = distances.map(x => x.slice(0, this.nNeighbors)); + + const adjustedLocalConnectivity = Math.max(0, this.localConnectivity - 1); + const { sigmas, rhos } = this.smoothKNNDistance( + distances, + this.nNeighbors, + adjustedLocalConnectivity + ); + + const { rows, cols, vals } = this.computeMembershipStrengths( + indices, + distances, + sigmas, + rhos + ); + + const size = [toTransform.length, rawData.length]; + let graph = new matrix.SparseMatrix(rows, cols, vals, size); + + // This was a very specially constructed graph with constant degree. + // That lets us do fancy unpacking by reshaping the csr matrix indices + // and data. Doing so relies on the constant degree assumption! + + const normed = matrix.normalize(graph, matrix.NormType.l1); + + const csrMatrix = matrix.getCSR(normed); + const nPoints = toTransform.length; + + const eIndices = utils.reshape2d( + csrMatrix.indices, + nPoints, + this.nNeighbors + ); + + const eWeights = utils.reshape2d( + csrMatrix.values, + nPoints, + this.nNeighbors + ); + + const embedding = initTransform(eIndices, eWeights, this.embedding); + + const nEpochs = this.nEpochs + ? this.nEpochs / 3 + : graph.nRows <= 10000 + ? 100 + : 30; + + const graphMax = graph + .getValues() + .reduce((max, val) => (val > max ? val : max), 0); + graph = graph.map(value => (value < graphMax / nEpochs ? 
0 : value)); + graph = matrix.eliminateZeros(graph); + + const epochsPerSample = this.makeEpochsPerSample( + graph.getValues(), + nEpochs + ); + const head = graph.getRows(); + const tail = graph.getCols(); + + // Initialize optimization slightly differently than the fit method. + this.assignOptimizationStateParameters({ + headEmbedding: embedding, + tailEmbedding: this.embedding, + head, + tail, + currentEpoch: 0, + nEpochs, + nVertices: graph.getDims()[1], + epochsPerSample, + }); + this.prepareForOptimizationLoop(); + + return this.optimizeLayout(); + } + /** * Checks if we're using supervised projection, then process the graph * accordingly. @@ -318,10 +520,7 @@ export class UMAP { * Manually step through the optimization process one epoch at a time. */ step() { - const { currentEpoch, isInitialized } = this.optimizationState; - if (!isInitialized) { - this.initializeOptimization(); - } + const { currentEpoch } = this.optimizationState; if (currentEpoch < this.getNEpochs()) { this.optimizeLayoutStep(currentEpoch); @@ -354,9 +553,9 @@ export class UMAP { const nTrees = 5 + Math.floor(round(X.length ** 0.5 / 20.0)); const nIters = Math.max(5, Math.floor(Math.round(log2(X.length)))); - const rpForest = tree.makeForest(X, nNeighbors, nTrees, this.random); + this.rpForest = tree.makeForest(X, nNeighbors, nTrees, this.random); - const leafArray = tree.makeLeafArray(rpForest); + const leafArray = tree.makeLeafArray(this.rpForest); const { indices, weights } = metricNNDescent( X, leafArray, @@ -377,10 +576,9 @@ export class UMAP { private fuzzySimplicialSet( X: Vectors, nNeighbors: number, - localConnectivity = 1.0, setOpMixRatio = 1.0 ) { - const { knnIndices = [], knnDistances = [] } = this; + const { knnIndices = [], knnDistances = [], localConnectivity } = this; const { sigmas, rhos } = this.smoothKNNDistance( knnDistances, @@ -633,29 +831,28 @@ export class UMAP { } /** - * Initializes optimization state for stepwise optimization + * Assigns optimization state 
parameters from a partial optimization state. */ - private initializeOptimization() { - // Algorithm state - const headEmbedding = this.embedding; - const tailEmbedding = this.embedding; - - // Initialized in initializeSimplicialSetEmbedding() - const { head, tail, epochsPerSample } = this.optimizationState; + private assignOptimizationStateParameters(state: Partial) { + Object.assign(this.optimizationState, state); + } + /** + * Sets a few optimization state parameters that are necessary before entering + * the optimization step loop. + */ + private prepareForOptimizationLoop() { // Hyperparameters - const gamma = 1.0; - const initialAlpha = 1.0; - const negativeSampleRate = 5; + const { repulsionStrength, learningRate, negativeSampleRate } = this; - const nEpochs = this.getNEpochs(); - const nVertices = this.graph.nCols; - - const { a, b } = findABParams(this.spread, this.minDist); + const { + epochsPerSample, + headEmbedding, + tailEmbedding, + } = this.optimizationState; const dim = headEmbedding[0].length; const moveOther = headEmbedding.length === tailEmbedding.length; - let alpha = initialAlpha; const epochsPerNegativeSample = epochsPerSample.map( e => e / negativeSampleRate @@ -663,23 +860,42 @@ export class UMAP { const epochOfNextNegativeSample = [...epochsPerNegativeSample]; const epochOfNextSample = [...epochsPerSample]; - Object.assign(this.optimizationState, { - isInitialized: true, + this.assignOptimizationStateParameters({ + epochOfNextSample, + epochOfNextNegativeSample, + epochsPerNegativeSample, + moveOther, + initialAlpha: learningRate, + alpha: learningRate, + gamma: repulsionStrength, + dim, + }); + } + + /** + * Initializes optimization state for stepwise optimization. 
+ */ + private initializeOptimization() { + // Algorithm state + const headEmbedding = this.embedding; + const tailEmbedding = this.embedding; + + // Initialized in initializeSimplicialSetEmbedding() + const { head, tail, epochsPerSample } = this.optimizationState; + + const nEpochs = this.getNEpochs(); + const nVertices = this.graph.nCols; + + const { a, b } = findABParams(this.spread, this.minDist); + + this.assignOptimizationStateParameters({ headEmbedding, tailEmbedding, head, tail, epochsPerSample, - epochOfNextSample, - epochOfNextNegativeSample, - epochsPerNegativeSample, - moveOther, - initialAlpha, - alpha, - gamma, a, b, - dim, nEpochs, nVertices, }); @@ -777,8 +993,7 @@ export class UMAP { optimizationState.alpha = initialAlpha * (1.0 - n / nEpochs); optimizationState.currentEpoch += 1; - this.embedding = headEmbedding; - return optimizationState.currentEpoch; + return headEmbedding; } /** @@ -788,18 +1003,15 @@ export class UMAP { * sampling edges based on their membership strength (with the (1-p) terms * coming from negative sampling similar to word2vec). 
*/ - private optimizeLayout( + private optimizeLayoutAsync( epochCallback: (epochNumber: number) => void | boolean = () => true ): Promise { - if (!this.optimizationState.isInitialized) { - this.initializeOptimization(); - } - return new Promise((resolve, reject) => { const step = async () => { try { const { nEpochs, currentEpoch } = this.optimizationState; - const epochCompleted = this.optimizeLayoutStep(currentEpoch); + this.embedding = this.optimizeLayoutStep(currentEpoch); + const epochCompleted = this.optimizationState.currentEpoch; const shouldStop = epochCallback(epochCompleted) === false; const isFinished = epochCompleted === nEpochs; if (!shouldStop && !isFinished) { @@ -815,6 +1027,28 @@ export class UMAP { }); } + /** + * Improve an embedding using stochastic gradient descent to minimize the + * fuzzy set cross entropy between the 1-skeletons of the high dimensional + * and low dimensional fuzzy simplicial sets. In practice this is done by + * sampling edges based on their membership strength (with the (1-p) terms + * coming from negative sampling similar to word2vec). + */ + private optimizeLayout( + epochCallback: (epochNumber: number) => void | boolean = () => true + ): Vectors { + let isFinished = false; + let embedding: Vectors = []; + while (!isFinished) { + const { nEpochs, currentEpoch } = this.optimizationState; + embedding = this.optimizeLayoutStep(currentEpoch); + const epochCompleted = this.optimizationState.currentEpoch; + const shouldStop = epochCallback(epochCompleted) === false; + isFinished = epochCompleted === nEpochs || shouldStop; + } + return embedding; + } + /** * Gets the number of epochs for optimizing the projection. * NOTE: This heuristic differs from the python version @@ -873,7 +1107,6 @@ export function cosine(x: Vector, y: Vector) { */ class OptimizationState { currentEpoch = 0; - isInitialized = false; // Data tracked during optimization steps. 
headEmbedding: number[][] = []; @@ -989,3 +1222,28 @@ export function resetLocalConnectivity(simplicialSet: matrix.SparseMatrix) { ); return matrix.eliminateZeros(simplicialSet); } + +/** + * Given indices and weights and an original embeddings + * initialize the positions of new points relative to the + * indices and weights (of their neighbors in the source data). + */ +export function initTransform( + indices: number[][], + weights: number[][], + embedding: Vectors +) { + const result = utils + .zeros(indices.length) + .map(z => utils.zeros(embedding[0].length)); + + for (let i = 0; i < indices.length; i++) { + for (let j = 0; j < indices[0].length; j++) { + for (let d = 0; d < embedding[0].length; d++) { + const a = indices[i][j]; + result[i][d] += weights[i][j] * embedding[a][d]; + } + } + } + return result; +} diff --git a/src/utils.ts b/src/utils.ts index 6dba21d..6b67b16 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -57,7 +57,6 @@ export function tauRandInt(n: number, random = Math.random) { export function tauRand(random = Math.random) { return random(); } - /** * Compute the (standard l2) norm of a vector. */ @@ -154,3 +153,54 @@ export function max2d(input: number[][]): number { } return max; } + +/** + * Generate nSamples many integers from 0 to poolSize such that no + * integer is selected twice. The duplication constraint is achieved via + * rejection sampling. + */ +export function rejectionSample(nSamples: number, poolSize: number): number[] { + const result = zeros(nSamples); + for (let i = 0; i < nSamples; i++) { + let rejectSample = true; + while (rejectSample) { + const j = tauRandInt(poolSize); + let broken = false; + for (let k = 0; k < i; k++) { + if (j === result[k]) { + broken = true; + break; + } + } + if (!broken) { + rejectSample = false; + } + result[i] = j; + } + } + return result; +} + +/** + * Reshapes a 1d array into a 2D of given dimensions. 
+ */ +export function reshape2d(x: T[], a: number, b: number): T[][] { + const rows: T[][] = []; + let count = 0; + let index = 0; + + if (x.length !== a * b) { + throw new Error('Array dimensions must match input length.'); + } + + for (let i = 0; i < a; i++) { + const col: T[] = []; + for (let j = 0; j < b; j++) { + col.push(x[index]); + index += 1; + } + rows.push(col); + count += 1; + } + return rows; +} diff --git a/test/matrix.test.ts b/test/matrix.test.ts index f8e3048..b7fbff8 100644 --- a/test/matrix.test.ts +++ b/test/matrix.test.ts @@ -24,10 +24,12 @@ import { pairwiseMultiply, add, subtract, + maximum, multiplyScalar, eliminateZeros, normalize, NormType, + getCSR, } from '../src/matrix'; describe('sparse matrix', () => { @@ -133,6 +135,12 @@ describe('helper methods', () => { expect(X.toArray()).toEqual([[0, 0], [0, 0]]); }); + test('element-wise maximum method', () => { + const I = multiplyScalar(identity([2, 2]), 8); + const X = maximum(A, I); + expect(X.toArray()).toEqual([[8, 2], [3, 8]]); + }); + test('scalar multiply method', () => { const X = multiplyScalar(A, 3); expect(X.toArray()).toEqual([[3, 6], [9, 12]]); @@ -197,4 +205,11 @@ describe('normalize method', () => { const n = normalize(A); expect(n.toArray()).toEqual(expected); }); + + test('getCSR function', () => { + const { indices, values, indptr } = getCSR(A); + expect(indices).toEqual([0, 1, 2, 0, 0, 1, 2, 1, 2]); + expect(values).toEqual([1, 2, 3, 7, 4, 5, 6, 8, 9]); + expect(indptr).toEqual([0, 3, 4, 7]); + }); }); diff --git a/test/nn_descent.test.ts b/test/nn_descent.test.ts index 84eb2a2..e6ae7b5 100644 --- a/test/nn_descent.test.ts +++ b/test/nn_descent.test.ts @@ -30,4 +30,19 @@ describe('umap nnDescent methods', () => { expect(nnDescentFn instanceof Function).toBe(true); }); + + test('returns an initialized nearest neighbors search function', () => { + const nnSearchFn = nnDescent.makeInitializedNNSearch(euclidean); + + expect(nnSearchFn instanceof Function).toBe(true); + }); + + 
test('returns initialization functions', () => { + const { initFromRandom, initFromTree } = nnDescent.makeInitializations( + euclidean + ); + + expect(initFromRandom instanceof Function).toBe(true); + expect(initFromTree instanceof Function).toBe(true); + }); }); diff --git a/test/test_data.ts b/test/test_data.ts index 6a0665a..8876cf5 100644 --- a/test/test_data.ts +++ b/test/test_data.ts @@ -118,10 +118,20 @@ export const testData: number[][] = [ [0,0,0,3,14,1,0,0,0,0,0,13,12,1,0,0,0,0,7,16,5,3,0,0,0,3,15,11,5,16,2,0,0,5,16,11,11,16,6,0,0,0,6,12,16,13,3,0,0,0,0,1,15,7,0,0,0,0,0,2,16,7,0,0], [0,2,15,16,16,13,2,0,0,1,10,8,14,16,8,0,0,0,0,0,16,15,1,0,0,0,0,0,16,8,0,0,0,0,0,0,14,14,0,0,0,0,0,0,11,16,1,0,0,2,14,13,16,16,3,0,0,2,15,16,14,5,0,0], [0,0,1,15,13,0,0,0,0,0,1,16,16,5,0,0,0,0,7,16,16,0,0,0,0,0,13,16,13,0,0,0,0,7,16,16,13,0,0,0,0,1,11,16,13,0,0,0,0,0,2,16,16,0,0,0,0,0,1,14,16,3,0,0] -] +]; + +export const additionalData = [ + [0, 0, 0, 2, 13, 0, 0, 0, 0, 0, 0, 8, 15, 0, 0, 0, 0, 0, 5, 16, 5, 2, 0, 0, 0, 0, 15, 12, 1, 16, 4, 0, 0, 4, 16, 2, 9, 16, 8, 0, 0, 0, 10, 14, 16, 16, 4, 0, 0, 0, 0, 0, 13, 8, 0, 0, 0, 0, 0, 0, 13, 6, 0, 0], + [0, 0, 1, 12, 5, 0, 0, 0, 0, 0, 9, 16, 14, 3, 0, 0, 0, 2, 16, 14, 11, 13, 0, 0, 0, 2, 16, 10, 0, 14, 4, 0, 0, 4, 16, 0, 0, 12, 4, 0, 0, 4, 16, 3, 0, 11, 10, 0, 0, 0, 13, 12, 8, 14, 6, 0, 0, 0, 3, 10, 16, 12, 1, 0], + [0, 0, 12, 16, 16, 8, 0, 0, 0, 3, 16, 13, 8, 5, 0, 0, 0, 2, 16, 3, 0, 0, 0, 0, 0, 0, 16, 13, 9, 0, 0, 0, 0, 0, 10, 16, 16, 7, 0, 0, 0, 0, 0, 1, 10, 13, 0, 0, 0, 0, 2, 11, 16, 10, 0, 0, 0, 0, 11, 16, 12, 0, 0, 0], +]; + +export const transformResult2d = [-0.11142072379623405, 1.6251469561184166]; export const testLabels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 9, 5, 5, 6, 5, 0, 9, 8, 9, 8, 4, 1, 7, 7, 3, 5, 1, 0, 0, 2, 2, 7, 8, 2, 0, 1, 2, 6, 3, 3, 7, 3, 3, 4, 6, 6, 6, 4, 9, 1, 5, 0, 9, 5, 2, 8, 2, 0, 0, 1, 7, 6, 3, 2, 1, 7, 4, 6, 3, 1, 3, 9, 1, 7, 6, 8, 4, 3, 1]; 
+export const additionalLabels = [4, 0, 5]; + export const testResults2D = [ [-2.904975618700953, 3.683494083841041], [-0.879124321765863, -0.4426951405143409], diff --git a/test/umap.test.ts b/test/umap.test.ts index 8ea0160..1e751c3 100644 --- a/test/umap.test.ts +++ b/test/umap.test.ts @@ -20,10 +20,13 @@ import { UMAP, findABParams, euclidean, TargetMetric } from '../src/umap'; import * as utils from '../src/utils'; import { + additionalData, + additionalLabels, testData, testLabels, testResults2D, testResults3D, + transformResult2d, } from './test_data'; import Prando from 'prando'; @@ -66,7 +69,6 @@ describe('UMAP', () => { test('UMAP step method', () => { const umap = new UMAP({ random }); - const nEpochs = umap.initializeFit(testData); for (let i = 0; i < nEpochs; i++) { @@ -148,47 +150,90 @@ describe('UMAP', () => { expect(diff(params.b, b)).toBeLessThanOrEqual(epsilon); }); - const computeMeanDistances = (vectors: number[][]) => { - return vectors.map(vector => { - return utils.mean( - vectors.map(other => { - return euclidean(vector, other); - }) - ); - }); - }; - - /** - * Check the ratio between distances within a cluster and for all points to - * indicate "clustering" - */ - const checkClusters = ( - embeddings: number[][], - labels: number[], - expectedClusterRatio: number - ) => { - const distances = computeMeanDistances(embeddings); - const overallMeanDistance = utils.mean(distances); - - const embeddingsByLabel = new Map(); - for (let i = 0; i < labels.length; i++) { - const label = labels[i]; - const embedding = embeddings[i]; - const group = embeddingsByLabel.get(label) || []; - group.push(embedding); - embeddingsByLabel.set(label, group); - } + test('transforms an additional point after fitting', () => { + const umap = new UMAP({ random, nComponents: 2 }); + const embedding = umap.fit(testData); + + const additional = additionalData[0]; + const transformed = umap.transform([additional]); + + const nearestIndex = 
getNearestNeighborIndex(embedding, transformed[0]); + const nearestLabel = testLabels[nearestIndex]; + expect(nearestLabel).toEqual(additionalLabels[0]); + }); + + test('transforms additional points after fitting', () => { + const umap = new UMAP({ random, nComponents: 2 }); + const embedding = umap.fit(testData); + + const transformed = umap.transform(additionalData); - let totalIntraclusterDistance = 0; - for (let label of embeddingsByLabel.keys()) { - const group = embeddingsByLabel.get(label)!; - const distances = computeMeanDistances(group); - const meanDistance = utils.mean(distances); - totalIntraclusterDistance += meanDistance * group.length; + for (let i = 0; i < transformed.length; i++) { + const nearestIndex = getNearestNeighborIndex(embedding, transformed[i]); + const nearestLabel = testLabels[nearestIndex]; + expect(nearestLabel).toEqual(additionalLabels[i]); } - const meanInterclusterDistance = - totalIntraclusterDistance / embeddings.length; - const clusterRatio = meanInterclusterDistance / overallMeanDistance; - expect(clusterRatio).toBeLessThan(expectedClusterRatio); - }; + }); }); + +function computeMeanDistances(vectors: number[][]) { + return vectors.map(vector => { + return utils.mean( + vectors.map(other => { + return euclidean(vector, other); + }) + ); + }); +} + +/** + * Check the ratio between distances within a cluster and for all points to + * indicate "clustering" + */ +function checkClusters( + embeddings: number[][], + labels: number[], + expectedClusterRatio: number +) { + const distances = computeMeanDistances(embeddings); + const overallMeanDistance = utils.mean(distances); + + const embeddingsByLabel = new Map(); + for (let i = 0; i < labels.length; i++) { + const label = labels[i]; + const embedding = embeddings[i]; + const group = embeddingsByLabel.get(label) || []; + group.push(embedding); + embeddingsByLabel.set(label, group); + } + + let totalIntraclusterDistance = 0; + for (let label of embeddingsByLabel.keys()) { + const 
group = embeddingsByLabel.get(label)!; + const distances = computeMeanDistances(group); + const meanDistance = utils.mean(distances); + totalIntraclusterDistance += meanDistance * group.length; + } + const meanInterclusterDistance = + totalIntraclusterDistance / embeddings.length; + const clusterRatio = meanInterclusterDistance / overallMeanDistance; + expect(clusterRatio).toBeLessThan(expectedClusterRatio); +} + +function getNearestNeighborIndex( + items: number[][], + otherPoint: number[], + distanceFn = euclidean +) { + const nearest = items.reduce( + (result, point, pointIndex) => { + const pointDistance = distanceFn(point, otherPoint); + if (pointDistance < result.distance) { + return { index: pointIndex, distance: pointDistance }; + } + return result; + }, + { index: 0, distance: Infinity } + ); + return nearest.index; +} diff --git a/test/utils.test.ts b/test/utils.test.ts index 2d2dd49..e50847b 100644 --- a/test/utils.test.ts +++ b/test/utils.test.ts @@ -91,4 +91,21 @@ describe('umap utils', () => { const results = utils.max2d([[1, 2, 3], [4, 5, 6]]); expect(results).toEqual(6); }); + + test('rejection sample', () => { + const results = utils.rejectionSample(5, 10); + const entries = new Set(); + for (const r of results) { + expect(entries.has(r)).toBe(false); + entries.add(r); + } + }); + + test('reshape2d function', () => { + const input = [1, 2, 3, 4, 5, 6]; + expect(utils.reshape2d(input, 2, 3)).toEqual([[1, 2, 3], [4, 5, 6]]); + expect(utils.reshape2d(input, 3, 2)).toEqual([[1, 2], [3, 4], [5, 6]]); + + expect(() => utils.reshape2d(input, 3, 3)).toThrow(); + }); });