add NGT to T1 and T2 (#66)
* add NGT

* add random-xs

* add ngt entries to benchmarks.yml

* add prebuilt indices

* update algos.yaml for bigann and deep

---------

Co-authored-by: Harsha Vardhan Simhadri <harsha-simhadri@users.noreply.github.com>
masajiro and harsha-simhadri authored Apr 6, 2023
1 parent 80e6293 commit 30adce6
Showing 5 changed files with 468 additions and 0 deletions.
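For orientation, the new algos.yaml run-groups below pack NGT's build parameters into a one-element JSON list under `args` and the search parameters under `query-args`. The following is a minimal sketch of how those strings decode into the dicts the wrapper receives, assuming the harness simply parses the JSON before handing them to the constructor and to `set_query_arguments`; the key meanings are taken from the accessors in `ngt_t1.py` further down.

```python
import json

# Smallest run-group added in this commit (random-xs), copied verbatim from algos.yaml.
build_args = json.loads('[{"q": 10, "s": 600, "b": 10000, "rs": 1000, "ri": 100, "rx": 10}]')[0]
query_args = json.loads('[{"epsilon":0.10, "edge":0, "blob":500}]')[0]

# Build-time keys as read by NGT.__init__ in ngt_t1.py:
#   q  -> quantization              s  -> quantization sample
#   b  -> blob size                 rs -> number of "r" samples
#   ri -> number of "r" iterations  rx -> "r" step
# Query-time keys as read by set_query_arguments:
#   epsilon -> epsilon, edge -> edge size, blob -> exploration size
print(build_args["q"], build_args["b"])           # 10 10000
print(query_args["epsilon"], query_args["blob"])  # 0.1 500
```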
6 changes: 6 additions & 0 deletions .github/workflows/benchmarks.yml
@@ -36,6 +36,12 @@ jobs:
        - algorithm: httpann_example
          dataset: random-range-xs
          library: httpann_example
        - algorithm: ngt-t1
          dataset: random-range-xs
          library: ngt
        - algorithm: ngt-t2
          dataset: random-range-xs
          library: ngt
        - algorithm : kst_ann_t1
          dataset: random-xs
          library: kst_ann_t1
205 changes: 205 additions & 0 deletions algos.yaml
@@ -228,6 +228,30 @@ random-xs:
        args:
          - [ 0.2, 0.8, 1.0 ]
        query-args: [ ]
  ngt-t1:
    # ngt-t1: random-xs
    docker-tag: billion-scale-benchmark-ngt
    module: benchmark.algorithms.ngt_t1
    constructor: NGT
    base-args: ["@metric"]
    run-groups:
      base:
        args: |
          [{"q": 10, "s": 600, "b": 10000, "rs": 1000, "ri": 100, "rx": 10}]
        query-args: |
          [{"epsilon":0.10, "edge":0, "blob":500}]
  ngt-t2:
    # ngt-t2: random-xs
    docker-tag: billion-scale-benchmark-ngt
    module: benchmark.algorithms.ngt_t1
    constructor: NGT
    base-args: ["@metric"]
    run-groups:
      base:
        args: |
          [{"q": 10, "s": 600, "b": 10000, "rs": 1000, "ri": 100, "rx": 10}]
        query-args: |
          [{"epsilon":0.10, "edge":0, "blob":500, "expansion":2.0}]
  bbann:
    docker-tag: billion-scale-benchmark-bbann
    module: benchmark.algorithms.bbann
@@ -429,6 +453,49 @@ deep-1B:
{"Ls":70, "BW":4, "T":16},
{"Ls":80, "BW":4, "T":16},
{"Ls":100, "BW":4, "T":16}]
ngt-t1:
# ngt-t1: deep-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t1
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 1000000, "s": 100, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1128/Deep1BDataset-1000000000.tar"}]
query-args: |
[{"epsilon":0.10, "edge":600, "blob":200},
{"epsilon":0.15, "edge":600, "blob":200},
{"epsilon":0.20, "edge":600, "blob":200},
{"epsilon":0.24, "edge":600, "blob":200},
{"epsilon":0.26, "edge":600, "blob":200},
{"epsilon":0.28, "edge":600, "blob":200},
{"epsilon":0.30, "edge":600, "blob":200},
{"epsilon":0.32, "edge":600, "blob":200},
{"epsilon":0.35, "edge":600, "blob":200},
{"epsilon":0.40, "edge":600, "blob":200}]
ngt-t2:
# ngt-t2: deep-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t2
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 1000000, "s": 100, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1128/Deep1BDataset-1000000000.tar"}]
query-args: |
[{"epsilon":0.16, "edge":100, "blob":500, "expansion":4.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":5.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":6.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":7.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":8.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":9.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":10.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":11.0},
{"epsilon":0.16, "edge":100, "blob":500, "expansion":12.0}]
msspacev-1B:
kst_ann_t1:
docker-tag: billion-scale-benchmark-kst_ann_t1
@@ -537,6 +604,51 @@ msspacev-1B:
{"Ls":110, "BW":4, "T":16},
{"Ls":120, "BW":4, "T":16},
{"Ls":130, "BW":4, "T":16}]
ngt-t1:
# ngt-t1: msspacev-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t1
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 400000, "s": 500, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1115/MSSPACEV1B-1000000000.tar"}]
query-args: |
[{"epsilon":0.10, "edge":0, "blob":500},
{"epsilon":0.15, "edge":0, "blob":500},
{"epsilon":0.18, "edge":0, "blob":500},
{"epsilon":0.20, "edge":0, "blob":500},
{"epsilon":0.22, "edge":0, "blob":500},
{"epsilon":0.24, "edge":0, "blob":500},
{"epsilon":0.26, "edge":0, "blob":500},
{"epsilon":0.30, "edge":0, "blob":500},
{"epsilon":0.32, "edge":0, "blob":500},
{"epsilon":0.34, "edge":0, "blob":500},
{"epsilon":0.36, "edge":0, "blob":500}]
ngt-t2:
# ngt-t2: msspacev-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t2
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 400000, "s": 500, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1115/MSSPACEV1B-1000000000.tar"}]
query-args: |
[{"epsilon":0.18, "edge":0, "blob":500, "expansion":2},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":3},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":4},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":5},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":6},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":7},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":8},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":9},
{"epsilon":0.18, "edge":0, "blob":500, "expansion":10}]
bbann:
docker-tag: billion-scale-benchmark-bbann
module: benchmark.algorithms.bbann
@@ -691,6 +803,49 @@ msturing-1B:
{"Ls":70, "BW":4, "T":16},
{"Ls":80, "BW":4, "T":16},
{"Ls":100, "BW":4, "T":16}]
ngt-t1:
# ngt-t1: msturing-1B:
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t1
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 400000, "s": 200, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1112/MSTuringANNS-1000000000.tar"}]
query-args: |
[{"epsilon":0.10, "edge":0, "blob":500},
{"epsilon":0.12, "edge":0, "blob":500},
{"epsilon":0.14, "edge":0, "blob":500},
{"epsilon":0.16, "edge":0, "blob":500},
{"epsilon":0.18, "edge":0, "blob":500},
{"epsilon":0.20, "edge":0, "blob":500},
{"epsilon":0.22, "edge":0, "blob":500},
{"epsilon":0.24, "edge":0, "blob":500},
{"epsilon":0.26, "edge":0, "blob":500},
{"epsilon":0.30, "edge":0, "blob":500}]
ngt-t2:
# ngt-t2: msturing-1B:
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t2
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 400000, "s": 200, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1112/MSTuringANNS-1000000000.tar"}]
query-args: |
[{"epsilon":0.20, "edge":0, "blob":500, "expansion":1.0},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":1.2},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":1.4},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":1.6},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":1.8},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":2.0},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":2.2},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":2.4},
{"epsilon":0.20, "edge":0, "blob":500, "expansion":2.6}]
bigann-1B:
kst_ann_t1:
docker-tag: billion-scale-benchmark-kst_ann_t1
@@ -847,6 +1002,56 @@ bigann-1B:
{"Ls":70, "BW":4, "T":16},
{"Ls":80, "BW":4, "T":16},
{"Ls":100, "BW":4, "T":16}]
ngt-t1:
# ngt-t1: bigann-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t1
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 500000, "s": 500, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1119/BigANNDataset-1000000000.tar"}]
query-args: |
[{"epsilon":0.10, "edge":600, "blob":200},
{"epsilon":0.20, "edge":600, "blob":200},
{"epsilon":0.25, "edge":600, "blob":200},
{"epsilon":0.27, "edge":600, "blob":200},
{"epsilon":0.29, "edge":600, "blob":200},
{"epsilon":0.30, "edge":600, "blob":200},
{"epsilon":0.31, "edge":600, "blob":200},
{"epsilon":0.32, "edge":600, "blob":200},
{"epsilon":0.33, "edge":600, "blob":200},
{"epsilon":0.34, "edge":600, "blob":200},
{"epsilon":0.35, "edge":600, "blob":200},
{"epsilon":0.40, "edge":600, "blob":200},
{"epsilon":0.45, "edge":600, "blob":200},
{"epsilon":0.50, "edge":600, "blob":200}]
ngt-t2:
# ngt-t2: bigann-1B
docker-tag: billion-scale-benchmark-ngt
module: benchmark.algorithms.ngt_t2
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"q": 500000, "s": 500, "b": 1000000, "rs": 1000000, "ri": 600, "rx": 10,
"index": "https://ngtf-rlab.east.edge.storage-yahoo.jp/neurips21/indices/1119/BigANNDataset-1000000000.tar"}]
query-args: |
[{"epsilon":0.18, "edge":600, "blob":1000, "expansion":2},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":3},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":4},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":5},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":6},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":7},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":8},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":9},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":10},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":11},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":12},
{"epsilon":0.18, "edge":600, "blob":1000, "expansion":13}]
ssnpp-1B:
kota-t2:
docker-tag: billion-scale-benchmark-kota
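The ngt-t2 entries above reference benchmark.algorithms.ngt_t2, which is not shown in this excerpt. Per the comment in set_query_arguments of ngt_t1.py below ("only this part is different between t1 and t2"), the T2 variant differs only in honoring the expansion query argument. The following is a hypothetical sketch of what such a module could look like, assuming it simply subclasses the T1 wrapper; the actual file may be structured differently.

```python
# Hypothetical sketch of benchmark/algorithms/ngt_t2.py (not part of this excerpt):
# assumes T2 subclasses the T1 wrapper and only enables exact-result expansion.
from benchmark.algorithms.ngt_t1 import NGT as NGTT1


class NGT(NGTT1):
    def set_query_arguments(self, query_args):
        self._epsilon = query_args.get("epsilon", 0.1)
        self._edge_size = query_args.get("edge", 0)
        self._exploration_size = query_args.get("blob", 120)
        # T2 reads the expansion knob that T1 pins to 0.0
        self._exact_result_expansion = query_args.get("expansion", 2.0)
        self._index.set(epsilon=self._epsilon, blob_epsilon=0.0,
                        edge_size=self._edge_size,
                        exploration_size=self._exploration_size,
                        exact_result_expansion=self._exact_result_expansion)
        print(f"NGT: epsilon={self._epsilon} edge={self._edge_size} "
              f"blob={self._exploration_size} expansion={self._exact_result_expansion}")
```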
122 changes: 122 additions & 0 deletions benchmark/algorithms/ngt_t1.py
@@ -0,0 +1,122 @@
from __future__ import absolute_import
import numpy as np
import os
import time
import subprocess
from benchmark.algorithms.base import BaseANN
from benchmark.datasets import DATASETS, download_accelerated

import ngtpy

class NGT(BaseANN):
    def __init__(self, metric, params):
        self._params = params
        self._metric = metric
        self._quantization = params.get("q", 1024)
        self._quantization_sample = params.get("s", 100)
        self._blob = params.get("b", 10000)
        self._num_of_r_samples = params.get("rs", 1000)
        self._num_of_r_iterations = params.get("ri", 10)
        self._r_step = params.get("rx", 2)
        #self._ngt_root = "data/ngt" # debug
        self._ngt_root = "ngt"
        self._ngt_index_root = self._ngt_root + "/indexes/"
        self._is_open = False

    def setIndexPath(self, dataset):
        self._path = f"data/indices/trackT1/algo.NGT:q{self._quantization}-s{self._quantization_sample}-b{self._blob}-rs{self._num_of_r_samples}-ri{self._num_of_r_iterations}"
        os.makedirs(self._path, exist_ok=True)
        self._index_path = os.path.join(self._path, DATASETS[dataset]().short_name())

    def fit(self, dataset):
        index_params = self._params
        ds = DATASETS[dataset]()
        # pick the padded (pseudo) dimension and subvector width from the dataset dimensionality
        if ds.d <= 128:
            pseudo_dimension = 128
            subvector_dimension = 2
        elif ds.d <= 256:
            pseudo_dimension = 256
            subvector_dimension = 4
        self.setIndexPath(dataset)
        print("NGT dataset:", dataset)
        print("NGT dataset str:", ds.__str__())
        print("NGT distance:", ds.distance())
        print("NGT dimension:", ds.d)
        print("NGT type:", ds.dtype)
        print("NGT nb:", ds.nb)
        print("NGT dataset file name:", ds.get_dataset_fn())
        print("NGT quantization (q):", self._quantization)
        print("NGT quantization sample (s):", self._quantization_sample)
        print("NGT blob (b):", self._blob)
        print("NGT # of r samples (rs):", self._num_of_r_samples)
        print("NGT # of r iterations (ri):", self._num_of_r_iterations)
        print("NGT ngt root:", self._ngt_root)
        print("NGT index path:", self._index_path)
        print("NGT build.sh:", self._ngt_root + '/build.sh')
        # build the index by invoking NGT's build.sh with the dataset and the configured parameters
        args = ['/bin/bash', self._ngt_root + '/build.sh',
                #'--search',
                '--root=' + self._ngt_root,
                '--object=' + ds.get_dataset_fn(),
                '--benchmark=' + self._index_path, '-d=' + str(pseudo_dimension),
                '-D=' + str(subvector_dimension),
                '-f', '-m=KMEANS_QUANTIZATION', '-n=' + str(ds.nb),
                '-q=' + str(self._quantization), '-E=' + str(self._quantization_sample),
                '-b=' + str(self._blob),
                '-r=' + str(self._num_of_r_samples), '-R=' + str(self._num_of_r_iterations),
                '-X=' + str(self._r_step)]
        print(args)
        subprocess.call(args)

        if not self._is_open:
            print("NGT: opening the index...")
            self._index = ngtpy.QuantizedBlobIndex(self._index_path)
            self._is_open = True

    def load_index(self, dataset):
        self.setIndexPath(dataset)

        if not os.path.exists(self._index_path + "/grp"):
            # no local index: download and unpack the prebuilt index if a URL was configured
            if "index" not in self._params:
                return False
            if not os.path.exists(self._index_path + ".tar"):
                print(f"NGT: downloading the index... index={self._params['index']}->{self._index_path}")
                download_accelerated(self._params["index"], self._index_path + ".tar", quiet=True)
            args = ['tar', 'xf', self._index_path + ".tar", "-C", self._path]
            print(args)
            subprocess.call(args)
            args = ['rm', '-r', self._index_path + ".tar"]
            print(args)
            subprocess.call(args)

        if not self._is_open:
            print("NGT: opening the index...")
            self._index = ngtpy.QuantizedBlobIndex(self._index_path)
            self._is_open = True

    def set_query_arguments(self, query_args):
        self._epsilon = query_args.get("epsilon", 0.1)
        self._edge_size = query_args.get("edge", 0)
        self._exploration_size = query_args.get("blob", 120)
        # only this part is different between t1 and t2
        #self._exact_result_expansion = query_args.get("expansion", 2.0)
        self._exact_result_expansion = 0.0
        self._index.set(epsilon=self._epsilon, blob_epsilon=0.0, edge_size=self._edge_size,
                        exploration_size=self._exploration_size,
                        exact_result_expansion=self._exact_result_expansion)
        print(f"NGT: epsilon={self._epsilon} edge={self._edge_size} blob={self._exploration_size}")

    def query(self, X, n):
        self._results = ngtpy.BatchResults()
        self._index.batchSearch(X, self._results, n)

    def range_query(self, X, radius):
        print("NGT: range_query")

    def get_results(self):
        return self._results.getIDs()

    def get_range_results(self):
        return self._results.getIndex(), self._results.getIndexedIDs(), self._results.getIndexedDistances()

    def __str__(self):
        return f"NGT-T1:q{self._quantization}-b{self._blob}-rs{self._num_of_r_samples}-ri{self._num_of_r_iterations}-e{self._epsilon:.3f}-b{self._exploration_size}"