Skip to content

Commit

Permalink
add ngt
Browse files Browse the repository at this point in the history
  • Loading branch information
masajiro committed Oct 27, 2023
1 parent 9b3a10a commit d7f8cf0
Show file tree
Hide file tree
Showing 4 changed files with 262 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/neurips23.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ jobs:
- algorithm: diskann
dataset: random-xs
track: ood
- algorithm: ngt
dataset: random-xs
track: ood
fail-fast: false

steps:
Expand Down
12 changes: 12 additions & 0 deletions neurips23/ood/ngt/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM neurips23

RUN apt-get update
RUN apt-get install -y git cmake liblapack-dev bc
RUN pip3 install wheel pybind11
RUN git clone https://github.com/masajiro/NGT-neurips23.git NGT
RUN cd NGT && git log -n 1
RUN cd NGT && mkdir build && cd build && cmake ..
RUN cd NGT/build && make -j 8 && make install
RUN ldconfig
RUN cd NGT/python && python3 setup.py bdist_wheel
RUN pip3 install NGT/python/dist/ngt-*-linux_x86_64.whl
34 changes: 34 additions & 0 deletions neurips23/ood/ngt/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
random-xs:
ngt:
docker-tag: neurips23-ood-ngt
module: neurips23.ood.ngt.module
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"edge": 50, "outdegree": 10, "indegree": 100,
"epsilon": 0.1, "reduction": 0.39}]
# "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-random-50-10-100-0.10-0.39.tgz"}]
query-args: |
[{"epsilon": 1.1}]
text2image-10M:
ngt:
docker-tag: neurips23-ood-ngt
module: neurips23.ood.ngt.module
constructor: NGT
base-args: ["@metric"]
run-groups:
base:
args: |
[{"edge": 140, "outdegree": 10, "indegree": 180,
"epsilon": 0.1, "reduction": 0.39}]
# "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-text2image-140-10-180-0.10-0.39.tgz"}]
query-args: |
[{"epsilon": 1.010},
{"epsilon": 1.014},
{"epsilon": 1.016},
{"epsilon": 1.017},
{"epsilon": 1.018},
{"epsilon": 1.020},
{"epsilon": 1.025}]
213 changes: 213 additions & 0 deletions neurips23/ood/ngt/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
import os
import subprocess
import time

from neurips23.ood.base import BaseOODANN
from benchmark.datasets import DATASETS, download_accelerated

import ngtpy

class NGT(BaseOODANN):
def __init__(self, metric, params):
metrics = {"euclidean": "2", "angular": "E", "ip": "i"}
self._params = params
self._edge_size = int(params["edge"])
self._outdegree = int(params["outdegree"])
self._indegree = int(params["indegree"])
self._metric = metrics[metric]
self._edge_size_for_search = int(params["search_edge"]) if "search_edge" in params.keys() else 0
self._build_time_limit = float(params["timeout"]) if "timeout" in params.keys() else 12
self._epsilon = float(params["epsilon"]) if "epsilon" in params.keys() else 0.1
self._reduction_range = float(params["reduction"]) if "reduction" in params.keys() else 1.8
print("ONNG: edge_size:", self._edge_size)
print("ONNG: outdegree:", self._outdegree)
print("ONNG: indegree=:", self._indegree)
print("ONNG: edge_size_for_search:", self._edge_size_for_search)
print("ONNG: epsilon:", self._epsilon)
print("ONNG: reduction range:", self._reduction_range)
print("ONNG: metric:", metric)

def get_title(self):
return "index-%s-%s-%s-%.2f-%.2f" % (
self._edge_size,
self._outdegree,
self._indegree,
self._epsilon,
self._reduction_range,
)

def set_index_path(self, dataset):
self._index_dir = os.path.join("data", "indices", "ood", "ngt", self.get_title())
self._index_path = os.path.join(self._index_dir, "onng")
self._sanng_path = os.path.join(self._index_dir, "sanng")
self._anng_path = os.path.join(self._index_dir, "anng-" + str(self._edge_size))

def fit(self, dataset):
print("ONNG: start indexing...")
ds = DATASETS[dataset]()
print("ONNG: dataset:", dataset)
print("ONNG: dataset str:", ds.__str__())
print("ONNG: distance:", ds.distance())
print("ONNG: dimension:", ds.d)
print("ONNG: type:", ds.dtype)
print("ONNG: nb:", ds.nb)
print("ONNG: dataset file name:", ds.get_dataset_fn())
print("ONNG: index path:", self._index_path)
self.set_index_path(dataset)
if not os.path.exists(self._index_dir):
os.makedirs(self._index_dir)
print("ONNG: index:", self._index_path)
dim = ds.d
if (not os.path.exists(self._index_path)) and (not os.path.exists(self._sanng_path)):
print("ONNG: create a sparse ANNG to optimize the graph.")
t = time.time()
args = [
"ngt",
"create",
"-v",
"-it",
"-p8",
"-b500",
"-ga",
"-of",
"-D" + self._metric,
"-d" + str(dim),
"-E5",
"-S-2",
"-e0.0",
"-P0",
"-B30",
"-T0",
self._sanng_path,
]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: append for SANNG")
args = ["ngt", "append", "-mb",
"-n" + str(ds.nb),
self._sanng_path,
ds.get_dataset_fn()]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: SANNG appending time(sec)=" + str(time.time() - t))
print("ONNG: build a sparse ANNG index.")
t = time.time()
args = ["ngt", "construct-graph", "-v", "-G-", "-E0.0", "-S100",
self._sanng_path]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: SANNG index build time(sec)=", str(time.time() - t))

if (not os.path.exists(self._index_path)) and (not os.path.exists(self._anng_path)):
print("ONNG: build ANNG")
t = time.time()
args = [
"ngt",
"create",
"-v",
"-it",
"-p8",
"-b20",
"-ga",
"-of",
"-D" + self._metric,
"-d" + str(dim),
"-E18",
"-S" + str(self._edge_size_for_search),
"-e" + str(self._epsilon),
"-P0",
"-B30",
"-T" + str(self._build_time_limit),
self._anng_path,
]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: degree adjustment")
t = time.time()
args = [
"ngt",
"construct-graph",
"-v",
"-Go",
"-T0",
"-P0",
"-N" + str(self._edge_size),
"-O" + str(self._outdegree),
"-I" + str(self._indegree),
self._anng_path,
self._sanng_path,
]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: degree ajustment time(sec)=" + str(time.time() - t))
if not os.path.exists(self._index_path):
print("ONNG: shortcut reduction")
t = time.time()
args = [
"ngt",
"reconstruct-graph",
"-v",
"-R" + str(self._reduction_range),
"-mS",
"-Ps",
"-sp",
"-o0",
"-i0",
self._anng_path,
self._index_path,
]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
print("ONNG: shortcut reduction time(sec)=" + str(time.time() - t))
if os.path.exists(self._index_path):
print("ONNG: index already exists!", self._index_path)
t = time.time()
self.index = ngtpy.Index(self._index_path, read_only=True, tree_disabled=False)
self.indexName = self._index_path
print("ONNG: open time(sec)=" + str(time.time() - t))
else:
print("ONNG: something wrong...")
print("ONNG: end of fit")

def load_index(self, dataset):
self.set_index_path(dataset)
if not os.path.exists(self._index_path + "/grp"):
if "url" not in self._params:
return False
if not os.path.exists(self._index_dir):
os.makedirs(self._index_dir)
tar_file = self._index_path + ".tgz";
if not os.path.exists(tar_file):
print("ONNG: downloading the index... index={} => {}".format(self._params["url"], self._index_path))
download_accelerated(self._params["url"], tar_file, quiet=True)
args = ["tar", "zxf", tar_file, "-C", self._index_dir]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
args = ["rm", "-r", tar_file]
print("ONNG: '{}'".format(" ".join(args)))
subprocess.run(args, check=True)
os.makedirs(self._sanng_path)
os.makedirs(self._anng_path)

def set_query_arguments(self, query_args):
epsilon = query_args.get("epsilon", 1.0)
edge_size = query_args.get("edge", 0)
print("ONNG: edge_size:", edge_size)
print("ONNG: epsilon:", epsilon)
self.name = "ngt-onng(%s, %s, %s, %s, %s)" % (
self._edge_size,
self._outdegree,
self._indegree,
self._reduction_range,
epsilon,
)
epsilon = epsilon - 1.0
self.index.set(epsilon=epsilon, edge_size=edge_size)

def query(self, X, n):
self._results = ngtpy.BatchResults()
return self.index.batch_search(X, self._results, n, with_distance=False)

def get_results(self):
return self._results.get_ids()

0 comments on commit d7f8cf0

Please sign in to comment.