Skip to content

Commit

Permalink
add Puck algorithm for track 1. (#60)
Browse files Browse the repository at this point in the history
* add Puck algorithm for track 1.

* Update benchmarks.yml

* Update algos.yaml

* Update algos.yaml

* Update algos.yaml

* download 5 index files one by one

* Update algos.yaml

* Update algos.yaml

text2images, Increase compression ratio

* Update puck_t1.py

* Update puck_t1.py

* Update puck_t1.py

* Update algos.yaml

add msturing-1B

* Update algos.yaml

* Update algos.yaml

---------

Co-authored-by: Harsha Vardhan Simhadri <harsha-simhadri@users.noreply.github.com>
  • Loading branch information
nk2014yj and harsha-simhadri authored Apr 6, 2023
1 parent 30adce6 commit e5cd244
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ jobs:
- algorithm: faiss-t1
dataset: random-xs
library: faissconda
# CI matrix entry: exercise the puck-t1 algorithm on the small random-xs dataset.
# NOTE(review): `library: puck` presumably selects install/Dockerfile.puck - confirm against the workflow steps.
- algorithm: puck-t1
dataset: random-xs
library: puck
- algorithm: faiss-t1
dataset: random-range-xs
library: faissconda
Expand Down
113 changes: 113 additions & 0 deletions algos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,20 @@ random-xs:
args:
- [ 0.2, 0.8, 1.0 ]
query-args: [ ]
puck-t1:
# puck-t1: random-xs
# args: JSON list with one dict, presumably handed to Puck(metric, index_params) as index_params.
#   "indexkey" becomes part of the local index directory name: data/<dataset>.<indexkey>.puckindex
#   "url" is the download prefix; each index component is fetched from <url>_<component>.
# query-args: each string is four comma-separated integers forwarded to the searcher's
#   update_params(); the first one is also used as top-k for the result buffers.
docker-tag: billion-scale-benchmark-puck
module: benchmark.algorithms.puck_t1
constructor: Puck
base-args: ["@metric"]
run-groups:
base:
args: |
[{"indexkey": "C10_F10_F5_FN20",
"url": "https://gips-test-bucket-0-gz.gz.bcebos.com/similar/random-xs.C10_F10_F5_FN20.puckindex"}]
query-args: |
[
"10,20,2000,200",
"10,30,3000,200"]
ngt-t1:
# ngt-t1: random-xs
docker-tag: billion-scale-benchmark-ngt
Expand Down Expand Up @@ -263,6 +277,7 @@ random-xs:
[{"hnswM":32, "hnswefC":200, "K1":2, "blockSize":4096, "identifier": "bbann"}]
query-args: |
[{"nProbe":5, "efSearch":100, "K1":2, "blockSize":4096, "identifier": "bbann"}]
deep-10M:
faiss-t1:
docker-tag: billion-scale-benchmark-faissconda
Expand Down Expand Up @@ -409,6 +424,32 @@ deep-1B:
"nprobe=128,quantizer_efSearch=512",
"nprobe=256,quantizer_efSearch=64",
"nprobe=256,quantizer_efSearch=128"]
puck-t1:
# puck-t1: deep-1B
# args: JSON list with one dict, presumably handed to Puck(metric, index_params) as index_params.
#   "indexkey" becomes part of the local index directory name: data/<dataset>.<indexkey>.puckindex
#   "url" is the download prefix; each index component is fetched from <url>_<component>.
# query-args: each string is four comma-separated integers forwarded to the searcher's
#   update_params(); the first one is also used as top-k for the result buffers.
docker-tag: billion-scale-benchmark-puck
module: benchmark.algorithms.puck_t1
constructor: Puck
base-args: ["@metric"]
run-groups:
base:
args: |
[{"indexkey": "C10000_F5000_FN16_N32",
"url": "https://gips-test-bucket-0-gz.gz.bcebos.com/similar/deep-1B.puckindex"}]
query-args: |
[
"10,20,12000,200",
"10,30,13000,200",
"10,30,14000,200",
"10,30,15000,200",
"10,40,19000,200",
"10,30,20000,300",
"10,30,21000,200",
"10,40,30000,200",
"10,90,140000,1100",
"10,100,200000,3000"
]
team11:
docker-tag: billion-scale-benchmark-faissconda
module: benchmark.algorithms.faiss_t1_plus
Expand Down Expand Up @@ -757,6 +798,28 @@ msturing-1B:
"nprobe=128,quantizer_efSearch=512",
"nprobe=256,quantizer_efSearch=256",
"nprobe=256,quantizer_efSearch=512"]
puck-t1:
# puck-t1: msturing-1B
# args: JSON list with one dict, presumably handed to Puck(metric, index_params) as index_params.
#   "indexkey" becomes part of the local index directory name: data/<dataset>.<indexkey>.puckindex
#   "url" is the download prefix; each index component is fetched from <url>_<component>.
# query-args: each string is four comma-separated integers forwarded to the searcher's
#   update_params(); the first one is also used as top-k for the result buffers.
docker-tag: billion-scale-benchmark-puck
module: benchmark.algorithms.puck_t1
constructor: Puck
base-args: ["@metric"]
run-groups:
base:
args: |
[{"indexkey": "C8000_F5000_FN13_N34",
"url": "https://gips-test-bucket-0-gz.gz.bcebos.com/similar/msturing-1B.puckindex"}]
query-args: |
[
"10,40,19000,200",
"10,60,19000,200",
"10,80,19000,600",
"10,30,20000,200",
"10,50,20000,300",
"10,20,22000,300",
"10,60,23000,200",
"10,70,24000,300",
"10,70,25000,300",
"10,90,100000,900"]
team11:
docker-tag: billion-scale-benchmark-faissconda
module: benchmark.algorithms.faiss_t1_plus
Expand Down Expand Up @@ -934,6 +997,30 @@ bigann-1B:
"nprobe=256,quantizer_efSearch=64",
"nprobe=256,quantizer_efSearch=128",
"nprobe=256,quantizer_efSearch=512"]
puck-t1:
# puck-t1: bigann-1B
# args: JSON list with one dict, presumably handed to Puck(metric, index_params) as index_params.
#   "indexkey" becomes part of the local index directory name: data/<dataset>.<indexkey>.puckindex
#   "url" is the download prefix; each index component is fetched from <url>_<component>.
# query-args: each string is four comma-separated integers forwarded to the searcher's
#   update_params(); the first one is also used as top-k for the result buffers.
docker-tag: billion-scale-benchmark-puck
module: benchmark.algorithms.puck_t1
constructor: Puck
base-args: ["@metric"]
run-groups:
base:
args: |
[{"indexkey": "C10000_F5000_FN16_N32",
"url": "https://gips-test-bucket-0-gz.gz.bcebos.com/similar/bigann-1B.puckindex"
}]
query-args: |
[
"10,30,11000,100",
"10,30,12000,100",
"10,20,13000,200",
"10,30,13000,200",
"10,30,14000,200",
"10,30,15000,100",
"10,30,19000,200",
"10,30,20000,300",
"10,90,200000,1400",
"10,100,230000,1300"
]
buddy-t1:
docker-tag: billion-scale-benchmark-faissconda
module: benchmark.algorithms.buddy_t1
Expand Down Expand Up @@ -1195,6 +1282,32 @@ text2image-1B:
"nprobe=128,quantizer_efSearch=512,ht=256",
"nprobe=256,quantizer_efSearch=512,ht=120",
"nprobe=256,quantizer_efSearch=512,ht=122"]
puck-t1:
# puck-t1: text2image-1B
# args: JSON list with one dict, presumably handed to Puck(metric, index_params) as index_params.
#   "indexkey" becomes part of the local index directory name: data/<dataset>.<indexkey>.puckindex
#   "url" is the download prefix; each index component is fetched from <url>_<component>.
# query-args: each string is four comma-separated integers forwarded to the searcher's
#   update_params(); the first one is also used as top-k for the result buffers.
docker-tag: billion-scale-benchmark-puck
module: benchmark.algorithms.puck_t1
constructor: Puck
base-args: ["@metric"]
run-groups:
base:
args: |
[{"indexkey": "C10000_F5000_FN13_N34",
"url": "https://gips-test-bucket-0-gz.gz.bcebos.com/similar/text2image-1B.puckindex"
}]
query-args: |
[
"10,10,6000,500",
"10,10,8000,500",
"10,20,9000,700",
"10,20,10000,1000",
"10,30,11000,1000",
"10,30,12000,800",
"10,30,14000,600",
"10,20,19000,700",
"10,20,20000,1000",
"10,20,20000,2000"
]
diskann-t2:
docker-tag: billion-scale-benchmark-diskann
module: benchmark.algorithms.diskann-t2
Expand Down
92 changes: 92 additions & 0 deletions benchmark/algorithms/puck_t1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#-*- coding:utf-8 -*-
################################################################################
#
# Copyright (c) 2021 Baidu.com, Inc. All Rights Reserved
#
################################################################################
"""
@file: puck_t1.py
@author: yinjie06(yinjie06@baidu.com)
@date: 2021-10-06 13:44
@brief: BaseANN wrapper that downloads/loads a prebuilt Puck index and answers track T1 queries.
"""
from benchmark.algorithms.base import BaseANN
from benchmark.algorithms.base import BaseANN
from benchmark.datasets import DATASETS, download_accelerated
from puck import py_puck_api
import os
import numpy as np
import time

swig_ptr = py_puck_api.swig_ptr
class Puck(BaseANN):
    """Track T1 search wrapper around a prebuilt Puck index.

    The index is not built here: ``load_index`` downloads the published index
    files (when they are not already on disk), links them to ``./puck_index``
    and initialises the native ``PySearcher``.

    Args:
        metric: Metric name supplied by the benchmark framework; stored only.
        index_params: Dict of index settings. Recognised keys:
            ``indexkey`` (used in the local index directory name, default
            ``"NA"``), ``url`` (download prefix for the index files) and
            ``query_bs`` (stored, default ``-1``).
    """

    # Files that make up one index; each is fetched from "<url>_<file>".
    _INDEX_COMPONENTS = (
        "filer_data.dat",
        "GNOIMI_coarse.dat",
        "GNOIMI_fine.dat",
        "index.dat",
        "learn_assign.dat",
    )
    # Link created in the working directory before the searcher is initialised.
    # NOTE(review): presumably the path PySearcher.init() reads from - confirm.
    _INDEX_LINK = "puck_index"

    def __init__(self, metric, index_params):
        self._index_params = index_params
        self._metric = metric
        self.indexkey = index_params.get("indexkey", "NA")
        self._query_bs = index_params.get("query_bs", -1)
        self.index = py_puck_api.PySearcher()
        self.topk = 10
        self.n = 0
        # Initialised up front so __str__/query/get_results do not raise
        # AttributeError when called before set_query_arguments().
        self.res = None
        self.qas = None

    def track(self):
        """Return the benchmark track this algorithm competes in."""
        return "T1"

    def fit(self, dataset):
        """Do nothing except print a notice: only prebuilt indexes are supported."""
        print("Puck provide the index-data and the Docker image for search. We will open Puck to open source community at the end of this year.")

    def index_name(self, name):
        """Return the local directory that holds the index for dataset *name*."""
        return f"data/{name}.{self.indexkey}.puckindex"

    def index_tag_name(self, name):
        """Return the index directory name without the ``data/`` prefix."""
        return f"{name}.{self.indexkey}.puckindex"

    def load_index(self, dataset):
        """Make the index for *dataset* available and initialise the searcher.

        Downloads the index files when the local index directory is missing,
        points ``./puck_index`` at that directory and calls ``init()`` on the
        searcher.

        Returns:
            ``False`` when the index is missing locally and no ``url`` was
            configured; ``True`` otherwise.
        """
        index_path = self.index_name(dataset)
        print(index_path)
        if not os.path.exists(index_path):
            if 'url' not in self._index_params:
                return False
            # The index is published as several files; fetch them one by one.
            index_dir = os.path.join(os.getcwd(), index_path)
            print(index_dir)
            os.makedirs(index_dir, mode=0o777, exist_ok=True)
            print('Downloading index in background. This can take a while.')
            url_prefix = self._index_params['url']
            for component in self._INDEX_COMPONENTS:
                download_accelerated(url_prefix + "_" + component, index_path + "/" + component, quiet=True)
                # NOTE(review): the original 60 s pause is kept; its purpose (and
                # whether it was meant per file) is not evident - confirm before removing.
                time.sleep(60)

        print("Loading index")
        self._link_index(index_path)
        os.system(" ls -al puck_index/")
        self.index.init()
        self.index.show()
        ds = DATASETS[dataset]()
        self.n = ds.nq
        return True

    def _link_index(self, index_path):
        """Point ``./puck_index`` at *index_path*, replacing a stale symlink."""
        link = self._INDEX_LINK
        # A leftover link from an earlier run would otherwise keep pointing at
        # the previous index (and `ln -s` would nest a new link inside it).
        if os.path.islink(link):
            os.remove(link)
        if os.path.lexists(link):
            # A real file/directory is in the way; do not delete user data.
            print(f"{link} exists and is not a symlink; leaving it untouched")
            return
        print(f"symlink {link} -> {index_path}")
        os.symlink(index_path, link)

    def _alloc_results(self, n):
        """Return fresh ``(distances, ids)`` buffers of shape ``(n, topk)``."""
        shape = (n, self.topk)
        return (np.empty(shape, dtype='float32'), np.empty(shape, dtype='uint32'))

    def set_query_arguments(self, query_args):
        """Apply one query-args string of four comma-separated integers.

        All four values are forwarded to ``update_params``; the first one is
        also used as top-k when sizing the result buffers.
        """
        values = [int(field) for field in query_args.strip().split(',')[:4]]
        self.index.update_params(values[0], values[1], values[2], values[3])
        self.topk = values[0]
        self.res = self._alloc_results(self.n)
        self.qas = query_args
        print(type(self.res[0]), len(self.res[0]))

    def query(self, X, topK):
        """Search the rows of 2-D array *X*; results are kept for ``get_results``.

        ``topK`` is accepted for interface compatibility; the top-k actually
        used is the one configured through ``set_query_arguments``.
        """
        n, _ = X.shape
        # Size the output buffers to this batch so they always match the
        # number of queries (presumably search() fills n * topk entries - confirm).
        if self.res is None or self.res[0].shape != (n, self.topk):
            self.res = self._alloc_results(n)
        self.index.search(n, swig_ptr(X), swig_ptr(self.res[0]), swig_ptr(self.res[1]))

    def get_results(self):
        """Return the id buffer of the last search, or ``None`` if none ran."""
        if self.res is None:
            return None
        return self.res[1]

    def __str__(self):
        return f'Puck({self.qas})'

6 changes: 6 additions & 0 deletions install/Dockerfile.puck
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Search image for the puck-t1 entry, layered on the shared benchmark base image.
FROM billion-scale-benchmark

# Fetch the prebuilt Puck Python bindings and expose them as the `puck` package
# (the archive is expected to unpack to a `lib` directory). Download, extract and
# cleanup happen in one layer so the tarball does not stay in the image.
RUN wget https://gips-test-bucket-0-gz.gz.bcebos.com/similar/puck_to_python.tar.gz \
    && tar zxvf puck_to_python.tar.gz \
    && mv lib puck \
    && rm puck_to_python.tar.gz
# Fail the build early if the bindings cannot be imported.
RUN python3 -c 'from puck import py_puck_api'

0 comments on commit e5cd244

Please sign in to comment.