Skip to content

Commit

Permalink
add msturing 100k runbook
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu authored and Ubuntu committed Feb 4, 2025
1 parent f3a7ad9 commit 2b54f40
Show file tree
Hide file tree
Showing 4 changed files with 937 additions and 6 deletions.
11 changes: 6 additions & 5 deletions benchmark/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,16 +433,15 @@ def distance(self):
# Slice of the MSTuring dataset with `nb` base points, used with the 10K query set;
# ground truth is only configured for the 1M slice (gt_fn is None for any other size).
# this is needed for backwards compatibility with the streaming code
class MSTuringANNSPQ(BillionScaleDatasetCompetitionFormat):
def __init__(self, nb_M=1):
self.nb_M = nb_M
self.nb = 10**6 * nb_M
def __init__(self, nb):
self.nb = nb
self.d = 100
self.nq = 10000
self.dtype = "float32"
self.ds_fn = "base1b.fbin"
self.qs_fn = "testQuery10K.fbin"
self.gt_fn = (
"msturing-1M-private-gt100" if self.nb_M == 1 else
"msturing-1M-private-gt100" if self.nb == 1000000 else
None
)
self.base_url = "https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/"
Expand Down Expand Up @@ -1309,7 +1308,9 @@ def short_name(self):
'msturing-10M': lambda : MSTuringANNS(10),
'msturing-1M': lambda : MSTuringANNS(1),

'msturingpq-1M': lambda : MSTuringANNSPQ(1),
'msturingpq-100K': lambda : MSTuringANNSPQ(100000),
'msturingpq-1M': lambda : MSTuringANNSPQ(1000000),
'msturingpq-10M': lambda : MSTuringANNSPQ(10000000),

'msturing-10M-clustered': lambda: MSTuringClustered10M(),
'msturing-30M-clustered': lambda: MSTuringClustered30M(),
Expand Down
11 changes: 11 additions & 0 deletions neurips23/runbooks/gen_expiration_time_runbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,14 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
max_points = 270000
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, max_points)

# Expiration-time runbook configuration for the 'msturingpq-100K' dataset.
# NOTE(review): the meaning of the `ratios` and `timesteps` tuples is defined by
# gen_exp_time_runbook, whose body is not visible here — confirm before changing them.
ratios = (0, 4, 18)
timesteps = (0, 50, 5)
seed = 5554
# Passed in the `runbook_filename` position of gen_exp_time_runbook.
dataset_file = 'msturingpq-100K_expiration_time_runbook.yaml'
dataset_name = 'msturingpq-100K'
dataset_size = 100000
max_t = 100
gt_url = None
max_points = 28000
# NOTE(review): this call passes 11 positional arguments, one more than the
# 10-argument call used for the preceding configuration (an extra `True` sits
# before `max_points`). Confirm this matches gen_exp_time_runbook's signature.
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url, True, max_points)

2 changes: 1 addition & 1 deletion neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
msturing-10M:
msturingpq-10M:
1:
end: 50000
operation: insert
Expand Down
Loading

0 comments on commit 2b54f40

Please sign in to comment.