Skip to content

Commit

Permalink
..
Browse files Browse the repository at this point in the history
Signed-off-by: min.tian <min.tian.cn@gmail.com>
  • Loading branch information
alwayslove2013 committed Aug 6, 2024
1 parent 153a518 commit 8b6dfc8
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 18 deletions.
2 changes: 2 additions & 0 deletions vectordb_bench/backend/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def metric_type_map(s: str) -> MetricType:
class LabelFilterPerformanceCase(PerformanceCase):
case_id: CaseType = CaseType.LabelFilterPerformanceCase
with_scalar_labels: bool = True
dataset_with_size_type: DatasetWithSizeType

def __init__(
self,
Expand All @@ -366,6 +367,7 @@ def __init__(
optimize_timeout=optimize_timeout,
filter_rate=filter_rate,
filters=filter,
dataset_with_size_type=dataset_with_size_type,
**kwargs,
)

Expand Down
22 changes: 16 additions & 6 deletions vectordb_bench/backend/clients/milvus/milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def __init__(
if drop_old and utility.has_collection(self.collection_name):
log.info(f"{self.name} client drop_old collection: {self.collection_name}")
utility.drop_collection(self.collection_name)
else:
# TODO: may need to check whether the existing collection has no scalar_labels
pass

if not utility.has_collection(self.collection_name):
fields = [
Expand Down Expand Up @@ -227,12 +230,19 @@ def insert_embeddings(
batch_end_offset = min(
batch_start_offset + self.batch_size, len(embeddings)
)
insert_data = [
metadata[batch_start_offset:batch_end_offset],
metadata[batch_start_offset:batch_end_offset],
embeddings[batch_start_offset:batch_end_offset],
labels_data[batch_start_offset:batch_end_offset],
]
if self.with_scalar_labels:
insert_data = [
metadata[batch_start_offset:batch_end_offset],
metadata[batch_start_offset:batch_end_offset],
embeddings[batch_start_offset:batch_end_offset],
labels_data[batch_start_offset:batch_end_offset],
]
else:
insert_data = [
metadata[batch_start_offset:batch_end_offset],
metadata[batch_start_offset:batch_end_offset],
embeddings[batch_start_offset:batch_end_offset],
]
res = self.col.insert(insert_data)
insert_count += len(res.primary_keys)
except MilvusException as e:
Expand Down
4 changes: 4 additions & 0 deletions vectordb_bench/backend/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def verify_size(cls, v):
def label(self) -> str:
return self._size_label.get(self.size).label

@property
def full_name(self) -> str:
    """Return the display name: capitalized name, then the capitalized label in parentheses."""
    name_part = self.name.capitalize()
    label_part = self.label.capitalize()
    return f"{name_part} ({label_part})"

@property
def dir_name(self) -> str:
    """Return the lower-cased ``name_label_size`` string, with the size passed through ``utils.numerize``."""
    mixed_case_name = f"{self.name}_{self.label}_{utils.numerize(self.size)}"
    return mixed_case_name.lower()
Expand Down
11 changes: 8 additions & 3 deletions vectordb_bench/frontend/components/check_results/charts.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from vectordb_bench.backend.cases import Case
from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
from vectordb_bench.frontend.components.check_results.expanderStyle import (
initMainExpanderStyle,
)
from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
from vectordb_bench.frontend.config.styles import *
from vectordb_bench.frontend.config.styles import (
COLOR_MAP,
PATTERN_SHAPES,
getPatternShape,
)
from vectordb_bench.models import ResultLabel
import plotly.express as px

Expand Down
11 changes: 10 additions & 1 deletion vectordb_bench/frontend/components/check_results/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,16 @@ def mergeTasks(tasks: list[CaseResult]):
db = task.task_config.db.value
db_label = task.task_config.db_config.db_label or ""
version = task.task_config.db_config.version or ""
case_name = task.task_config.case_config.case_name
case = task.task_config.case_config.case
case_name = case.name
dataset_name = case.dataset.data.full_name
filter_rate = case.filters.filter_rate
dbCaseMetricsMap[db_name][case_name] = {
"db": db,
"db_label": db_label,
"version": version,
"dataset_name": dataset_name,
"filter_rate": filter_rate,
"metrics": mergeMetrics(
dbCaseMetricsMap[db_name][case_name].get("metrics", {}),
asdict(task.metrics),
Expand All @@ -59,12 +64,16 @@ def mergeTasks(tasks: list[CaseResult]):
db_label = metricInfo["db_label"]
version = metricInfo["version"]
label = metricInfo["label"]
dataset_name = metricInfo["dataset_name"]
filter_rate = metricInfo["filter_rate"]
if label == ResultLabel.NORMAL:
mergedTasks.append(
{
"db_name": db_name,
"db": db,
"db_label": db_label,
"dataset_name": dataset_name,
"filter_rate": filter_rate,
"version": version,
"case_name": case_name,
"metricsSet": set(metrics.keys()),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from vectordb_bench.frontend.config.styles import *
from vectordb_bench.frontend.config.styles import PAGE_TITLE, FAVICON


def initResultsPageConfig(st):
Expand All @@ -9,10 +9,11 @@ def initResultsPageConfig(st):
# initial_sidebar_state="collapsed",
)


def initRunTestPageConfig(st):
    """Call ``st.set_page_config`` with the shared title and favicon and a collapsed sidebar."""
    page_options = {
        "page_title": PAGE_TITLE,
        "page_icon": FAVICON,
        # "layout": "wide",
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_options)
)
65 changes: 65 additions & 0 deletions vectordb_bench/frontend/components/filter/charts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import plotly.express as px
from vectordb_bench.frontend.config.styles import COLOR_MAP
from vectordb_bench.metric import metricUnitMap


def drawCharts(st, allData, failedTasks, _):
    """Render one chart section per dataset found in ``allData``.

    Records are grouped by their ``"dataset_name"`` value. For each name, in
    sorted order, a container is created on ``st``, given the dataset name as
    a subheader, and handed to ``drawChartByMetric`` together with that
    dataset's records (kept in their original relative order).

    Args:
        st: Object providing ``container()``; presumably the Streamlit module
            or a Streamlit container -- confirm against the caller.
        allData: Iterable of dict-like records, each with a
            ``"dataset_name"`` key.
        failedTasks: Not used by this function.
        _: Not used by this function.
    """
    # Group in a single pass instead of re-scanning allData once per dataset.
    records_by_dataset = {}
    for record in allData:
        records_by_dataset.setdefault(record["dataset_name"], []).append(record)

    for dataset_name in sorted(records_by_dataset):
        container = st.container()
        container.subheader(dataset_name)
        drawChartByMetric(container, records_by_dataset[dataset_name])


def drawChartByMetric(st, data):
    """Draw the "qps" and "recall" charts for ``data``, one per column of ``st``.

    Each column gets a markdown heading with the metric name followed by the
    chart produced by ``drawChart`` for that metric.
    """
    metric_names = ["qps", "recall"]
    for column, metric_name in zip(st.columns(len(metric_names)), metric_names):
        column.markdown(f"#### {metric_name}")
        drawChart(column, data, metric_name)


def getRange(metric, data, padding_multipliers):
    """Return a padded ``[low, high]`` range for ``metric`` across ``data``.

    The span between the smallest and largest value is used as the padding
    unit: ``low`` is the minimum minus ``span * padding_multipliers[0]`` and
    ``high`` is the maximum plus ``span * padding_multipliers[1]``.

    Args:
        metric: Key to look up in every record.
        data: Iterable of dict-like records; a record without ``metric``
            counts as 0.
        padding_multipliers: Two-item sequence ``(low_factor, high_factor)``.

    Returns:
        A two-item list ``[low, high]``. Empty ``data`` yields a zero-width
        range at 0, the same result as a single record that lacks the metric.
    """
    # Collect the values once; the original built this list twice.
    values = [d.get(metric, 0) for d in data]
    # default=0 mirrors the per-record default and avoids ValueError on empty input.
    minV = min(values, default=0)
    maxV = max(values, default=0)
    padding = maxV - minV
    return [
        minV - padding * padding_multipliers[0],
        maxV + padding * padding_multipliers[1],
    ]


def drawChart(st, data: list[dict], metric):
    """Plot ``metric`` against ``filter_rate`` as a line chart and show it on ``st``.

    Args:
        st: Object providing ``plotly_chart``; presumably a Streamlit
            container or column -- confirm against the caller.
        data: Records read by key: ``"filter_rate"``, ``metric``, ``"db"``
            (line colour) and ``"db_name"`` (line grouping). The list is not
            modified.
        metric: Key of the value plotted on the y axis; also used to look up
            the unit suffix in ``metricUnitMap``.
    """
    unit = metricUnitMap.get(metric, "")
    x = "filter_rate"
    xrange = getRange(x, data, [0.05, 0.1])

    y = metric
    yrange = getRange(y, data, [0.2, 0.1])

    # Sort a copy: the same list is passed in once per metric, so sorting it
    # in place would silently reorder the caller's data.
    ordered = sorted(data, key=lambda a: a[x])

    fig = px.line(
        ordered,
        x=x,
        y=y,
        color="db",
        line_group="db_name",
        color_discrete_map=COLOR_MAP,
        text=metric,
        markers=True,
    )
    # Pin both axes to the padded ranges computed above.
    fig.update_xaxes(range=xrange)
    fig.update_yaxes(range=yrange)
    # Label every point with its y value followed by the metric's unit.
    fig.update_traces(textposition="bottom right", texttemplate="%{y:,.4~r}" + unit)
    fig.update_layout(
        margin=dict(l=0, r=0, t=40, b=0, pad=8),
        legend=dict(
            orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""
        ),
    )
    st.plotly_chart(fig, use_container_width=True)
Empty file.
19 changes: 13 additions & 6 deletions vectordb_bench/frontend/pages/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,30 @@
NavToQuriesPerDollar,
NavToRunTest,
)
from vectordb_bench.frontend.components.check_results.charts import drawCharts
from vectordb_bench.frontend.components.filter.charts import drawCharts
from vectordb_bench.frontend.components.check_results.filters import getshownData
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
from vectordb_bench.frontend.config.styles import FAVICON

# from vectordb_bench.frontend.config.styles import *
from vectordb_bench.interface import benchMarkRunner


def main():
# set page config
initResultsPageConfig(st)
st.set_page_config(
page_title="Label Filter",
page_icon=FAVICON,
layout="wide",
# initial_sidebar_state="collapsed",
)

# header
drawHeaderIcon(st)

allResults = benchMarkRunner.get_results()

st.title("Vector Database Benchmark")
st.caption(
"Note that all testing was completed in July 2023, except for the times already noted."
)
st.title("Vector Database Benchmark (Label Filter)")

# results selector and filter
resultSelectorContainer = st.sidebar.container()
Expand All @@ -44,6 +47,10 @@ def main():
NavToRunTest(navContainer)
NavToQuriesPerDollar(navContainer)

# charts
print(showCaseNames)
drawCharts(st, shownData, failedTasks, showCaseNames)

# footer
footer(st.container())

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"run_id": "e54a054431834e7eae59c62acbc226b0", "task_label": "2024080517", "results": [{"metrics": {"max_load_count": 0, "load_duration": 0.0, "qps": 0.0, "serial_latency_p99": 0.0, "recall": 0.0, "ndcg": 0.0, "conc_num_list": [], "conc_qps_list": [], "conc_latency_p99_list": []}, "task_config": {"db": "Milvus", "db_config": {"db_label": "", "version": "", "note": "", "uri": "**********"}, "db_case_config": {"index": "HNSW", "metric_type": "COSINE", "M": 30, "efConstruction": 360, "ef": 100}, "case_config": {"case_id": 50, "custom_case": null, "k": 100, "concurrency_search_config": {"num_concurrency": [10], "concurrency_duration": 30}}, "stages": ["drop_old", "load", "search_serial", "search_concurrent"]}, "label": "x"}], "file_fmt": "result_{}_{}_{}.json"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"run_id": "a509694394034f35b6c1c478f038ed47", "task_label": "2024080518", "results": [{"metrics": {"max_load_count": 0, "load_duration": 0.0, "qps": 0.0, "serial_latency_p99": 0.0, "recall": 0.0, "ndcg": 0.0, "conc_num_list": [], "conc_qps_list": [], "conc_latency_p99_list": []}, "task_config": {"db": "Milvus", "db_config": {"db_label": "", "version": "", "note": "", "uri": "**********"}, "db_case_config": {"index": "HNSW", "metric_type": "COSINE", "M": 30, "efConstruction": 360, "ef": 100}, "case_config": {"case_id": 50, "custom_case": null, "k": 100, "concurrency_search_config": {"num_concurrency": [10], "concurrency_duration": 30}}, "stages": ["drop_old", "load", "search_serial", "search_concurrent"]}, "label": "x"}], "file_fmt": "result_{}_{}_{}.json"}

0 comments on commit 8b6dfc8

Please sign in to comment.