[BFCL] Introducing custom handlers #756
base: main
Changes from 2 commits
@@ -14,6 +14,7 @@
     write_list_of_dicts_to_file,
 )
 from tqdm import tqdm
+from bfcl.eval_checker.metadata_loader import metadata_loader


 def api_status_sanity_check_rest():
@@ -244,6 +245,9 @@ def get_cost_letency_info(model_name, cost_data, latency_data):
 def generate_leaderboard_csv(
     leaderboard_table, output_path, eval_models=None, eval_categories=None
 ):
+    # Load metadata at the beginning of the function
+    model_metadata, _, _ = metadata_loader.load_metadata()
+
     print("📈 Aggregating data to generate leaderboard score table...")
     data_non_live = []
     data_live = []

Comment on lines +250 to +252:
For dynamic load, we'd used
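As a rough illustration of what the dynamic load replaces (my own sketch, based only on the diff in this PR; the name display_name, and treating index 0 as the display-name column, are assumptions taken from the model_metadata.json example further down):

# Sketch only, not part of the PR.

# Before: read straight from the hard-coded mapping.
display_name = MODEL_METADATA_MAPPING[model_name_escaped][0]

# After: metadata is loaded once at the top of generate_leaderboard_csv and can
# include entries merged in from a BFCL_MODEL_METADATA JSON file.
model_metadata, _, _ = metadata_loader.load_metadata()
display_name = model_metadata[model_name_escaped][0]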
@@ -324,7 +328,7 @@ def generate_leaderboard_csv(
         data_non_live.append(
             [
                 "N/A",
-                MODEL_METADATA_MAPPING[model_name_escaped][0],
+                model_metadata[model_name_escaped][0],
                 overall_accuracy_non_live["accuracy"],
                 summary_ast_non_live["accuracy"],
                 summary_exec_non_live["accuracy"],
@@ -385,7 +389,7 @@ def generate_leaderboard_csv(
         data_live.append(
             [
                 "N/A",
-                MODEL_METADATA_MAPPING[model_name_escaped][0],
+                model_metadata[model_name_escaped][0],
                 overall_accuracy_live["accuracy"],
                 summary_ast_live["accuracy"],
                 python_simple_ast_live["accuracy"],
@@ -424,7 +428,7 @@ def generate_leaderboard_csv(
         data_multi_turn.append(
             [
                 "N/A",
-                MODEL_METADATA_MAPPING[model_name_escaped][0],
+                model_metadata[model_name_escaped][0],
                 overall_accuracy_multi_turn["accuracy"],
                 multi_turn_base["accuracy"],
                 multi_turn_miss_func["accuracy"],
@@ -451,8 +455,8 @@ def generate_leaderboard_csv(
             [
                 "N/A",
                 total_overall_accuracy["accuracy"],
-                MODEL_METADATA_MAPPING[model_name_escaped][0],
-                MODEL_METADATA_MAPPING[model_name_escaped][1],
+                model_metadata[model_name_escaped][0],
+                model_metadata[model_name_escaped][1],
                 cost,
                 latency_mean,
                 latency_std,
@@ -481,8 +485,8 @@ def generate_leaderboard_csv(
                 # multi_turn_composite["accuracy"],
                 total_relevance["accuracy"],
                 total_irrelevance["accuracy"],
-                MODEL_METADATA_MAPPING[model_name_escaped][2],
-                MODEL_METADATA_MAPPING[model_name_escaped][3],
+                model_metadata[model_name_escaped][2],
+                model_metadata[model_name_escaped][3],
             ]
         )
@@ -554,6 +558,7 @@ def generate_leaderboard_csv(

 def check_model_category_status(score_path):
     result_path = score_path.replace("score", "result")
+    model_metadata, _, _ = metadata_loader.load_metadata()

     leaderboard_categories = [
         "exec_simple",
@@ -578,8 +583,8 @@ def check_model_category_status(score_path):

     category_status = {}

-    # Check for all models in MODEL_METADATA_MAPPING
-    for model_name in MODEL_METADATA_MAPPING.keys():
+    # Check for all models in metadata
+    for model_name in model_metadata.keys():
         category_status[model_name] = {
             category: {"generated": False, "evaluated": False}
             for category in leaderboard_categories
New file: bfcl/eval_checker/metadata_loader.py
@@ -0,0 +1,54 @@
import json
import os
from typing import Dict, Any

from bfcl.model_handler.handler_map import local_inference_handler_map
from bfcl.eval_checker.model_metadata import (
    MODEL_METADATA_MAPPING,
    OUTPUT_PRICE_PER_MILLION_TOKEN,
    NO_COST_MODELS,
)


class MetadataLoader:
    @staticmethod
    def load_metadata() -> tuple[Dict[str, Any], Dict[str, float], list[str]]:
        """
        Load model metadata, pricing information, and the list of no-cost models.

        Returns:
            tuple containing:
                - metadata: Dict mapping model names to their metadata
                - prices: Dict mapping model names to their prices
                - no_cost_models: List of model names that have no associated cost
        """
        metadata = dict(MODEL_METADATA_MAPPING)
        prices = dict(OUTPUT_PRICE_PER_MILLION_TOKEN)
        no_cost = list(NO_COST_MODELS)

        # Check for an additional metadata config file path in environment variables
        metadata_config_path = os.getenv("BFCL_MODEL_METADATA")

        if metadata_config_path and os.path.exists(metadata_config_path):
            try:
                with open(metadata_config_path) as f:
                    custom_config = json.load(f)

                # Add custom model metadata
                if "metadata" in custom_config:
                    metadata.update(custom_config["metadata"])

                # Add custom pricing information
                if "prices" in custom_config:
                    prices.update(custom_config["prices"])

                # Add additional no-cost models
                if "no_cost_models" in custom_config:
                    no_cost.extend(custom_config["no_cost_models"])

            except Exception as e:
                print(f"Error loading custom metadata config: {str(e)}")

        return metadata, prices, no_cost


# Global metadata loader instance
metadata_loader = MetadataLoader()

Comment (on the BFCL_MODEL_METADATA lookup):
I've implemented the same config-based handler loading functionality in the evaluate module that was previously available in generation.

export BFCL_MODEL_METADATA=model_metadata.json
python3 -m bfcl evaluate --model allganize/Alpha-Ko-32B-GPTQ-Int8-202411 --test-category multi_turn_base

Comment:
This is an example of the metadata config file (model_metadata.json):

{
    "metadata": {
        "allganize/Alpha-Ko-32B-GPTQ-Int8-202411": [
            "alpha-ko-32b-gptq-int8",
            "https://huggingface.co/allganize/Alpha-Ko-32B-GPTQ-Int8-202411",
            "Allganize",
            "Custom License"
        ]
    },
    "prices": {
        "allganize/Alpha-Ko-32B-GPTQ-Int8-202411": 0.0
    },
    "no_cost_models": [
        "allganize/Alpha-Ko-32B-GPTQ-Int8-202411"
    ]
}
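As a minimal usage sketch (my own illustration, not part of the diff; it assumes the model_metadata.json example above is on disk and that the module lives at bfcl/eval_checker/metadata_loader.py, as the import in the first file suggests):

import os

from bfcl.eval_checker.metadata_loader import metadata_loader

# Point the loader at the custom config shown in the comment above.
os.environ["BFCL_MODEL_METADATA"] = "model_metadata.json"

# Built-in entries from model_metadata.py are merged with the JSON overrides.
metadata, prices, no_cost_models = metadata_loader.load_metadata()

print(metadata["allganize/Alpha-Ko-32B-GPTQ-Int8-202411"][0])        # "alpha-ko-32b-gptq-int8"
print(prices["allganize/Alpha-Ko-32B-GPTQ-Int8-202411"])             # 0.0
print("allganize/Alpha-Ko-32B-GPTQ-Int8-202411" in no_cost_models)   # True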
New file: handler loader (HandlerLoader)
@@ -0,0 +1,61 @@
import json
import importlib.util
import os
from pathlib import Path
from typing import Type, Optional

from bfcl.model_handler.base_handler import BaseHandler
from bfcl.model_handler.handler_map import HANDLER_MAP


class HandlerLoader:
    @staticmethod
    def load_handler_class(module_path: str, class_name: str) -> Optional[Type[BaseHandler]]:
        """Dynamically load a handler class from the specified path"""
        try:
            abs_path = str(Path(module_path).resolve())
            spec = importlib.util.spec_from_file_location("custom_module", abs_path)
            if spec is None or spec.loader is None:
                raise ImportError(f"Could not load spec for module: {module_path}")

            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            handler_class = getattr(module, class_name, None)
            if handler_class is None:
                raise AttributeError(f"Class {class_name} not found in {module_path}")

            # Check for BaseHandler inheritance
            if not issubclass(handler_class, BaseHandler):
                raise TypeError(f"Class {class_name} must inherit from BaseHandler")

            return handler_class

        except Exception as e:
            print(f"Error loading handler class {class_name} from {module_path}: {str(e)}")
            return None

    @staticmethod
    def get_handler_class(model_name: str) -> Optional[Type[BaseHandler]]:
        """Return the handler class corresponding to the model name"""
        # Check the environment variable for the path to the handler mapping file
        handler_config_path = os.getenv("BFCL_HANDLER_CONFIG")

        if handler_config_path and os.path.exists(handler_config_path):
            try:
                with open(handler_config_path) as f:
                    handler_config = json.load(f)

                if model_name in handler_config:
                    config = handler_config[model_name]
                    handler_class = HandlerLoader.load_handler_class(
                        config["module_path"],
                        config["class_name"]
                    )
                    if handler_class:
                        return handler_class

            except Exception as e:
                print(f"Error loading custom handler config: {str(e)}")

        # Fall back to the default handler map
        return HANDLER_MAP.get(model_name)
Comment:
Previously, you could simply use the dictionary mapping to get a handler. Now, HandlerLoader controls this. You can set the environment variable BFCL_HANDLER_CONFIG; it points to the path of a JSON file, and if that file exists, the loader reads the handler mapping from it.
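A short sketch of the resolution order this describes (my own illustration; the module path bfcl.model_handler.handler_loader and the file name handler_config.json are assumptions):

import os

from bfcl.model_handler.handler_loader import HandlerLoader  # assumed module path

# If BFCL_HANDLER_CONFIG points to an existing JSON file, its mapping is tried first...
os.environ["BFCL_HANDLER_CONFIG"] = "handler_config.json"
handler_cls = HandlerLoader.get_handler_class("allganize/Alpha-Ko-32B-GPTQ-Int8-202411")

# ...and anything not found there (or when the variable is unset) falls back to HANDLER_MAP.
fallback_cls = HandlerLoader.get_handler_class("a-model-already-in-HANDLER_MAP")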
Comment:
This is an example of the JSON file and of setting the environment variable.
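The shapes below are my reconstruction from the keys get_handler_class reads (the top-level key is the model name; module_path and class_name locate the custom handler), so treat the concrete path and class name as placeholders:

export BFCL_HANDLER_CONFIG=handler_config.json

handler_config.json:
{
    "allganize/Alpha-Ko-32B-GPTQ-Int8-202411": {
        "module_path": "/path/to/my_handlers.py",
        "class_name": "MyCustomHandler"
    }
}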
Comment:
At /path/to/my_handlers.py you can define custom handler classes.
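A minimal sketch of such a class (my own illustration; MyCustomHandler is a placeholder name, and the methods you actually need to override come from BaseHandler, which is not shown in this diff):

# /path/to/my_handlers.py -- hypothetical module referenced by handler_config.json
from bfcl.model_handler.base_handler import BaseHandler


class MyCustomHandler(BaseHandler):
    # HandlerLoader only enforces that the class inherits from BaseHandler;
    # override the inference/decoding methods BaseHandler defines for your model here.
    pass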
Comment:
For a detailed example, I've included my own case:
berkeley-function-call-leaderboard/custom_handler.py
berkeley-function-call-leaderboard/handler_config.json
Comment:
It works as expected when I run the command below:
python3 -m bfcl generate --model allganize/Alpha-Ko-32B-GPTQ-Int8-202411 --test-category multi_turn_base --num-threads 40